Compare commits


11 Commits

Author SHA1 Message Date
Waleed Latif
95a8d6410c v0.3.15: helm charts, evaluator block fixes, ArXiv and Wikipedia tools 2025-07-29 10:22:34 -07:00
Waleed Latif
08720d926c feat(landing): add rb2b (#815)
Co-authored-by: waleedlatif <waleedlatif@waleedlatifs-MacBook-Pro.local>
2025-07-28 20:16:44 -07:00
Waleed Latif
308f39e8b9 feat(tools): added arXiv and wikipedia tools/blocks & docs (#814)
* feat(tools): added arxiv tools

* feat(tools): added wikipedia tool

* updated docs & remove empty interface

* remove empty interface

* fixed docs generator

* fixed wikipedia

* removed hasExpandableContent from tool-input for consistency across all tools, regardless of their params

* lint

---------

Co-authored-by: waleedlatif <waleedlatif@waleedlatifs-MacBook-Pro.local>
2025-07-28 19:39:26 -07:00
Vikhyath Mondreti
5b1f948686 feat(kb-tags): natural language pre-filter tag system for knowledge base searches (#800)
* fix lint

* checkpoint

* works

* simplify

* checkpoint

* works

* fix lint

* checkpoint - create doc ui

* working block

* fix import conflicts

* fix tests

* add blockers to going past max tag slots

* remove console logs

* forgot a few

* Update apps/sim/tools/knowledge/search.ts

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* remove console.warn

* Update apps/sim/hooks/use-tag-definitions.ts

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* use tag slots consts in more places

* remove duplicate title

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-07-28 18:43:52 -07:00
Waleed Latif
cb17691c01 feat(helm): added helm charts for self-hosting (#813)
* feat(helm): added helm charts for self-hosting

* ack PR comments, use sim instead of simstudio

---------

Co-authored-by: waleedlatif <waleedlatif@waleedlatifs-MacBook-Pro.local>
2025-07-28 18:03:47 -07:00
Vikhyath Mondreti
c00b18594e fix(webhook-modal): on copy do not change webhook url, fix auth to use regular perms system (#812)
* fix(webhook-modal): don't regenerate webhook url on copy

* fix authentication checks for webhook saves
2025-07-28 16:17:34 -07:00
Vikhyath Mondreti
95efae9035 improvement(webhooks): move webhook execution to trigger.dev (#810)
* improvement(webhooks): move webhook execution to trigger dev

* remove old tests
2025-07-28 13:04:25 -07:00
Waleed Latif
b12e415fea fix(assets): update README.md (#811)
Co-authored-by: waleedlatif <waleedlatif@waleedlatifs-MacBook-Pro.local>
2025-07-28 12:25:39 -07:00
Waleed Latif
510ce4b7da improvement(cdn): add cdn for large video assets with fallback to static assets (#809)
* added CDN for large assets with fallback to static assets

* remove video assets from docs

---------

Co-authored-by: waleedlatif <waleedlatif@waleedlatifs-MacBook-Pro.local>
2025-07-28 12:15:41 -07:00
Waleed Latif
abed816afd fix(standalone): selectively enable vercel speed insights, add annotations for envvars (#808)
* add annotations for environment variables

* selectively enable vercel speed insights

* use DOCKER_BUILD flag to selectively disable vercel speed insights and analytics

* lint

* additional info

---------

Co-authored-by: waleedlatif <waleedlatif@waleedlatifs-MacBook-Pro.local>
2025-07-28 11:10:20 -07:00
Waleed Latif
6f390c0d1d fix(evaluator): fix tag dropdown for evaluator block (#807)
* fix(evaluator): fix tag dropdown for evaluator block

* lint

---------

Co-authored-by: waleedlatif <waleedlatif@waleedlatifs-MacBook-Pro.local>
2025-07-28 11:03:39 -07:00
134 changed files with 15236 additions and 1789 deletions

.gitignore vendored
View File

@@ -65,4 +65,7 @@ start-collector.sh
.turbo
# VSCode
.vscode
## Helm Chart Tests
helm/sim/test

View File

@@ -0,0 +1,138 @@
---
title: ArXiv
description: Search and retrieve academic papers from ArXiv
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="arxiv"
color="#E0E0E0"
icon={true}
iconSvg={`<svg className="block-icon" id='logomark' xmlns='http://www.w3.org/2000/svg' viewBox='0 0 17.732 24.269'>
<g id='tiny'>
<path
d='M573.549,280.916l2.266,2.738,6.674-7.84c.353-.47.52-.717.353-1.117a1.218,1.218,0,0,0-1.061-.748h0a.953.953,0,0,0-.712.262Z'
transform='translate(-566.984 -271.548)'
fill='#bdb9b4'
/>
<path
d='M579.525,282.225l-10.606-10.174a1.413,1.413,0,0,0-.834-.5,1.09,1.09,0,0,0-1.027.66c-.167.4-.047.681.319,1.206l8.44,10.242h0l-6.282,7.716a1.336,1.336,0,0,0-.323,1.3,1.114,1.114,0,0,0,1.04.69A.992.992,0,0,0,571,293l8.519-7.92A1.924,1.924,0,0,0,579.525,282.225Z'
transform='translate(-566.984 -271.548)'
fill='#b31b1b'
/>
<path
d='M584.32,293.912l-8.525-10.275,0,0L573.53,280.9l-1.389,1.254a2.063,2.063,0,0,0,0,2.965l10.812,10.419a.925.925,0,0,0,.742.282,1.039,1.039,0,0,0,.953-.667A1.261,1.261,0,0,0,584.32,293.912Z'
transform='translate(-566.984 -271.548)'
fill='#bdb9b4'
/>
</g>
</svg>`}
/>
{/* MANUAL-CONTENT-START:intro */}
[ArXiv](https://arxiv.org/) is a free, open-access repository of scientific research papers in fields such as physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering, systems science, and economics. ArXiv provides a vast collection of preprints and published articles, making it a primary resource for researchers and practitioners worldwide.
With ArXiv, you can:
- **Search for academic papers**: Find research by keywords, author names, titles, categories, and more
- **Retrieve paper metadata**: Access abstracts, author lists, publication dates, and other bibliographic information
- **Download full-text PDFs**: Obtain the complete text of most papers for in-depth study
- **Explore author contributions**: View all papers by a specific author
- **Stay up-to-date**: Discover the latest submissions and trending topics in your field
In Sim Studio, the ArXiv integration enables your agents to programmatically search, retrieve, and analyze scientific papers from ArXiv. This allows you to automate literature reviews, build research assistants, or incorporate up-to-date scientific knowledge into your agentic workflows. Use ArXiv as a dynamic data source for research, discovery, and knowledge extraction within your Sim Studio projects.
{/* MANUAL-CONTENT-END */}
## Usage Instructions
Search for academic papers, retrieve metadata, download papers, and access the vast collection of scientific research on ArXiv.
## Tools
### `arxiv_search`
Search for academic papers on ArXiv by keywords, authors, titles, or other fields.
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `query` | string | Yes | The search query to execute |
| `searchField` | string | No | Field to search in: all, ti \(title\), au \(author\), abs \(abstract\), co \(comment\), jr \(journal\), cat \(category\), rn \(report number\) |
| `maxResults` | number | No | Maximum number of results to return \(default: 10, max: 2000\) |
| `sortBy` | string | No | Sort by: relevance, lastUpdatedDate, submittedDate \(default: relevance\) |
| `sortOrder` | string | No | Sort order: ascending, descending \(default: descending\) |
#### Output
| Parameter | Type |
| --------- | ---- |
| `query` | string |
| `papers` | string |
| `totalResults` | string |
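As a rough illustration, the parameters above line up with arXiv's public Atom export API (`export.arxiv.org`); assuming the tool wraps that endpoint (an inference, not confirmed by this diff), a title search reduces to a request like this sketch:

```typescript
// Hypothetical sketch: arxiv_search parameters mapped onto arXiv's public
// export API. The endpoint and query parameters come from arXiv's API docs;
// that this block uses them internally is an assumption.
const params = new URLSearchParams({
  search_query: 'ti:"retrieval augmented generation"', // searchField "ti" = title
  start: '0',
  max_results: '10',        // maxResults (default 10, max 2000)
  sortBy: 'submittedDate',  // relevance | lastUpdatedDate | submittedDate
  sortOrder: 'descending',
})
const res = await fetch(`https://export.arxiv.org/api/query?${params}`)
const atomFeed = await res.text() // results are returned as an Atom XML feed
```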
### `arxiv_get_paper`
Get detailed information about a specific ArXiv paper by its ID.
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `paperId` | string | Yes | ArXiv paper ID \(e.g., ...\) |
#### Output
| Parameter | Type |
| --------- | ---- |
| `paper` | string |
### `arxiv_get_author_papers`
Search for papers by a specific author on ArXiv.
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `authorName` | string | Yes | Author name to search for |
| `maxResults` | number | No | Maximum number of results to return \(default: 10, max: 2000\) |
#### Output
| Parameter | Type |
| --------- | ---- |
| `authorPapers` | string |
| `authorName` | string |
| `totalResults` | string |
## Block Configuration
### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `operation` | string | Yes | Operation |
### Outputs
| Output | Type | Description |
| ------ | ---- | ----------- |
| `papers` | json | papers output from the block |
| `totalResults` | number | totalResults output from the block |
| `paper` | json | paper output from the block |
| `authorPapers` | json | authorPapers output from the block |
## Notes
- Category: `tools`
- Type: `arxiv`

View File

@@ -26,7 +26,7 @@ import { BlockInfoCard } from "@/components/ui/block-info-card"
/>
{/* MANUAL-CONTENT-START:intro */}
[Exa](https://exa.ai/) is an AI-powered search engine designed specifically for developers and researchers that provides highly relevant and up-to-date information from across the web. It combines advanced semantic search capabilities with AI understanding to deliver more accurate and contextually relevant results than traditional search engines.
[Exa](https://exa.ai/) is an AI-powered search engine designed specifically for developers and researchers, providing highly relevant and up-to-date information from across the web. It combines advanced semantic search capabilities with AI understanding to deliver more accurate and contextually relevant results than traditional search engines.
With Exa, you can:
@@ -35,14 +35,16 @@ With Exa, you can:
- **Access up-to-date information**: Retrieve current information from across the web
- **Find similar content**: Discover related resources based on content similarity
- **Extract webpage contents**: Retrieve and process the full text of web pages
- **Answer questions with citations**: Ask questions and receive direct answers with supporting sources
- **Perform research tasks**: Automate multi-step research workflows to gather, synthesize, and summarize information
In Sim Studio, the Exa integration allows your agents to search the web for information, retrieve content from specific URLs, and find similar resources - all programmatically through API calls. This enables your agents to access real-time information from the internet, enhancing their ability to provide accurate, current, and relevant responses. The integration is particularly valuable for research tasks, information gathering, content discovery, and answering questions that require up-to-date information from across the web.
In Sim Studio, the Exa integration allows your agents to search the web for information, retrieve content from specific URLs, find similar resources, answer questions with citations, and conduct research tasks—all programmatically through API calls. This enables your agents to access real-time information from the internet, enhancing their ability to provide accurate, current, and relevant responses. The integration is particularly valuable for research tasks, information gathering, content discovery, and answering questions that require up-to-date information from across the web.
{/* MANUAL-CONTENT-END */}
## Usage Instructions
Search the web, retrieve content, find similar links, and answer questions using Exa
Search the web, retrieve content, find similar links, and answer questions using Exa's powerful AI search capabilities.

View File

@@ -44,8 +44,16 @@ With Firecrawl in Sim Studio, you can:
- **Handle JavaScript-heavy sites**: Process content from modern web applications that rely on JavaScript
- **Filter content**: Focus on specific parts of a page using CSS selectors
- **Process at scale**: Handle high-volume scraping needs with a reliable API
- **Search the web**: Perform intelligent web searches and retrieve structured results
- **Crawl entire sites**: Crawl multiple pages from a website and aggregate their content
The Firecrawl integration allows your agents to access and process web content programmatically without leaving the Sim Studio environment. This enables scenarios like research, content aggregation, data extraction, and information analysis from across the web. Your agents can gather information from websites, extract structured data, and use that information to make decisions or generate insights - all without having to navigate the complexities of raw HTML parsing or browser automation. Simply configure the Firecrawl block with your API key, provide the target URL, and your agents can immediately begin working with web content in a clean, structured format.
In Sim Studio, the Firecrawl integration enables your agents to access and process web content programmatically as part of their workflows. Supported operations include:
- **Scrape**: Extract structured content (Markdown, HTML, metadata) from a single web page.
- **Search**: Search the web for information using Firecrawl's intelligent search capabilities.
- **Crawl**: Crawl multiple pages from a website, returning structured content and metadata for each page.
This allows your agents to gather information from websites, extract structured data, and use that information to make decisions or generate insights—all without having to navigate the complexities of raw HTML parsing or browser automation. Simply configure the Firecrawl block with your API key, select the operation (Scrape, Search, or Crawl), and provide the relevant parameters. Your agents can immediately begin working with web content in a clean, structured format.
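For orientation, a Scrape call against Firecrawl's hosted API might look like the sketch below; the endpoint shape follows Firecrawl's public v1 docs, but treat the details as indicative rather than exact for this block.

```typescript
// Hedged sketch of Firecrawl's v1 scrape endpoint; the API key and target URL
// are placeholders, and the exact request the block issues is an assumption.
const res = await fetch('https://api.firecrawl.dev/v1/scrape', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${process.env.FIRECRAWL_API_KEY}`,
  },
  body: JSON.stringify({ url: 'https://example.com', formats: ['markdown'] }),
})
const { data } = await res.json() // data.markdown holds the extracted content
```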
{/* MANUAL-CONTENT-END */}

View File

@@ -90,7 +90,7 @@ In Sim Studio, the Google Calendar integration enables your agents to programmat
## Usage Instructions
Integrate Google Calendar functionality to create, read, update, and list calendar events within your workflow. Automate scheduling, check availability, and manage events using OAuth authentication. Email invitations are sent asynchronously and delivery depends on recipients
Integrate Google Calendar functionality to create, read, update, and list calendar events within your workflow. Automate scheduling, check availability, and manage events using OAuth authentication. Email invitations are sent asynchronously and delivery depends on recipients' Google Calendar settings.

View File

@@ -46,7 +46,7 @@ In Sim Studio, the DALL-E integration enables your agents to generate images pro
## Usage Instructions
Create high-quality images using OpenAI
Create high-quality images using OpenAI's image generation models. Configure resolution, quality, style, and other parameters to get exactly the image you need.

View File

@@ -63,7 +63,7 @@ This integration is particularly valuable for building agents that need to gathe
## Usage Instructions
Transform web content into clean, readable text using Jina AI
Transform web content into clean, readable text using Jina AI's advanced extraction capabilities. Extract meaningful content from websites while preserving important information and optionally gathering links.

View File

@@ -2,6 +2,7 @@
"items": [
"index",
"airtable",
"arxiv",
"browser_use",
"clay",
"confluence",
@@ -53,6 +54,7 @@
"wealthbox",
"webhook",
"whatsapp",
"wikipedia",
"x",
"youtube"
]

View File

@@ -29,7 +29,17 @@ With Notion, you can:
- **Connect information**: Link between pages and databases to create a knowledge network
- **Access anywhere**: Use Notion across web, desktop, and mobile platforms with automatic syncing
In Sim Studio, the Notion integration enables your agents to interact directly with your Notion workspace programmatically. This allows for powerful automation scenarios such as knowledge management, content creation, and information retrieval. Your agents can read existing Notion pages to extract information, write to pages to update content, and create new pages from scratch. This integration bridges the gap between your AI workflows and your knowledge base, enabling seamless documentation and information management. By connecting Sim Studio with Notion, you can automate documentation processes, maintain up-to-date information repositories, generate reports, and organize information intelligently - all through your intelligent agents.
In Sim Studio, the Notion integration enables your agents to interact directly with your Notion workspace programmatically. This allows for powerful automation scenarios such as knowledge management, content creation, and information retrieval. Your agents can:
- **Read Notion pages**: Extract content and metadata from any Notion page.
- **Read Notion databases**: Retrieve database structure and information.
- **Write to pages**: Append new content to existing Notion pages.
- **Create new pages**: Generate new Notion pages under a parent page, with custom titles and content.
- **Query databases**: Search and filter database entries using advanced filter and sort criteria.
- **Search workspace**: Search across your entire Notion workspace for pages or databases matching specific queries.
- **Create new databases**: Programmatically create new databases with custom properties and structure.
This integration bridges the gap between your AI workflows and your knowledge base, enabling seamless documentation and information management. By connecting Sim Studio with Notion, you can automate documentation processes, maintain up-to-date information repositories, generate reports, and organize information intelligently—all through your intelligent agents.
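As a concrete reference point, the workspace search operation corresponds to Notion's public `POST /v1/search` endpoint; a minimal sketch follows (the token and query are placeholders, and the block's internal request shape is assumed):

```typescript
// Minimal sketch of a Notion workspace search via the public API.
// NOTION_TOKEN is a placeholder integration token.
const res = await fetch('https://api.notion.com/v1/search', {
  method: 'POST',
  headers: {
    Authorization: `Bearer ${process.env.NOTION_TOKEN}`,
    'Notion-Version': '2022-06-28', // required API version header
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({ query: 'meeting notes', page_size: 5 }),
})
const { results } = await res.json() // matching pages and databases
```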
{/* MANUAL-CONTENT-END */}

View File

@@ -43,7 +43,7 @@ In Sim Studio, the OpenAI integration enables your agents to leverage these powe
## Usage Instructions
Convert text into numerical vector representations using OpenAI
Convert text into numerical vector representations using OpenAI's embedding models. Transform text data into embeddings for semantic search, clustering, and other vector-based operations.

View File

@@ -45,7 +45,7 @@ In Sim Studio, the Pinecone integration enables your agents to leverage vector s
## Usage Instructions
Store, search, and retrieve vector embeddings using Pinecone
Store, search, and retrieve vector embeddings using Pinecone's specialized vector database. Generate embeddings from text and perform semantic similarity searches with customizable filtering options.

View File

@@ -80,6 +80,27 @@ import { BlockInfoCard } from "@/components/ui/block-info-card"
</svg>`}
/>
{/* MANUAL-CONTENT-START:intro */}
[Qdrant](https://qdrant.tech) is an open-source vector database designed for efficient storage, management, and retrieval of high-dimensional vector embeddings. Qdrant enables fast and scalable semantic search, making it ideal for AI applications that require similarity search, recommendation systems, and contextual information retrieval.
With Qdrant, you can:
- **Store vector embeddings**: Efficiently manage and persist high-dimensional vectors at scale
- **Perform semantic similarity search**: Find the most similar vectors to a query vector in real time
- **Filter and organize data**: Use advanced filtering to narrow down search results based on metadata or payload
- **Fetch specific points**: Retrieve vectors and their associated payloads by ID
- **Scale seamlessly**: Handle large collections and high-throughput workloads
In Sim Studio, the Qdrant integration enables your agents to interact with Qdrant programmatically as part of their workflows. Supported operations include:
- **Upsert**: Insert or update points (vectors and payloads) in a Qdrant collection
- **Search**: Perform similarity search to find vectors most similar to a given query vector, with optional filtering and result customization
- **Fetch**: Retrieve specific points from a collection by their IDs, with options to include payloads and vectors
This integration allows your agents to leverage powerful vector search and management capabilities, enabling advanced automation scenarios such as semantic search, recommendation, and contextual retrieval. By connecting Sim Studio with Qdrant, you can build agents that understand context, retrieve relevant information from large datasets, and deliver more intelligent and personalized responses—all without managing complex infrastructure.
{/* MANUAL-CONTENT-END */}
## Usage Instructions
Store, search, and retrieve vector embeddings using Qdrant. Perform semantic similarity searches and manage your vector collections.
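For reference, the Search operation corresponds to Qdrant's REST endpoint `POST /collections/{name}/points/search`; a minimal sketch, assuming a local Qdrant instance and a placeholder collection:

```typescript
// Minimal sketch of a Qdrant similarity search; the URL, collection name, and
// query vector are placeholders.
const res = await fetch('http://localhost:6333/collections/docs/points/search', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    vector: [0.12, -0.53, 0.91], // query embedding (normally much longer)
    limit: 3,                    // top-k results
    with_payload: true,          // include stored metadata with each hit
  }),
})
const { result } = await res.json() // points ordered by similarity score
```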

View File

@@ -26,19 +26,14 @@ import { BlockInfoCard } from "@/components/ui/block-info-card"
/>
{/* MANUAL-CONTENT-START:intro */}
[Reddit](https://www.reddit.com/) is a vast social news aggregation, content rating, and discussion platform where registered users submit content such as text posts, images, and links, which are then voted up or down by other members. Known as "the front page of the internet," Reddit is organized into thousands of communities called subreddits, each focused on a specific topic.
[Reddit](https://www.reddit.com/) is a social platform where users share and discuss content in topic-based communities called subreddits.
With Reddit, you can:
In Sim Studio, you can use the Reddit integration to:
- **Access diverse content**: Browse thousands of specialized communities covering virtually every topic
- **Stay informed**: Get real-time updates on trending news, discussions, and viral content
- **Engage with communities**: Participate in discussions with like-minded individuals
- **Discover trending topics**: See what's popular across different interest groups
- **Gather insights**: Collect opinions, feedback, and perspectives from diverse user groups
- **Monitor public sentiment**: Track reactions and discussions around specific topics or brands
- **Research niche topics**: Access specialized knowledge in dedicated communities
- **Get Posts**: Retrieve posts from any subreddit, with options to sort (Hot, New, Top, Rising) and filter Top posts by time (Day, Week, Month, Year, All Time).
- **Get Comments**: Fetch comments from a specific post, with options to sort and set the number of comments.
In Sim Studio, the Reddit integration enables your agents to programmatically access and analyze content from Reddit's vast ecosystem. This allows for powerful automation scenarios such as trend monitoring, content aggregation, and sentiment analysis. Your agents can retrieve popular posts from specific subreddits, extract valuable information, and incorporate these insights into their workflows. This integration bridges the gap between social media monitoring and your AI workflows, enabling more informed decision-making based on public discussions and trending topics. By connecting Sim Studio with Reddit, you can create agents that stay on top of relevant conversations, identify emerging trends, gather diverse perspectives, and deliver timely insights - all without requiring manual browsing of countless Reddit threads.
These operations let your agents access and analyze Reddit content as part of your automated workflows.
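As a rough illustration of what Get Posts returns, Reddit exposes public JSON listings; the sketch below uses that unauthenticated endpoint (whether the block uses it or the OAuth API is an assumption):

```typescript
// Sketch using Reddit's public JSON listing for "Top" posts filtered by week.
// The subreddit is a placeholder; Reddit expects a User-Agent header.
const res = await fetch(
  'https://www.reddit.com/r/MachineLearning/top.json?t=week&limit=5',
  { headers: { 'User-Agent': 'sim-docs-example/0.1' } }
)
const listing = await res.json()
for (const { data } of listing.data.children) {
  console.log(data.title, data.score) // post title and vote score
}
```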
{/* MANUAL-CONTENT-END */}

View File

@@ -78,7 +78,7 @@ In Sim Studio, the Serper integration enables your agents to leverage the power
## Usage Instructions
Access real-time web search results with Serper
Access real-time web search results with Serper's Google Search API integration. Retrieve structured search data including web pages, news, images, and places with customizable language and region settings.

View File

@@ -49,14 +49,22 @@ With Slack, you can:
- **Automate agent notifications**: Send real-time updates from your Sim Studio agents to any Slack channel
- **Create webhook endpoints**: Configure Slack bots as webhooks to trigger Sim Studio workflows from Slack activities
- **Enhance agent workflows**: Integrate Slack messaging into your agents to deliver results, alerts, and status updates
- **Create and share Slack canvases**: Programmatically generate collaborative documents (canvases) in Slack channels
- **Read messages from channels**: Retrieve and process recent messages from any Slack channel for monitoring or workflow triggers
In Sim Studio, the Slack integration enables your agents to programmatically send messages to any Slack channel or user as part of their workflows. This allows for powerful automation scenarios such as sending notifications, alerts, updates, and reports directly to your team's communication hub. Your agents can deliver timely information, share results from processes they've completed, or alert team members when attention is needed. This integration bridges the gap between your AI workflows and your team's communication, ensuring everyone stays informed without manual intervention. By connecting Sim Studio with Slack, you can create agents that keep your team updated with relevant information at the right time, enhance collaboration by sharing insights automatically, and reduce the need for manual status updates - all while leveraging your existing Slack workspace where your team already communicates.
In Sim Studio, the Slack integration enables your agents to programmatically interact with Slack in several ways as part of their workflows:
- **Send messages**: Agents can send formatted messages to any Slack channel or user, supporting Slack's mrkdwn syntax for rich formatting.
- **Create canvases**: Agents can create and share Slack canvases (collaborative documents) directly in channels, enabling richer content sharing and documentation.
- **Read messages**: Agents can read recent messages from channels, allowing for monitoring, reporting, or triggering further actions based on channel activity.
This allows for powerful automation scenarios such as sending notifications, alerts, updates, and reports directly to your team's communication hub, sharing structured documents, or monitoring conversations for workflow triggers. Your agents can deliver timely information, share results from processes they've completed, create collaborative documents, or alert team members when attention is needed. This integration bridges the gap between your AI workflows and your team's communication, ensuring everyone stays informed without manual intervention. By connecting Sim Studio with Slack, you can create agents that keep your team updated with relevant information at the right time, enhance collaboration by sharing insights automatically, and reduce the need for manual status updates—all while leveraging your existing Slack workspace where your team already communicates.
{/* MANUAL-CONTENT-END */}
## Usage Instructions
Comprehensive Slack integration with OAuth authentication. Send formatted messages using Slack
Comprehensive Slack integration with OAuth authentication. Send formatted messages using Slack's mrkdwn syntax.
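A minimal sketch of what a send-message operation amounts to, assuming the block calls Slack's standard `chat.postMessage` Web API (token and channel are placeholders):

```typescript
// Hypothetical sketch: posting a mrkdwn-formatted message via chat.postMessage.
await fetch('https://slack.com/api/chat.postMessage', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json; charset=utf-8',
    Authorization: `Bearer ${process.env.SLACK_BOT_TOKEN}`,
  },
  body: JSON.stringify({
    channel: '#alerts',
    // mrkdwn: *bold*, _italic_, and <url|label> links
    text: '*Build finished*: see <https://example.com/run/42|the run log> for details.',
  }),
})
```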

View File

@@ -51,19 +51,26 @@ import { BlockInfoCard } from "@/components/ui/block-info-card"
/>
{/* MANUAL-CONTENT-START:intro */}
[Supabase](https://www.supabase.com/) is an open-source Firebase alternative that provides a suite of tools for building modern applications. It offers a PostgreSQL database, authentication, instant APIs, real-time subscriptions, storage, and edge functions, all within a unified platform.
[Supabase](https://www.supabase.com/) is a powerful open-source backend-as-a-service platform that provides developers with a suite of tools to build, scale, and manage modern applications. Supabase offers a fully managed [PostgreSQL](https://www.postgresql.org/) database, robust authentication, instant RESTful and GraphQL APIs, real-time subscriptions, file storage, and edge functions, all accessible through a unified and developer-friendly interface. Its open-source nature and compatibility with popular frameworks make it a compelling alternative to Firebase, with the added benefit of SQL flexibility and transparency.
With Supabase, you can:
**Why Supabase?**
- **Instant APIs:** Every table and view in your database is instantly available via REST and GraphQL endpoints, making it easy to build data-driven applications without writing custom backend code.
- **Real-time Data:** Supabase enables real-time subscriptions, allowing your apps to react instantly to changes in your database.
- **Authentication & Authorization:** Built-in user management with support for email, OAuth, SSO, and more, plus row-level security for granular access control.
- **Storage:** Securely upload, serve, and manage files with built-in storage that integrates seamlessly with your database.
- **Edge Functions:** Deploy serverless functions close to your users for low-latency custom logic.
- **Manage relational data**: Work with a powerful PostgreSQL database with full SQL capabilities
- **Implement authentication**: Add secure user authentication with multiple providers
- **Create instant APIs**: Generate RESTful APIs automatically based on your database schema
- **Enable real-time updates**: Subscribe to database changes and build reactive applications
- **Store files**: Upload, transform, and serve files with storage buckets
- **Deploy serverless functions**: Run code in response to database changes or HTTP requests
- **Secure your application**: Implement row-level security and manage permissions
**Using Supabase in Sim Studio**
In Sim Studio, the Supabase integration enables your agents to interact with your Supabase projects programmatically. This allows for powerful automation scenarios such as data querying, record creation, user management, and file operations. Your agents can retrieve information from your database, insert new records, update existing data, and leverage Supabase's authentication and storage capabilities as part of their workflows. This integration bridges the gap between your AI workflows and your application's data layer, enabling more sophisticated and data-driven automations. By connecting Sim Studio with Supabase, you can create agents that maintain data consistency across systems, trigger actions based on database changes, perform complex data operations, and build workflows that leverage your application's existing data infrastructure - all without requiring manual intervention or custom code.
Sim Studio's Supabase integration makes it effortless to connect your agentic workflows to your Supabase projects. With just a few configuration fields—your Project ID, Table name, and Service Role Secret—you can securely interact with your database directly from your Sim Studio blocks. The integration abstracts away the complexity of API calls, letting you focus on building logic and automations.
**Key benefits of using Supabase in Sim Studio:**
- **No-code/low-code database operations:** Query, insert, update, and delete rows in your Supabase tables without writing SQL or backend code.
- **Flexible querying:** Use [PostgREST filter syntax](https://postgrest.org/en/stable/api.html#operators) to perform advanced queries, including filtering, ordering, and limiting results.
- **Seamless integration:** Easily connect Supabase to other tools and services in your workflow, enabling powerful automations such as syncing data, triggering notifications, or enriching records.
- **Secure and scalable:** All operations use your Supabase Service Role Secret, ensuring secure access to your data with the scalability of a managed cloud platform.
Whether you're building internal tools, automating business processes, or powering production applications, Supabase in Sim Studio provides a fast, reliable, and developer-friendly way to manage your data and backend logic—no infrastructure management required. Simply configure your block, select the operation you need, and let Sim Studio handle the rest.
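A minimal sketch of the kind of PostgREST-style query this enables, assuming a placeholder project URL and table (the filter operators follow the PostgREST syntax linked above):

```typescript
// Hypothetical sketch: querying a Supabase table through its auto-generated
// REST API with PostgREST filters. Project URL, table, and key are placeholders.
const res = await fetch(
  'https://YOUR_PROJECT.supabase.co/rest/v1/orders?status=eq.active&order=created_at.desc&limit=10',
  {
    headers: {
      apikey: process.env.SUPABASE_SERVICE_ROLE_KEY ?? '',
      Authorization: `Bearer ${process.env.SUPABASE_SERVICE_ROLE_KEY}`,
    },
  }
)
const rows = await res.json() // matching rows as a JSON array
```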
{/* MANUAL-CONTENT-END */}

View File

@@ -58,7 +58,7 @@ In Sim Studio, the Tavily integration enables your agents to search the web and
## Usage Instructions
Access Tavily
Access Tavily's AI-powered search engine to find relevant information from across the web. Extract and process content from specific URLs with customizable depth options.

View File

@@ -0,0 +1,179 @@
---
title: Wikipedia
description: Search and retrieve content from Wikipedia
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="wikipedia"
color="#000000"
icon={true}
iconSvg={`<svg className="block-icon"
fill='currentColor'
version='1.1'
id='Capa_1'
xmlns='http://www.w3.org/2000/svg'
xmlnsXlink='http://www.w3.org/1999/xlink'
viewBox='0 0 98.05 98.05'
xmlSpace='preserve'
>
<g>
<path
d='M98.023,17.465l-19.584-0.056c-0.004,0.711-0.006,1.563-0.017,2.121c1.664,0.039,5.922,0.822,7.257,4.327L66.92,67.155
c-0.919-2.149-9.643-21.528-10.639-24.02l9.072-18.818c1.873-2.863,5.455-4.709,8.918-4.843l-0.01-1.968L55.42,17.489
c-0.045,0.499,0.001,1.548-0.068,2.069c5.315,0.144,7.215,1.334,5.941,4.508c-2.102,4.776-6.51,13.824-7.372,15.475
c-2.696-5.635-4.41-9.972-7.345-16.064c-1.266-2.823,1.529-3.922,4.485-4.004v-1.981l-21.82-0.067
c0.016,0.93-0.021,1.451-0.021,2.131c3.041,0.046,6.988,0.371,8.562,3.019c2.087,4.063,9.044,20.194,11.149,24.514
c-2.685,5.153-9.207,17.341-11.544,21.913c-3.348-7.43-15.732-36.689-19.232-44.241c-1.304-3.218,3.732-5.077,6.646-5.213
l0.019-2.148L0,17.398c0.005,0.646,0.027,1.71,0.029,2.187c4.025-0.037,9.908,6.573,11.588,10.683
c7.244,16.811,14.719,33.524,21.928,50.349c0.002,0.029,2.256,0.059,2.281,0.008c4.717-9.653,10.229-19.797,15.206-29.56
L63.588,80.64c0.005,0.004,2.082,0.016,2.093,0.007c7.962-18.196,19.892-46.118,23.794-54.933c1.588-3.767,4.245-6.064,8.543-6.194
l0.032-1.956L98.023,17.465z'
/>
</g>
</svg>`}
/>
{/* MANUAL-CONTENT-START:intro */}
[Wikipedia](https://www.wikipedia.org/) is the world's largest free online encyclopedia, offering millions of articles on a vast range of topics, collaboratively written and maintained by volunteers.
With Wikipedia, you can:
- **Search for articles**: Find relevant Wikipedia pages by searching for keywords or topics
- **Get article summaries**: Retrieve concise summaries of Wikipedia pages for quick reference
- **Access full content**: Obtain the complete content of Wikipedia articles for in-depth information
- **Discover random articles**: Explore new topics by retrieving random Wikipedia pages
In Sim Studio, the Wikipedia integration enables your agents to programmatically access and interact with Wikipedia content as part of their workflows. Agents can search for articles, fetch summaries, retrieve full page content, and discover random articles, empowering your automations with up-to-date, reliable information from the world's largest encyclopedia. This integration is ideal for scenarios such as research, content enrichment, fact-checking, and knowledge discovery, allowing your agents to seamlessly incorporate Wikipedia data into their decision-making and task execution processes.
{/* MANUAL-CONTENT-END */}
## Usage Instructions
Access Wikipedia articles, search for pages, get summaries, retrieve full content, and discover random articles from the world's largest encyclopedia.
## Tools
### `wikipedia_summary`
Get a summary and metadata for a specific Wikipedia page.
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `pageTitle` | string | Yes | Title of the Wikipedia page to get summary for |
#### Output
| Parameter | Type |
| --------- | ---- |
| `summary` | string |
| `title` | string |
| `displaytitle` | string |
| `description` | string |
| `extract` | string |
| `extract_html` | string |
| `thumbnail` | string |
| `originalimage` | string |
| `content_urls` | string |
| `revisions` | string |
| `edit` | string |
| `talk` | string |
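For orientation, the summary fields above mirror Wikipedia's public REST API (`/api/rest_v1/page/summary/{title}`); assuming the tool wraps that endpoint, a call reduces to something like:

```typescript
// Hypothetical sketch: fetching a page summary from Wikipedia's REST API.
const pageTitle = 'Alan Turing'
const res = await fetch(
  `https://en.wikipedia.org/api/rest_v1/page/summary/${encodeURIComponent(pageTitle)}`
)
const summary = await res.json()
console.log(summary.title, summary.description) // plus extract, thumbnail, content_urls
```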
### `wikipedia_search`
Search for Wikipedia pages by title or content.
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `query` | string | Yes | Search query to find Wikipedia pages |
| `searchLimit` | number | No | Maximum number of results to return \(default: 10, max: 50\) |
#### Output
| Parameter | Type |
| --------- | ---- |
| `totalHits` | string |
| `query` | string |
| `searchResults` | string |
### `wikipedia_content`
Get the full HTML content of a Wikipedia page.
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `pageTitle` | string | Yes | Title of the Wikipedia page to get content for |
#### Output
| Parameter | Type |
| --------- | ---- |
| `content` | string |
| `pageid` | string |
| `html` | string |
| `revision` | string |
| `tid` | string |
| `timestamp` | string |
| `content_model` | string |
| `content_format` | string |
### `wikipedia_random`
Get a random Wikipedia page.
#### Input
This tool takes no input parameters.
#### Output
| Parameter | Type |
| --------- | ---- |
| `randomPage` | string |
| `title` | string |
| `displaytitle` | string |
| `description` | string |
| `extract` | string |
| `thumbnail` | string |
| `content_urls` | string |
## Block Configuration
### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `operation` | string | Yes | Operation |
### Outputs
| Output | Type | Description |
| ------ | ---- | ----------- |
| `summary` | json | summary output from the block |
| `searchResults` | json | searchResults output from the block |
| `totalHits` | number | totalHits output from the block |
| `content` | json | content output from the block |
| `randomPage` | json | randomPage output from the block |
## Notes
- Category: `tools`
- Type: `wikipedia`

View File

@@ -9,13 +9,23 @@ export function cn(...inputs: ClassValue[]) {
}
/**
* Get the full URL for a video asset stored in Vercel Blob
* Get the full URL for an asset stored in Vercel Blob or local fallback
* - If CDN is configured (NEXT_PUBLIC_BLOB_BASE_URL), uses CDN URL
* - Otherwise falls back to local static assets served from root path
*/
export function getAssetUrl(filename: string) {
const cdnBaseUrl = process.env.NEXT_PUBLIC_BLOB_BASE_URL
if (cdnBaseUrl) {
return `${cdnBaseUrl}/${filename}`
}
return `/${filename}`
}
/**
* Get the full URL for a video asset stored in Vercel Blob or local fallback
* - If CDN is configured (NEXT_PUBLIC_BLOB_BASE_URL), uses CDN URL
* - Otherwise falls back to local static assets served from root path
*/
export function getVideoUrl(filename: string) {
const baseUrl = process.env.NEXT_PUBLIC_BLOB_BASE_URL
if (!baseUrl) {
console.warn('NEXT_PUBLIC_BLOB_BASE_URL not configured, falling back to local path')
return `/${filename}`
}
return `${baseUrl}/${filename}`
return getAssetUrl(filename)
}
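// Usage sketch (hypothetical values): both helpers now resolve through
// getAssetUrl, so behavior depends only on NEXT_PUBLIC_BLOB_BASE_URL.
//
//   With NEXT_PUBLIC_BLOB_BASE_URL=https://cdn.example.com :
//     getAssetUrl('static/sim.png') // -> 'https://cdn.example.com/static/sim.png'
//     getVideoUrl('demo.mp4')       // -> 'https://cdn.example.com/demo.mp4'
//
//   With the variable unset, both fall back to root-relative static paths:
//     getAssetUrl('static/sim.png') // -> '/static/sim.png'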

9 binary files changed (not shown).

View File

@@ -1,6 +1,7 @@
'use client'
import { motion } from 'framer-motion'
import { getAssetUrl } from '@/lib/utils'
import { BlogCard } from '@/app/(landing)/components/blog-card'
function Blogs() {
@@ -50,7 +51,7 @@ function Blogs() {
date={new Date('25 April 2025')}
author='Emir Ayaz'
authorRole='Designer'
avatar='/static/sim.png'
avatar={getAssetUrl('static/sim.png')}
type='Agents'
readTime='6'
/>
@@ -61,7 +62,7 @@ function Blogs() {
date={new Date('25 April 2025')}
author='Emir Ayaz'
authorRole='Designer'
avatar='/static/sim.png'
avatar={getAssetUrl('static/sim.png')}
type='Agents'
readTime='6'
/>
@@ -80,10 +81,10 @@ function Blogs() {
date={new Date('25 April 2025')}
author='Emir Ayaz'
authorRole='Designer'
avatar='/static/sim.png'
avatar={getAssetUrl('static/sim.png')}
type='Agents'
readTime='6'
image='/static/hero.png'
image={getAssetUrl('static/hero.png')}
/>
<BlogCard
href='/blog/test'
@@ -91,7 +92,7 @@ function Blogs() {
description="Learn how to create a fully functional AI agent using SimStudio.ai's unified API and workflows."
author='Emir Ayaz'
authorRole='Designer'
avatar='/static/sim.png'
avatar={getAssetUrl('static/sim.png')}
type='Agents'
readTime='6'
/>
@@ -110,7 +111,7 @@ function Blogs() {
date={new Date('25 April 2025')}
author='Emir Ayaz'
authorRole='Designer'
avatar='/static/sim.png'
avatar={getAssetUrl('static/sim.png')}
type='Agents'
readTime='6'
/>
@@ -121,7 +122,7 @@ function Blogs() {
date={new Date('25 April 2025')}
author='Emir Ayaz'
authorRole='Designer'
avatar='/static/sim.png'
avatar={getAssetUrl('static/sim.png')}
type='Functions'
readTime='6'
/>

View File

@@ -1,6 +1,7 @@
'use client'
import { motion } from 'framer-motion'
import { getAssetUrl } from '@/lib/utils'
import useIsMobile from '@/app/(landing)/components/hooks/use-is-mobile'
import { Marquee } from '@/app/(landing)/components/magicui/marquee'
@@ -10,63 +11,63 @@ const X_TESTIMONIALS = [
username: '@GithubProjects',
viewCount: '90.4k',
tweetUrl: 'https://x.com/GithubProjects/status/1906383555707490499',
profileImage: '/twitter/github-projects.jpg',
profileImage: getAssetUrl('twitter/github-projects.jpg'),
},
{
text: 'A very good looking agent workflow builder 🔥 and open source!',
username: '@xyflowdev',
viewCount: '3,246',
tweetUrl: 'https://x.com/xyflowdev/status/1909501499719438670',
profileImage: '/twitter/xyflow.jpg',
profileImage: getAssetUrl('twitter/xyflow.jpg'),
},
{
text: "🚨 BREAKING: This startup just dropped the fastest way to build AI agents.\n\nThis Figma-like canvas to build agents will blow your mind.\n\nHere's why this is the best tool for building AI agents:",
username: '@hasantoxr',
viewCount: '515k',
tweetUrl: 'https://x.com/hasantoxr/status/1912909502036525271',
profileImage: '/twitter/hasan.jpg',
profileImage: getAssetUrl('twitter/hasan.jpg'),
},
{
text: 'omfggggg this is the zapier of agent building\n\ni always believed that building agents and using ai should not be limited to technical people. i think this solves just that\n\nthe fact that this is also open source makes me so optimistic about the future of building with ai :)))\n\ncongrats @karabegemir & @typingwala !!!',
username: '@nizzyabi',
viewCount: '6,269',
tweetUrl: 'https://x.com/nizzyabi/status/1907864421227180368',
profileImage: '/twitter/nizzy.jpg',
profileImage: getAssetUrl('twitter/nizzy.jpg'),
},
{
text: "One of the best products I've seen in the space, and the hustle and grind I've seen from @karabegemir and @typingwala is insane. Sim Studio is positioned to build something game-changing, and there's no better team for the job.\n\nCongrats on the launch 🚀 🎊 great things ahead!",
username: '@firestorm776',
viewCount: '956',
tweetUrl: 'https://x.com/firestorm776/status/1907896097735061598',
profileImage: '/twitter/samarth.jpg',
profileImage: getAssetUrl('twitter/samarth.jpg'),
},
{
text: 'lfgg got access to @simstudioai via @zerodotemail 😎',
username: '@nizzyabi',
viewCount: '1,585',
tweetUrl: 'https://x.com/nizzyabi/status/1910482357821595944',
profileImage: '/twitter/nizzy.jpg',
profileImage: getAssetUrl('twitter/nizzy.jpg'),
},
{
text: 'Feels like we\'re finally getting a "Photoshop moment" for AI devs—visual, intuitive, and fast enough to keep up with ideas mid-flow.',
username: '@syamrajk',
viewCount: '2,643',
tweetUrl: 'https://x.com/syamrajk/status/1912911980110946491',
profileImage: '/twitter/syamrajk.jpg',
profileImage: getAssetUrl('twitter/syamrajk.jpg'),
},
{
text: "🚨 BREAKING: This startup just dropped the fastest way to build AI agents.\n\nThis Figma-like canvas to build agents will blow your mind.\n\nHere's why this is the best tool for building AI agents:",
username: '@lazukars',
viewCount: '47.4k',
tweetUrl: 'https://x.com/lazukars/status/1913136390503600575',
profileImage: '/twitter/lazukars.png',
profileImage: getAssetUrl('twitter/lazukars.png'),
},
{
text: 'The use cases are endless. Great work @simstudioai',
username: '@daniel_zkim',
viewCount: '103',
tweetUrl: 'https://x.com/daniel_zkim/status/1907891273664782708',
profileImage: '/twitter/daniel.jpg',
profileImage: getAssetUrl('twitter/daniel.jpg'),
},
]

View File

@@ -1,6 +1,7 @@
import { eq, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { v4 as uuidv4 } from 'uuid'
import { checkServerSideUsageLimits } from '@/lib/billing'
import { isDev } from '@/lib/environment'
import { createLogger } from '@/lib/logs/console/logger'
import { LoggingSession } from '@/lib/logs/execution/logging-session'
@@ -330,6 +331,22 @@ export async function executeWorkflowForChat(
const workflowId = deployment.workflowId
const executionId = uuidv4()
const usageCheck = await checkServerSideUsageLimits(deployment.userId)
if (usageCheck.isExceeded) {
logger.warn(
`[${requestId}] User ${deployment.userId} has exceeded usage limits. Skipping chat execution.`,
{
currentUsage: usageCheck.currentUsage,
limit: usageCheck.limit,
workflowId: deployment.workflowId,
chatId,
}
)
throw new Error(
usageCheck.message || 'Usage limit exceeded. Please upgrade your plan to continue using chat.'
)
}
// Set up logging for chat execution
const loggingSession = new LoggingSession(workflowId, executionId, 'chat', requestId)

View File

@@ -218,8 +218,15 @@ describe('Document By ID API Route', () => {
}),
}
// Mock transaction
mockDbChain.transaction.mockImplementation(async (callback) => {
const mockTx = {
update: vi.fn().mockReturnValue(updateChain),
}
await callback(mockTx)
})
// Mock db operations in sequence
mockDbChain.update.mockReturnValue(updateChain)
mockDbChain.select.mockReturnValue(selectChain)
const req = createMockRequest('PUT', validUpdateData)
@@ -231,7 +238,7 @@ describe('Document By ID API Route', () => {
expect(data.success).toBe(true)
expect(data.data.filename).toBe('updated-document.pdf')
expect(data.data.enabled).toBe(false)
expect(mockDbChain.update).toHaveBeenCalled()
expect(mockDbChain.transaction).toHaveBeenCalled()
expect(mockDbChain.select).toHaveBeenCalled()
})
@@ -298,8 +305,15 @@ describe('Document By ID API Route', () => {
}),
}
// Mock transaction
mockDbChain.transaction.mockImplementation(async (callback) => {
const mockTx = {
update: vi.fn().mockReturnValue(updateChain),
}
await callback(mockTx)
})
// Mock db operations in sequence
mockDbChain.update.mockReturnValue(updateChain)
mockDbChain.select.mockReturnValue(selectChain)
const req = createMockRequest('PUT', { markFailedDueToTimeout: true })
@@ -309,7 +323,7 @@ describe('Document By ID API Route', () => {
expect(response.status).toBe(200)
expect(data.success).toBe(true)
expect(mockDbChain.update).toHaveBeenCalled()
expect(mockDbChain.transaction).toHaveBeenCalled()
expect(updateChain.set).toHaveBeenCalledWith(
expect.objectContaining({
processingStatus: 'failed',
@@ -479,7 +493,9 @@ describe('Document By ID API Route', () => {
document: mockDocument,
knowledgeBase: { id: 'kb-123', userId: 'user-123' },
})
mockDbChain.set.mockRejectedValue(new Error('Database error'))
// Mock transaction to throw an error
mockDbChain.transaction.mockRejectedValue(new Error('Database error'))
const req = createMockRequest('PUT', validUpdateData)
const { PUT } = await import('@/app/api/knowledge/[id]/documents/[documentId]/route')

View File

@@ -2,6 +2,7 @@ import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
import { TAG_SLOTS } from '@/lib/constants/knowledge'
import { createLogger } from '@/lib/logs/console/logger'
export const dynamic = 'force-dynamic'
@@ -26,6 +27,14 @@ const UpdateDocumentSchema = z.object({
processingError: z.string().optional(),
markFailedDueToTimeout: z.boolean().optional(),
retryProcessing: z.boolean().optional(),
// Tag fields
tag1: z.string().optional(),
tag2: z.string().optional(),
tag3: z.string().optional(),
tag4: z.string().optional(),
tag5: z.string().optional(),
tag6: z.string().optional(),
tag7: z.string().optional(),
})
export async function GET(
@@ -213,9 +222,36 @@ export async function PUT(
updateData.processingStatus = validatedData.processingStatus
if (validatedData.processingError !== undefined)
updateData.processingError = validatedData.processingError
// Tag field updates
TAG_SLOTS.forEach((slot) => {
if ((validatedData as any)[slot] !== undefined) {
;(updateData as any)[slot] = (validatedData as any)[slot]
}
})
}
await db.update(document).set(updateData).where(eq(document.id, documentId))
await db.transaction(async (tx) => {
// Update the document
await tx.update(document).set(updateData).where(eq(document.id, documentId))
// If any tag fields were updated, also update the embeddings
const hasTagUpdates = TAG_SLOTS.some((field) => (validatedData as any)[field] !== undefined)
if (hasTagUpdates) {
const embeddingUpdateData: Record<string, string | null> = {}
TAG_SLOTS.forEach((field) => {
if ((validatedData as any)[field] !== undefined) {
embeddingUpdateData[field] = (validatedData as any)[field] || null
}
})
await tx
.update(embedding)
.set(embeddingUpdateData)
.where(eq(embedding.documentId, documentId))
}
})
// Fetch the updated document
const updatedDocument = await db

View File

@@ -0,0 +1,367 @@
import { randomUUID } from 'crypto'
import { and, eq, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
import { MAX_TAG_SLOTS, TAG_SLOTS } from '@/lib/constants/knowledge'
import { createLogger } from '@/lib/logs/console/logger'
import { checkKnowledgeBaseAccess, checkKnowledgeBaseWriteAccess } from '@/app/api/knowledge/utils'
import { db } from '@/db'
import { document, knowledgeBaseTagDefinitions } from '@/db/schema'
export const dynamic = 'force-dynamic'
const logger = createLogger('DocumentTagDefinitionsAPI')
const TagDefinitionSchema = z.object({
tagSlot: z.enum(TAG_SLOTS as [string, ...string[]]),
displayName: z.string().min(1, 'Display name is required').max(100, 'Display name too long'),
fieldType: z.string().default('text'), // Currently only 'text', future: 'date', 'number', 'range'
})
const BulkTagDefinitionsSchema = z.object({
definitions: z
.array(TagDefinitionSchema)
.max(MAX_TAG_SLOTS, `Cannot define more than ${MAX_TAG_SLOTS} tags`),
})
// Helper function to clean up unused tag definitions
async function cleanupUnusedTagDefinitions(knowledgeBaseId: string, requestId: string) {
try {
logger.info(`[${requestId}] Starting cleanup for KB ${knowledgeBaseId}`)
// Get all tag definitions for this KB
const allDefinitions = await db
.select()
.from(knowledgeBaseTagDefinitions)
.where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
logger.info(`[${requestId}] Found ${allDefinitions.length} tag definitions to check`)
if (allDefinitions.length === 0) {
return 0
}
let cleanedCount = 0
// For each tag definition, check if any documents use that tag slot
for (const definition of allDefinitions) {
const slot = definition.tagSlot
// Use raw SQL with proper column name injection
const countResult = await db.execute(sql`
SELECT count(*) as count
FROM document
WHERE knowledge_base_id = ${knowledgeBaseId}
AND ${sql.raw(slot)} IS NOT NULL
AND trim(${sql.raw(slot)}) != ''
`)
const count = Number(countResult[0]?.count) || 0
logger.info(
`[${requestId}] Tag ${definition.displayName} (${slot}): ${count} documents using it`
)
// If count is 0, remove this tag definition
if (count === 0) {
await db
.delete(knowledgeBaseTagDefinitions)
.where(eq(knowledgeBaseTagDefinitions.id, definition.id))
cleanedCount++
logger.info(
`[${requestId}] Removed unused tag definition: ${definition.displayName} (${definition.tagSlot})`
)
}
}
return cleanedCount
} catch (error) {
logger.warn(`[${requestId}] Failed to cleanup unused tag definitions:`, error)
return 0 // Don't fail the main operation if cleanup fails
}
}
// GET /api/knowledge/[id]/documents/[documentId]/tag-definitions - Get tag definitions for a document
export async function GET(
req: NextRequest,
{ params }: { params: Promise<{ id: string; documentId: string }> }
) {
const requestId = randomUUID().slice(0, 8)
const { id: knowledgeBaseId, documentId } = await params
try {
logger.info(`[${requestId}] Getting tag definitions for document ${documentId}`)
const session = await getSession()
if (!session?.user?.id) {
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
// Check if user has access to the knowledge base
const accessCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, session.user.id)
if (!accessCheck.hasAccess) {
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
}
// Verify document exists and belongs to the knowledge base
const documentExists = await db
.select({ id: document.id })
.from(document)
.where(and(eq(document.id, documentId), eq(document.knowledgeBaseId, knowledgeBaseId)))
.limit(1)
if (documentExists.length === 0) {
return NextResponse.json({ error: 'Document not found' }, { status: 404 })
}
// Get tag definitions for the knowledge base
const tagDefinitions = await db
.select({
id: knowledgeBaseTagDefinitions.id,
tagSlot: knowledgeBaseTagDefinitions.tagSlot,
displayName: knowledgeBaseTagDefinitions.displayName,
fieldType: knowledgeBaseTagDefinitions.fieldType,
createdAt: knowledgeBaseTagDefinitions.createdAt,
updatedAt: knowledgeBaseTagDefinitions.updatedAt,
})
.from(knowledgeBaseTagDefinitions)
.where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
logger.info(`[${requestId}] Retrieved ${tagDefinitions.length} tag definitions`)
return NextResponse.json({
success: true,
data: tagDefinitions,
})
} catch (error) {
logger.error(`[${requestId}] Error getting tag definitions`, error)
return NextResponse.json({ error: 'Failed to get tag definitions' }, { status: 500 })
}
}
// POST /api/knowledge/[id]/documents/[documentId]/tag-definitions - Create/update tag definitions
export async function POST(
req: NextRequest,
{ params }: { params: Promise<{ id: string; documentId: string }> }
) {
const requestId = randomUUID().slice(0, 8)
const { id: knowledgeBaseId, documentId } = await params
try {
logger.info(`[${requestId}] Creating/updating tag definitions for document ${documentId}`)
const session = await getSession()
if (!session?.user?.id) {
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
// Check if user has write access to the knowledge base
const accessCheck = await checkKnowledgeBaseWriteAccess(knowledgeBaseId, session.user.id)
if (!accessCheck.hasAccess) {
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
}
// Verify document exists and belongs to the knowledge base
const documentExists = await db
.select({ id: document.id })
.from(document)
.where(and(eq(document.id, documentId), eq(document.knowledgeBaseId, knowledgeBaseId)))
.limit(1)
if (documentExists.length === 0) {
return NextResponse.json({ error: 'Document not found' }, { status: 404 })
}
let body
try {
body = await req.json()
} catch (error) {
logger.error(`[${requestId}] Failed to parse JSON body:`, error)
return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 })
}
if (!body || typeof body !== 'object') {
logger.error(`[${requestId}] Invalid request body:`, body)
return NextResponse.json(
{ error: 'Request body must be a valid JSON object' },
{ status: 400 }
)
}
const validatedData = BulkTagDefinitionsSchema.parse(body)
// Validate no duplicate tag slots
const tagSlots = validatedData.definitions.map((def) => def.tagSlot)
const uniqueTagSlots = new Set(tagSlots)
if (tagSlots.length !== uniqueTagSlots.size) {
return NextResponse.json({ error: 'Duplicate tag slots not allowed' }, { status: 400 })
}
const now = new Date()
const createdDefinitions: (typeof knowledgeBaseTagDefinitions.$inferSelect)[] = []
// Get existing definitions count before transaction for cleanup check
const existingDefinitions = await db
.select()
.from(knowledgeBaseTagDefinitions)
.where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
// Check if we're trying to create more tag definitions than available slots
const existingTagNames = new Set(existingDefinitions.map((def) => def.displayName))
const trulyNewTags = validatedData.definitions.filter(
(def) => !existingTagNames.has(def.displayName)
)
if (existingDefinitions.length + trulyNewTags.length > MAX_TAG_SLOTS) {
return NextResponse.json(
{
error: `Cannot create ${trulyNewTags.length} new tags. Knowledge base already has ${existingDefinitions.length} tag definitions. Maximum is ${MAX_TAG_SLOTS} total.`,
},
{ status: 400 }
)
}
// Use transaction to ensure consistency
await db.transaction(async (tx) => {
// Create maps for lookups
const existingByName = new Map(existingDefinitions.map((def) => [def.displayName, def]))
const existingBySlot = new Map(existingDefinitions.map((def) => [def.tagSlot, def]))
// Process each new definition
for (const definition of validatedData.definitions) {
const existingByDisplayName = existingByName.get(definition.displayName)
const existingByTagSlot = existingBySlot.get(definition.tagSlot)
if (existingByDisplayName) {
// Update existing definition (same display name)
if (existingByDisplayName.tagSlot !== definition.tagSlot) {
// Slot is changing - check if target slot is available
if (existingByTagSlot && existingByTagSlot.id !== existingByDisplayName.id) {
// Target slot is occupied by a different definition - this is a conflict
// For now, keep the existing slot to avoid constraint violation
logger.warn(
`[${requestId}] Slot conflict for ${definition.displayName}: keeping existing slot ${existingByDisplayName.tagSlot}`
)
createdDefinitions.push(existingByDisplayName)
continue
}
}
await tx
.update(knowledgeBaseTagDefinitions)
.set({
tagSlot: definition.tagSlot,
fieldType: definition.fieldType,
updatedAt: now,
})
.where(eq(knowledgeBaseTagDefinitions.id, existingByDisplayName.id))
createdDefinitions.push({
...existingByDisplayName,
tagSlot: definition.tagSlot,
fieldType: definition.fieldType,
updatedAt: now,
})
} else if (existingByTagSlot) {
// Slot is occupied by a different display name - update it
await tx
.update(knowledgeBaseTagDefinitions)
.set({
displayName: definition.displayName,
fieldType: definition.fieldType,
updatedAt: now,
})
.where(eq(knowledgeBaseTagDefinitions.id, existingByTagSlot.id))
createdDefinitions.push({
...existingByTagSlot,
displayName: definition.displayName,
fieldType: definition.fieldType,
updatedAt: now,
})
} else {
// Create new definition
const newDefinition = {
id: randomUUID(),
knowledgeBaseId,
tagSlot: definition.tagSlot,
displayName: definition.displayName,
fieldType: definition.fieldType,
createdAt: now,
updatedAt: now,
}
await tx.insert(knowledgeBaseTagDefinitions).values(newDefinition)
createdDefinitions.push(newDefinition)
}
}
})
logger.info(`[${requestId}] Created/updated ${createdDefinitions.length} tag definitions`)
return NextResponse.json({
success: true,
data: createdDefinitions,
})
} catch (error) {
if (error instanceof z.ZodError) {
return NextResponse.json(
{ error: 'Invalid request data', details: error.errors },
{ status: 400 }
)
}
logger.error(`[${requestId}] Error creating/updating tag definitions`, error)
return NextResponse.json({ error: 'Failed to create/update tag definitions' }, { status: 500 })
}
}
// DELETE /api/knowledge/[id]/documents/[documentId]/tag-definitions - Clean up unused tag definitions, or delete all for the knowledge base
export async function DELETE(
req: NextRequest,
{ params }: { params: Promise<{ id: string; documentId: string }> }
) {
const requestId = randomUUID().slice(0, 8)
const { id: knowledgeBaseId, documentId } = await params
const { searchParams } = new URL(req.url)
const action = searchParams.get('action') // 'cleanup' or 'all'
try {
const session = await getSession()
if (!session?.user?.id) {
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
// Check if user has write access to the knowledge base
const accessCheck = await checkKnowledgeBaseWriteAccess(knowledgeBaseId, session.user.id)
if (!accessCheck.hasAccess) {
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
}
if (action === 'cleanup') {
// Just run cleanup
logger.info(`[${requestId}] Running cleanup for KB ${knowledgeBaseId}`)
const cleanedUpCount = await cleanupUnusedTagDefinitions(knowledgeBaseId, requestId)
return NextResponse.json({
success: true,
data: { cleanedUp: cleanedUpCount },
})
}
// Delete all tag definitions (original behavior)
logger.info(`[${requestId}] Deleting all tag definitions for KB ${knowledgeBaseId}`)
await db
.delete(knowledgeBaseTagDefinitions)
.where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
return NextResponse.json({
success: true,
message: 'Tag definitions deleted successfully',
})
} catch (error) {
logger.error(`[${requestId}] Error with tag definitions operation`, error)
return NextResponse.json({ error: 'Failed to process tag definitions' }, { status: 500 })
}
}
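// Usage sketch for this route (hypothetical client code; kbId and docId are
// placeholders). The POST body shape follows BulkTagDefinitionsSchema above:
// an array of { tagSlot, displayName, fieldType } definitions.
//
// await fetch(`/api/knowledge/${kbId}/documents/${docId}/tag-definitions`, {
//   method: 'POST',
//   headers: { 'Content-Type': 'application/json' },
//   body: JSON.stringify({
//     definitions: [{ tagSlot: 'tag1', displayName: 'Category', fieldType: 'text' }],
//   }),
// })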

View File

@@ -3,6 +3,7 @@ import { and, desc, eq, inArray, isNull, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
import { TAG_SLOTS } from '@/lib/constants/knowledge'
import { createLogger } from '@/lib/logs/console/logger'
import { getUserId } from '@/app/api/auth/oauth/utils'
import {
@@ -11,7 +12,7 @@ import {
processDocumentAsync,
} from '@/app/api/knowledge/utils'
import { db } from '@/db'
import { document } from '@/db/schema'
import { document, knowledgeBaseTagDefinitions } from '@/db/schema'
const logger = createLogger('DocumentsAPI')
@@ -22,6 +23,88 @@ const PROCESSING_CONFIG = {
delayBetweenDocuments: 500,
}
// Helper function to process structured document tags
async function processDocumentTags(
knowledgeBaseId: string,
tagData: Array<{ tagName: string; fieldType: string; value: string }>,
requestId: string
): Promise<Record<string, string | null>> {
const result: Record<string, string | null> = {}
// Initialize all tag slots to null
TAG_SLOTS.forEach((slot) => {
result[slot] = null
})
if (!Array.isArray(tagData) || tagData.length === 0) {
return result
}
try {
// Get existing tag definitions
const existingDefinitions = await db
.select()
.from(knowledgeBaseTagDefinitions)
.where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
const existingByName = new Map(existingDefinitions.map((def) => [def.displayName, def]))
const existingBySlot = new Map(existingDefinitions.map((def) => [def.tagSlot, def]))
// Process each tag
for (const tag of tagData) {
if (!tag.tagName?.trim() || !tag.value?.trim()) continue
const tagName = tag.tagName.trim()
const fieldType = tag.fieldType || 'text'
const value = tag.value.trim()
let targetSlot: string | null = null
// Check if tag definition already exists
const existingDef = existingByName.get(tagName)
if (existingDef) {
targetSlot = existingDef.tagSlot
} else {
// Find next available slot
for (const slot of TAG_SLOTS) {
if (!existingBySlot.has(slot)) {
targetSlot = slot
break
}
}
// Create new tag definition if we have a slot
if (targetSlot) {
const newDefinition = {
id: crypto.randomUUID(),
knowledgeBaseId,
tagSlot: targetSlot as any,
displayName: tagName,
fieldType,
createdAt: new Date(),
updatedAt: new Date(),
}
await db.insert(knowledgeBaseTagDefinitions).values(newDefinition)
existingBySlot.set(targetSlot as any, newDefinition)
logger.info(`[${requestId}] Created tag definition: ${tagName} -> ${targetSlot}`)
}
}
// Assign value to the slot
if (targetSlot) {
result[targetSlot] = value
}
}
return result
} catch (error) {
logger.error(`[${requestId}] Error processing document tags:`, error)
return result
}
}
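// Example of the documentTagsData string this helper consumes (values are
// illustrative; the tagName/fieldType/value keys match the parsing above):
//
// const documentTagsData = JSON.stringify([
//   { tagName: 'Category', fieldType: 'text', value: 'Finance' },
//   { tagName: 'Region', fieldType: 'text', value: 'EMEA' },
// ])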
async function processDocumentsWithConcurrencyControl(
createdDocuments: Array<{
documentId: string
@@ -158,7 +241,7 @@ const CreateDocumentSchema = z.object({
fileUrl: z.string().url('File URL must be valid'),
fileSize: z.number().min(1, 'File size must be greater than 0'),
mimeType: z.string().min(1, 'MIME type is required'),
// Document tags for filtering
// Document tags for filtering (legacy format)
tag1: z.string().optional(),
tag2: z.string().optional(),
tag3: z.string().optional(),
@@ -166,6 +249,8 @@ const CreateDocumentSchema = z.object({
tag5: z.string().optional(),
tag6: z.string().optional(),
tag7: z.string().optional(),
// Structured tag data (new format)
documentTagsData: z.string().optional(),
})
const BulkCreateDocumentsSchema = z.object({
@@ -350,6 +435,31 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
const documentId = crypto.randomUUID()
const now = new Date()
// Process documentTagsData if provided (for knowledge base block)
let processedTags: Record<string, string | null> = {
tag1: null,
tag2: null,
tag3: null,
tag4: null,
tag5: null,
tag6: null,
tag7: null,
}
if (docData.documentTagsData) {
try {
const tagData = JSON.parse(docData.documentTagsData)
if (Array.isArray(tagData)) {
processedTags = await processDocumentTags(knowledgeBaseId, tagData, requestId)
}
} catch (error) {
logger.warn(
`[${requestId}] Failed to parse documentTagsData for bulk document:`,
error
)
}
}
const newDocument = {
id: documentId,
knowledgeBaseId,
@@ -363,14 +473,14 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
processingStatus: 'pending' as const,
enabled: true,
uploadedAt: now,
// Include tags from upload
tag1: docData.tag1 || null,
tag2: docData.tag2 || null,
tag3: docData.tag3 || null,
tag4: docData.tag4 || null,
tag5: docData.tag5 || null,
tag6: docData.tag6 || null,
tag7: docData.tag7 || null,
// Use processed tags if available, otherwise fall back to individual tag fields
tag1: processedTags.tag1 || docData.tag1 || null,
tag2: processedTags.tag2 || docData.tag2 || null,
tag3: processedTags.tag3 || docData.tag3 || null,
tag4: processedTags.tag4 || docData.tag4 || null,
tag5: processedTags.tag5 || docData.tag5 || null,
tag6: processedTags.tag6 || docData.tag6 || null,
tag7: processedTags.tag7 || docData.tag7 || null,
}
await tx.insert(document).values(newDocument)
@@ -433,6 +543,29 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
const documentId = crypto.randomUUID()
const now = new Date()
// Process structured tag data if provided
let processedTags: Record<string, string | null> = {
tag1: validatedData.tag1 || null,
tag2: validatedData.tag2 || null,
tag3: validatedData.tag3 || null,
tag4: validatedData.tag4 || null,
tag5: validatedData.tag5 || null,
tag6: validatedData.tag6 || null,
tag7: validatedData.tag7 || null,
}
if (validatedData.documentTagsData) {
try {
const tagData = JSON.parse(validatedData.documentTagsData)
if (Array.isArray(tagData)) {
// Process structured tag data and create tag definitions
processedTags = await processDocumentTags(knowledgeBaseId, tagData, requestId)
}
} catch (error) {
logger.warn(`[${requestId}] Failed to parse documentTagsData:`, error)
}
}
const newDocument = {
id: documentId,
knowledgeBaseId,
@@ -445,14 +578,7 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
characterCount: 0,
enabled: true,
uploadedAt: now,
// Include tags from upload
tag1: validatedData.tag1 || null,
tag2: validatedData.tag2 || null,
tag3: validatedData.tag3 || null,
tag4: validatedData.tag4 || null,
tag5: validatedData.tag5 || null,
tag6: validatedData.tag6 || null,
tag7: validatedData.tag7 || null,
...processedTags,
}
await db.insert(document).values(newDocument)

View File

@@ -0,0 +1,57 @@
import { randomUUID } from 'crypto'
import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { getSession } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console/logger'
import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
import { db } from '@/db'
import { knowledgeBaseTagDefinitions } from '@/db/schema'
export const dynamic = 'force-dynamic'
const logger = createLogger('KnowledgeBaseTagDefinitionsAPI')
// GET /api/knowledge/[id]/tag-definitions - Get all tag definitions for a knowledge base
export async function GET(req: NextRequest, { params }: { params: Promise<{ id: string }> }) {
const requestId = randomUUID().slice(0, 8)
const { id: knowledgeBaseId } = await params
try {
logger.info(`[${requestId}] Getting tag definitions for knowledge base ${knowledgeBaseId}`)
const session = await getSession()
if (!session?.user?.id) {
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
// Check if user has access to the knowledge base
const accessCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, session.user.id)
if (!accessCheck.hasAccess) {
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
}
// Get tag definitions for the knowledge base
const tagDefinitions = await db
.select({
id: knowledgeBaseTagDefinitions.id,
tagSlot: knowledgeBaseTagDefinitions.tagSlot,
displayName: knowledgeBaseTagDefinitions.displayName,
fieldType: knowledgeBaseTagDefinitions.fieldType,
createdAt: knowledgeBaseTagDefinitions.createdAt,
updatedAt: knowledgeBaseTagDefinitions.updatedAt,
})
.from(knowledgeBaseTagDefinitions)
.where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
.orderBy(knowledgeBaseTagDefinitions.tagSlot)
logger.info(`[${requestId}] Retrieved ${tagDefinitions.length} tag definitions`)
return NextResponse.json({
success: true,
data: tagDefinitions,
})
} catch (error) {
logger.error(`[${requestId}] Error getting tag definitions`, error)
return NextResponse.json({ error: 'Failed to get tag definitions' }, { status: 500 })
}
}
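// Successful responses from this route take the shape (values illustrative):
// { success: true, data: [{ id, tagSlot: 'tag1', displayName: 'Category',
//   fieldType: 'text', createdAt, updatedAt }] }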

View File

@@ -1,6 +1,7 @@
import { and, eq, inArray, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { TAG_SLOTS } from '@/lib/constants/knowledge'
import { retryWithExponentialBackoff } from '@/lib/documents/utils'
import { env } from '@/lib/env'
import { createLogger } from '@/lib/logs/console/logger'
@@ -8,31 +9,50 @@ import { estimateTokenCount } from '@/lib/tokenization/estimators'
import { getUserId } from '@/app/api/auth/oauth/utils'
import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
import { db } from '@/db'
import { embedding } from '@/db/schema'
import { embedding, knowledgeBaseTagDefinitions } from '@/db/schema'
import { calculateCost } from '@/providers/utils'
const logger = createLogger('VectorSearchAPI')
function getTagFilters(filters: Record<string, string>, embedding: any) {
return Object.entries(filters).map(([key, value]) => {
switch (key) {
case 'tag1':
return sql`LOWER(${embedding.tag1}) = LOWER(${value})`
case 'tag2':
return sql`LOWER(${embedding.tag2}) = LOWER(${value})`
case 'tag3':
return sql`LOWER(${embedding.tag3}) = LOWER(${value})`
case 'tag4':
return sql`LOWER(${embedding.tag4}) = LOWER(${value})`
case 'tag5':
return sql`LOWER(${embedding.tag5}) = LOWER(${value})`
case 'tag6':
return sql`LOWER(${embedding.tag6}) = LOWER(${value})`
case 'tag7':
return sql`LOWER(${embedding.tag7}) = LOWER(${value})`
default:
return sql`1=1` // No-op for unknown keys
// Handle OR logic within same tag
const values = value.includes('|OR|') ? value.split('|OR|') : [value]
logger.debug(`[getTagFilters] Processing ${key}="${value}" -> values:`, values)
const getColumnForKey = (key: string) => {
switch (key) {
case 'tag1':
return embedding.tag1
case 'tag2':
return embedding.tag2
case 'tag3':
return embedding.tag3
case 'tag4':
return embedding.tag4
case 'tag5':
return embedding.tag5
case 'tag6':
return embedding.tag6
case 'tag7':
return embedding.tag7
default:
return null
}
}
const column = getColumnForKey(key)
if (!column) return sql`1=1` // No-op for unknown keys
if (values.length === 1) {
// Single value - simple equality
logger.debug(`[getTagFilters] Single value filter: ${key} = ${values[0]}`)
return sql`LOWER(${column}) = LOWER(${values[0]})`
}
// Multiple values - OR logic
logger.debug(`[getTagFilters] OR filter: ${key} IN (${values.join(', ')})`)
const orConditions = values.map((v) => sql`LOWER(${column}) = LOWER(${v})`)
return sql`(${sql.join(orConditions, sql` OR `)})`
})
}
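// The '|OR|' separator expresses OR within a single tag. For example, a
// filter of { tag3: 'alpha|OR|beta' } (values illustrative) compiles to:
// (LOWER(tag3) = LOWER('alpha') OR LOWER(tag3) = LOWER('beta'))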
@@ -53,17 +73,7 @@ const VectorSearchSchema = z.object({
]),
query: z.string().min(1, 'Search query is required'),
topK: z.number().min(1).max(100).default(10),
filters: z
.object({
tag1: z.string().optional(),
tag2: z.string().optional(),
tag3: z.string().optional(),
tag4: z.string().optional(),
tag5: z.string().optional(),
tag6: z.string().optional(),
tag7: z.string().optional(),
})
.optional(),
filters: z.record(z.string()).optional(), // Allow dynamic filter keys (display names)
})
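// Illustrative request body accepted by this schema (values are placeholders);
// filter keys are tag display names that get mapped to slots further down:
// { "query": "quarterly revenue", "topK": 10, "filters": { "Region": "EMEA|OR|APAC" } }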
async function generateSearchEmbedding(query: string): Promise<number[]> {
@@ -187,6 +197,7 @@ async function executeSingleQuery(
distanceThreshold: number,
filters?: Record<string, string>
) {
logger.debug(`[executeSingleQuery] Called with filters:`, filters)
return await db
.select({
id: embedding.id,
@@ -201,6 +212,7 @@ async function executeSingleQuery(
tag6: embedding.tag6,
tag7: embedding.tag7,
distance: sql<number>`${embedding.embedding} <=> ${queryVector}::vector`.as('distance'),
knowledgeBaseId: embedding.knowledgeBaseId,
})
.from(embedding)
.where(
@@ -208,28 +220,7 @@ async function executeSingleQuery(
inArray(embedding.knowledgeBaseId, knowledgeBaseIds),
eq(embedding.enabled, true),
sql`${embedding.embedding} <=> ${queryVector}::vector < ${distanceThreshold}`,
...(filters
? Object.entries(filters).map(([key, value]) => {
switch (key) {
case 'tag1':
return sql`LOWER(${embedding.tag1}) = LOWER(${value})`
case 'tag2':
return sql`LOWER(${embedding.tag2}) = LOWER(${value})`
case 'tag3':
return sql`LOWER(${embedding.tag3}) = LOWER(${value})`
case 'tag4':
return sql`LOWER(${embedding.tag4}) = LOWER(${value})`
case 'tag5':
return sql`LOWER(${embedding.tag5}) = LOWER(${value})`
case 'tag6':
return sql`LOWER(${embedding.tag6}) = LOWER(${value})`
case 'tag7':
return sql`LOWER(${embedding.tag7}) = LOWER(${value})`
default:
return sql`1=1` // No-op for unknown keys
}
})
: [])
...(filters ? getTagFilters(filters, embedding) : [])
)
)
.orderBy(sql`${embedding.embedding} <=> ${queryVector}::vector`)
@@ -271,6 +262,54 @@ export async function POST(request: NextRequest) {
}
}
// Map display names to tag slots for filtering
let mappedFilters: Record<string, string> = {}
if (validatedData.filters && accessibleKbIds.length > 0) {
try {
// Fetch tag definitions for the first accessible KB (since we're using single KB now)
const kbId = accessibleKbIds[0]
const tagDefs = await db
.select({
tagSlot: knowledgeBaseTagDefinitions.tagSlot,
displayName: knowledgeBaseTagDefinitions.displayName,
})
.from(knowledgeBaseTagDefinitions)
.where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, kbId))
logger.debug(`[${requestId}] Found tag definitions:`, tagDefs)
logger.debug(`[${requestId}] Original filters:`, validatedData.filters)
// Create mapping from display name to tag slot
const displayNameToSlot: Record<string, string> = {}
tagDefs.forEach((def) => {
displayNameToSlot[def.displayName] = def.tagSlot
})
// Map the filters and handle OR logic
Object.entries(validatedData.filters).forEach(([key, value]) => {
if (value) {
const tagSlot = displayNameToSlot[key] || key // Fallback to key if no mapping found
// Check if this is an OR filter (contains |OR| separator)
if (value.includes('|OR|')) {
logger.debug(
`[${requestId}] OR filter detected: "${key}" -> "${tagSlot}" = "${value}"`
)
}
mappedFilters[tagSlot] = value
logger.debug(`[${requestId}] Mapped filter: "${key}" -> "${tagSlot}" = "${value}"`)
}
})
logger.debug(`[${requestId}] Final mapped filters:`, mappedFilters)
} catch (error) {
logger.error(`[${requestId}] Filter mapping error:`, error)
// If mapping fails, use original filters
mappedFilters = validatedData.filters
}
}
if (accessibleKbIds.length === 0) {
return NextResponse.json(
{ error: 'Knowledge base not found or access denied' },
@@ -299,22 +338,24 @@ export async function POST(request: NextRequest) {
if (strategy.useParallel) {
// Execute parallel queries for better performance with many KBs
logger.debug(`[${requestId}] Executing parallel queries with filters:`, mappedFilters)
const parallelResults = await executeParallelQueries(
accessibleKbIds,
queryVector,
validatedData.topK,
strategy.distanceThreshold,
validatedData.filters
mappedFilters
)
results = mergeAndRankResults(parallelResults, validatedData.topK)
} else {
// Execute single optimized query for fewer KBs
logger.debug(`[${requestId}] Executing single query with filters:`, mappedFilters)
results = await executeSingleQuery(
accessibleKbIds,
queryVector,
validatedData.topK,
strategy.distanceThreshold,
validatedData.filters
mappedFilters
)
}
@@ -331,23 +372,64 @@ export async function POST(request: NextRequest) {
// Continue without cost information rather than failing the search
}
// Fetch tag definitions per knowledge base to map slots back to display names in the results
const tagDefinitionsMap: Record<string, Record<string, string>> = {}
for (const kbId of accessibleKbIds) {
try {
const tagDefs = await db
.select({
tagSlot: knowledgeBaseTagDefinitions.tagSlot,
displayName: knowledgeBaseTagDefinitions.displayName,
})
.from(knowledgeBaseTagDefinitions)
.where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, kbId))
tagDefinitionsMap[kbId] = {}
tagDefs.forEach((def) => {
tagDefinitionsMap[kbId][def.tagSlot] = def.displayName
})
logger.debug(
`[${requestId}] Display mapping - KB ${kbId} tag definitions:`,
tagDefinitionsMap[kbId]
)
} catch (error) {
logger.warn(`[${requestId}] Failed to fetch tag definitions for display mapping:`, error)
tagDefinitionsMap[kbId] = {}
}
}
return NextResponse.json({
success: true,
data: {
results: results.map((result) => ({
id: result.id,
content: result.content,
documentId: result.documentId,
chunkIndex: result.chunkIndex,
tag1: result.tag1,
tag2: result.tag2,
tag3: result.tag3,
tag4: result.tag4,
tag5: result.tag5,
tag6: result.tag6,
tag7: result.tag7,
similarity: 1 - result.distance,
})),
results: results.map((result) => {
const kbTagMap = tagDefinitionsMap[result.knowledgeBaseId] || {}
logger.debug(
`[${requestId}] Result KB: ${result.knowledgeBaseId}, available mappings:`,
kbTagMap
)
// Create tags object with display names
const tags: Record<string, any> = {}
TAG_SLOTS.forEach((slot) => {
if (result[slot]) {
const displayName = kbTagMap[slot] || slot
logger.debug(
`[${requestId}] Mapping ${slot}="${result[slot]}" -> "${displayName}"="${result[slot]}"`
)
tags[displayName] = result[slot]
}
})
return {
id: result.id,
content: result.content,
documentId: result.documentId,
chunkIndex: result.chunkIndex,
tags, // Clean display name mapped tags
similarity: 1 - result.distance,
}
}),
query: validatedData.query,
knowledgeBaseIds: accessibleKbIds,
knowledgeBaseId: accessibleKbIds[0],

View File

@@ -1,7 +1,8 @@
import { and, eq } from 'drizzle-orm'
import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { getSession } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console/logger'
import { getUserEntityPermissions } from '@/lib/permissions/utils'
import { db } from '@/db'
import { webhook, workflow } from '@/db/schema'
@@ -29,11 +30,13 @@ export async function GET(request: NextRequest, { params }: { params: Promise<{
workflow: {
id: workflow.id,
name: workflow.name,
userId: workflow.userId,
workspaceId: workflow.workspaceId,
},
})
.from(webhook)
.innerJoin(workflow, eq(webhook.workflowId, workflow.id))
.where(and(eq(webhook.id, id), eq(workflow.userId, session.user.id)))
.where(eq(webhook.id, id))
.limit(1)
if (webhooks.length === 0) {
@@ -41,6 +44,33 @@ export async function GET(request: NextRequest, { params }: { params: Promise<{
return NextResponse.json({ error: 'Webhook not found' }, { status: 404 })
}
const webhookData = webhooks[0]
// Check if user has permission to access this webhook
let hasAccess = false
// Case 1: User owns the workflow
if (webhookData.workflow.userId === session.user.id) {
hasAccess = true
}
// Case 2: Workflow belongs to a workspace and user has any permission
if (!hasAccess && webhookData.workflow.workspaceId) {
const userPermission = await getUserEntityPermissions(
session.user.id,
'workspace',
webhookData.workflow.workspaceId
)
if (userPermission !== null) {
hasAccess = true
}
}
if (!hasAccess) {
logger.warn(`[${requestId}] User ${session.user.id} denied access to webhook: ${id}`)
return NextResponse.json({ error: 'Access denied' }, { status: 403 })
}
logger.info(`[${requestId}] Successfully retrieved webhook: ${id}`)
return NextResponse.json({ webhook: webhooks[0] }, { status: 200 })
} catch (error) {
@@ -66,13 +96,14 @@ export async function PATCH(request: NextRequest, { params }: { params: Promise<
const body = await request.json()
const { path, provider, providerConfig, isActive } = body
// Find the webhook and check ownership
// Find the webhook and check permissions
const webhooks = await db
.select({
webhook: webhook,
workflow: {
id: workflow.id,
userId: workflow.userId,
workspaceId: workflow.workspaceId,
},
})
.from(webhook)
@@ -85,9 +116,33 @@ export async function PATCH(request: NextRequest, { params }: { params: Promise<
return NextResponse.json({ error: 'Webhook not found' }, { status: 404 })
}
if (webhooks[0].workflow.userId !== session.user.id) {
logger.warn(`[${requestId}] Unauthorized webhook update attempt for webhook: ${id}`)
return NextResponse.json({ error: 'Unauthorized' }, { status: 403 })
const webhookData = webhooks[0]
// Check if user has permission to modify this webhook
let canModify = false
// Case 1: User owns the workflow
if (webhookData.workflow.userId === session.user.id) {
canModify = true
}
// Case 2: Workflow belongs to a workspace and user has write or admin permission
if (!canModify && webhookData.workflow.workspaceId) {
const userPermission = await getUserEntityPermissions(
session.user.id,
'workspace',
webhookData.workflow.workspaceId
)
if (userPermission === 'write' || userPermission === 'admin') {
canModify = true
}
}
if (!canModify) {
logger.warn(
`[${requestId}] User ${session.user.id} denied permission to modify webhook: ${id}`
)
return NextResponse.json({ error: 'Access denied' }, { status: 403 })
}
logger.debug(`[${requestId}] Updating webhook properties`, {
@@ -136,13 +191,14 @@ export async function DELETE(
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
// Find the webhook and check ownership
// Find the webhook and check permissions
const webhooks = await db
.select({
webhook: webhook,
workflow: {
id: workflow.id,
userId: workflow.userId,
workspaceId: workflow.workspaceId,
},
})
.from(webhook)
@@ -155,12 +211,36 @@ export async function DELETE(
return NextResponse.json({ error: 'Webhook not found' }, { status: 404 })
}
if (webhooks[0].workflow.userId !== session.user.id) {
logger.warn(`[${requestId}] Unauthorized webhook deletion attempt for webhook: ${id}`)
return NextResponse.json({ error: 'Unauthorized' }, { status: 403 })
const webhookData = webhooks[0]
// Check if user has permission to delete this webhook
let canDelete = false
// Case 1: User owns the workflow
if (webhookData.workflow.userId === session.user.id) {
canDelete = true
}
const foundWebhook = webhooks[0].webhook
// Case 2: Workflow belongs to a workspace and user has write or admin permission
if (!canDelete && webhookData.workflow.workspaceId) {
const userPermission = await getUserEntityPermissions(
session.user.id,
'workspace',
webhookData.workflow.workspaceId
)
if (userPermission === 'write' || userPermission === 'admin') {
canDelete = true
}
}
if (!canDelete) {
logger.warn(
`[${requestId}] User ${session.user.id} denied permission to delete webhook: ${id}`
)
return NextResponse.json({ error: 'Access denied' }, { status: 403 })
}
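// The same owner-or-workspace check now guards GET, PATCH, and DELETE. A
// sketch of a shared predicate (hypothetical helper, not part of this diff),
// assuming getUserEntityPermissions resolves to 'read' | 'write' | 'admin' | null:
//
// async function canEditWorkflow(
//   userId: string,
//   wf: { userId: string; workspaceId: string | null }
// ): Promise<boolean> {
//   if (wf.userId === userId) return true
//   if (!wf.workspaceId) return false
//   const perm = await getUserEntityPermissions(userId, 'workspace', wf.workspaceId)
//   return perm === 'write' || perm === 'admin'
// }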
const foundWebhook = webhookData.webhook
// If it's a Telegram webhook, delete it from Telegram first
if (foundWebhook.provider === 'telegram') {

View File

@@ -4,6 +4,7 @@ import { type NextRequest, NextResponse } from 'next/server'
import { getSession } from '@/lib/auth'
import { env } from '@/lib/env'
import { createLogger } from '@/lib/logs/console/logger'
import { getUserEntityPermissions } from '@/lib/permissions/utils'
import { getOAuthToken } from '@/app/api/auth/oauth/utils'
import { db } from '@/db'
import { webhook, workflow } from '@/db/schema'
@@ -94,18 +95,51 @@ export async function POST(request: NextRequest) {
return NextResponse.json({ error: 'Missing required fields' }, { status: 400 })
}
// Check if the workflow belongs to the user
const workflows = await db
.select({ id: workflow.id }) // Select only necessary field
// Check if the workflow exists and user has permission to modify it
const workflowData = await db
.select({
id: workflow.id,
userId: workflow.userId,
workspaceId: workflow.workspaceId,
})
.from(workflow)
.where(and(eq(workflow.id, workflowId), eq(workflow.userId, userId)))
.where(eq(workflow.id, workflowId))
.limit(1)
if (workflows.length === 0) {
logger.warn(`[${requestId}] Workflow not found or not owned by user: ${workflowId}`)
if (workflowData.length === 0) {
logger.warn(`[${requestId}] Workflow not found: ${workflowId}`)
return NextResponse.json({ error: 'Workflow not found' }, { status: 404 })
}
const workflowRecord = workflowData[0]
// Check if user has permission to modify this workflow
let canModify = false
// Case 1: User owns the workflow
if (workflowRecord.userId === userId) {
canModify = true
}
// Case 2: Workflow belongs to a workspace and user has write or admin permission
if (!canModify && workflowRecord.workspaceId) {
const userPermission = await getUserEntityPermissions(
userId,
'workspace',
workflowRecord.workspaceId
)
if (userPermission === 'write' || userPermission === 'admin') {
canModify = true
}
}
if (!canModify) {
logger.warn(
`[${requestId}] User ${userId} denied permission to modify webhook for workflow ${workflowId}`
)
return NextResponse.json({ error: 'Access denied' }, { status: 403 })
}
// Check if a webhook with the same path already exists
const existingWebhooks = await db
.select({ id: webhook.id, workflowId: webhook.workflowId })

View File

@@ -284,219 +284,10 @@ describe('Webhook Trigger API Route', () => {
expect(text).toMatch(/not found/i) // Response should contain "not found" message
})
/**
* Test duplicate webhook request handling
* Verifies that duplicate requests are detected and not processed multiple times
*/
it('should handle duplicate webhook requests', async () => {
// Set up duplicate detection
hasProcessedMessageMock.mockResolvedValue(true) // Simulate duplicate
processGenericDeduplicationMock.mockResolvedValue(
new Response('Duplicate request', { status: 200 })
)
// Configure DB mock to return a webhook and workflow
const { db } = await import('@/db')
const limitMock = vi.fn().mockReturnValue([
{
webhook: {
id: 'webhook-id',
path: 'test-path',
isActive: true,
provider: 'generic', // Not Airtable to test standard path
workflowId: 'workflow-id',
providerConfig: {},
},
workflow: {
id: 'workflow-id',
userId: 'user-id',
},
},
])
const whereMock = vi.fn().mockReturnValue({ limit: limitMock })
const innerJoinMock = vi.fn().mockReturnValue({ where: whereMock })
const fromMock = vi.fn().mockReturnValue({ innerJoin: innerJoinMock })
// @ts-ignore - mocking the query chain
db.select.mockReturnValue({ from: fromMock })
// Create a mock request
const req = createMockRequest('POST', { event: 'test' })
// Mock the path param
const params = Promise.resolve({ path: 'test-path' })
// Import the handler after mocks are set up
const { POST } = await import('@/app/api/webhooks/trigger/[path]/route')
// Call the handler
const response = await POST(req, { params })
// Expect 200 response for duplicate
expect(response.status).toBe(200)
// Verify response text indicates duplication
const text = await response.text()
expect(text).toMatch(/duplicate|received/i) // Response might be "Duplicate message" or "Request received"
})
/**
* Test Slack-specific webhook handling
* Verifies that Slack signature verification is performed
*/
// TODO: Fix failing test - returns 500 instead of 200
// it('should handle Slack webhooks with signature verification', async () => { ... })
/**
* Test error handling during webhook execution
*/
it('should handle errors during workflow execution', async () => {
// Mock the setTimeout to be faster for testing
// @ts-ignore - Replace global setTimeout for this test
global.setTimeout = vi.fn((callback) => {
callback() // Execute immediately
return 123 // Return a timer ID
})
// Set up error handling mocks
processWebhookMock.mockImplementation(() => {
throw new Error('Webhook execution failed')
})
executeMock.mockRejectedValue(new Error('Webhook execution failed'))
// Configure DB mock to return a webhook and workflow
const { db } = await import('@/db')
const limitMock = vi.fn().mockReturnValue([
{
webhook: {
id: 'webhook-id',
path: 'test-path',
isActive: true,
provider: 'generic', // Not Airtable to ensure we use the timeout path
workflowId: 'workflow-id',
providerConfig: {},
},
workflow: {
id: 'workflow-id',
userId: 'user-id',
},
},
])
const whereMock = vi.fn().mockReturnValue({ limit: limitMock })
const innerJoinMock = vi.fn().mockReturnValue({ where: whereMock })
const fromMock = vi.fn().mockReturnValue({ innerJoin: innerJoinMock })
// @ts-ignore - mocking the query chain
db.select.mockReturnValue({ from: fromMock })
// Create a mock request
const req = createMockRequest('POST', { event: 'test' })
// Mock the path param
const params = Promise.resolve({ path: 'test-path' })
// Import the handler after mocks are set up
const { POST } = await import('@/app/api/webhooks/trigger/[path]/route')
// Call the handler
const response = await POST(req, { params })
// Verify response exists and check status code
// For non-Airtable webhooks, we expect 200 from the timeout response
expect(response).toBeDefined()
expect(response.status).toBe(200)
// Verify response text
const text = await response.text()
expect(text).toMatch(/received|processing/i)
})
/**
* Test Airtable webhook specific handling
* Verifies that Airtable webhooks use the synchronous processing path
*/
it('should handle Airtable webhooks synchronously', async () => {
// Create webhook payload for Airtable
const airtablePayload = {
base: {
id: 'appn9RltLQQMsquyL',
},
webhook: {
id: 'achpbXeBqNLsRFAnD',
},
timestamp: new Date().toISOString(),
}
// Reset fetch and process mock
fetchAndProcessAirtablePayloadsMock.mockResolvedValue(undefined)
// Configure DB mock to return an Airtable webhook
const { db } = await import('@/db')
const limitMock = vi.fn().mockReturnValue([
{
webhook: {
id: 'airtable-webhook-id',
path: 'airtable-path',
isActive: true,
provider: 'airtable', // Set provider to airtable to test that path
workflowId: 'workflow-id',
providerConfig: {
baseId: 'appn9RltLQQMsquyL',
externalId: 'achpbXeBqNLsRFAnD',
},
},
workflow: {
id: 'workflow-id',
userId: 'user-id',
},
},
])
const whereMock = vi.fn().mockReturnValue({ limit: limitMock })
const innerJoinMock = vi.fn().mockReturnValue({ where: whereMock })
const fromMock = vi.fn().mockReturnValue({ innerJoin: innerJoinMock })
// Configure db.select to return the appropriate mock for this test
// @ts-ignore - Ignore TypeScript errors for test mocks
db.select = vi.fn().mockReturnValue({ from: fromMock })
// Also mock the DB for the Airtable notification check
const whereMock2 = vi.fn().mockReturnValue({ limit: vi.fn().mockReturnValue([]) })
const fromMock2 = vi.fn().mockReturnValue({ where: whereMock2 })
// We need to handle multiple calls to db.select
let callCount = 0
// @ts-ignore - Ignore TypeScript errors for test mocks
db.select = vi.fn().mockImplementation(() => {
callCount++
if (callCount === 1) {
return { from: fromMock }
}
return { from: fromMock2 }
})
// Create a mock request with Airtable payload
const req = createMockRequest('POST', airtablePayload)
// Mock the path param
const params = Promise.resolve({ path: 'airtable-path' })
// Import the handler after mocks are set up
const { POST } = await import('@/app/api/webhooks/trigger/[path]/route')
// Call the handler
const response = await POST(req, { params })
// For Airtable we expect 200 after synchronous processing
expect(response.status).toBe(200)
// Verify that the Airtable-specific function was called
expect(fetchAndProcessAirtablePayloadsMock).toHaveBeenCalledTimes(1)
// The response should indicate success
const text = await response.text()
expect(text).toMatch(/success|processed/i)
})
})

View File

@@ -1,19 +1,12 @@
import { and, eq, sql } from 'drizzle-orm'
import { tasks } from '@trigger.dev/sdk/v3'
import { and, eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { v4 as uuidv4 } from 'uuid'
import { checkServerSideUsageLimits } from '@/lib/billing'
import { createLogger } from '@/lib/logs/console/logger'
import { acquireLock, hasProcessedMessage, markMessageAsProcessed } from '@/lib/redis'
import {
fetchAndProcessAirtablePayloads,
handleSlackChallenge,
handleWhatsAppVerification,
processGenericDeduplication,
processWebhook,
processWhatsAppDeduplication,
validateMicrosoftTeamsSignature,
} from '@/lib/webhooks/utils'
import { loadWorkflowFromNormalizedTables } from '@/lib/workflows/db-helpers'
import { db } from '@/db'
import { subscription, webhook, workflow } from '@/db/schema'
import { RateLimiter } from '@/services/queue'
@@ -24,8 +17,6 @@ const logger = createLogger('WebhookTriggerAPI')
export const dynamic = 'force-dynamic'
export const maxDuration = 300
const _activeProcessingTasks = new Map<string, Promise<any>>()
/**
* Webhook Verification Handler (GET)
*
@@ -125,50 +116,13 @@ export async function POST(
return new NextResponse('Invalid JSON payload', { status: 400 })
}
// --- PHASE 2: Early Slack deduplication handling ---
const messageId = body?.event_id
if (body?.type === 'event_callback') {
const dedupeKey = messageId
? `slack:msg:${messageId}`
: `slack:${body?.team_id || ''}:${body?.event?.ts || body?.event?.event_ts || Date.now()}`
try {
const isDuplicate = await hasProcessedMessage(dedupeKey)
if (isDuplicate) {
logger.info(`[${requestId}] Duplicate Slack message detected: ${dedupeKey}`)
return new NextResponse('Duplicate message', { status: 200 })
}
await markMessageAsProcessed(dedupeKey, 60 * 60 * 24) // 24 hour TTL
} catch (error) {
logger.error(`[${requestId}] Error in Slack deduplication`, error)
// Continue processing - better to risk a duplicate than fail
}
// Handle Slack challenge
const slackResponse = handleSlackChallenge(body)
if (slackResponse) {
return slackResponse
}
// --- PHASE 3: Distributed lock acquisition ---
let hasExecutionLock = false
let executionLockKey: string
if (body?.type === 'event_callback') {
// For Slack events, use message-specific lock key
executionLockKey = messageId
? `execution:lock:slack:${messageId}`
: `execution:lock:slack:${body?.team_id || ''}:${body?.event?.ts || body?.event?.event_ts || Date.now()}`
} else {
// Default fallback for other providers
executionLockKey = `execution:lock:${requestId}:${crypto.randomUUID()}`
}
try {
hasExecutionLock = await acquireLock(executionLockKey, requestId, 30) // 30 second TTL
} catch (lockError) {
logger.error(`[${requestId}] Error acquiring execution lock`, lockError)
// Proceed without lock in case of Redis failure (fallback to best-effort)
}
// --- PHASE 4: Webhook identification ---
// --- PHASE 2: Webhook identification ---
const path = (await params).path
logger.info(`[${requestId}] Processing webhook request for path: ${path}`)
@@ -191,60 +145,7 @@ export async function POST(
foundWebhook = webhooks[0].webhook
foundWorkflow = webhooks[0].workflow
const normalizedData = await loadWorkflowFromNormalizedTables(foundWorkflow.id)
if (!normalizedData) {
logger.error(`[${requestId}] No normalized data found for webhook workflow ${foundWorkflow.id}`)
return new NextResponse('Workflow data not found in normalized tables', { status: 500 })
}
// Construct state from normalized data only (execution-focused, no frontend state fields)
foundWorkflow.state = {
blocks: normalizedData.blocks,
edges: normalizedData.edges,
loops: normalizedData.loops,
parallels: normalizedData.parallels,
lastSaved: Date.now(),
isDeployed: foundWorkflow.isDeployed || false,
deployedAt: foundWorkflow.deployedAt,
}
// Special handling for Telegram webhooks to work around middleware User-Agent checks
if (foundWebhook.provider === 'telegram') {
// Log detailed information about the request for debugging
const userAgent = request.headers.get('user-agent') || 'empty'
logger.info(`[${requestId}] Received Telegram webhook request:`, {
userAgent,
path,
clientIp:
request.headers.get('x-forwarded-for') || request.headers.get('x-real-ip') || 'unknown',
method: request.method,
contentType: request.headers.get('content-type'),
hasUpdate: !!body?.update_id,
})
// We can't modify the incoming request, but we can flag a missing User-Agent
// so the Telegram webhook can be reconfigured to send one in future requests
if (!userAgent || userAgent === 'empty') {
logger.warn(
`[${requestId}] Telegram webhook request missing User-Agent header. Recommend reconfiguring webhook with 'TelegramBot/1.0' User-Agent.`
)
}
}
// Detect provider type
const isAirtableWebhook = foundWebhook.provider === 'airtable'
const isGmailWebhook = foundWebhook.provider === 'gmail'
// Handle Slack challenge verification (must be done before timeout)
const slackChallengeResponse =
body?.type === 'url_verification' ? handleSlackChallenge(body) : null
if (slackChallengeResponse) {
logger.info(`[${requestId}] Responding to Slack URL verification challenge`)
return slackChallengeResponse
}
// Handle Microsoft Teams outgoing webhook signature verification (must be done before timeout)
// Handle Microsoft Teams signature verification if needed
if (foundWebhook.provider === 'microsoftteams') {
const providerConfig = (foundWebhook.providerConfig as Record<string, any>) || {}
@@ -258,9 +159,6 @@ export async function POST(
return new NextResponse('Unauthorized - Missing HMAC signature', { status: 401 })
}
// Get the raw body for HMAC verification
const rawBody = await request.text()
const isValidSignature = validateMicrosoftTeamsSignature(
providerConfig.hmacSecret,
authHeader,
@@ -273,247 +171,99 @@ export async function POST(
}
logger.debug(`[${requestId}] Microsoft Teams HMAC signature verified successfully`)
// Parse the body again since we consumed it for verification
try {
body = JSON.parse(rawBody)
} catch (parseError) {
logger.error(
`[${requestId}] Failed to parse Microsoft Teams webhook body after verification`,
{
error: parseError instanceof Error ? parseError.message : String(parseError),
}
)
return new NextResponse('Invalid JSON payload', { status: 400 })
}
}
}
// Skip processing if another instance is already handling this request
if (!hasExecutionLock) {
logger.info(`[${requestId}] Skipping execution as lock was not acquired`)
return new NextResponse('Request is being processed by another instance', { status: 200 })
// --- PHASE 3: Rate limiting for webhook execution ---
try {
// Get user subscription for rate limiting
const [subscriptionRecord] = await db
.select({ plan: subscription.plan })
.from(subscription)
.where(eq(subscription.referenceId, foundWorkflow.userId))
.limit(1)
const subscriptionPlan = (subscriptionRecord?.plan || 'free') as SubscriptionPlan
// Check async rate limits (webhooks are processed asynchronously)
const rateLimiter = new RateLimiter()
const rateLimitCheck = await rateLimiter.checkRateLimit(
foundWorkflow.userId,
subscriptionPlan,
'webhook',
true // isAsync = true for webhook execution
)
if (!rateLimitCheck.allowed) {
logger.warn(`[${requestId}] Rate limit exceeded for webhook user ${foundWorkflow.userId}`, {
provider: foundWebhook.provider,
remaining: rateLimitCheck.remaining,
resetAt: rateLimitCheck.resetAt,
})
// Return 200 to prevent webhook provider retries, but indicate rate limit
if (foundWebhook.provider === 'microsoftteams') {
// Microsoft Teams requires specific response format
return NextResponse.json({
type: 'message',
text: 'Rate limit exceeded. Please try again later.',
})
}
// Simple error response for other providers (return 200 to prevent retries)
return NextResponse.json({ message: 'Rate limit exceeded' }, { status: 200 })
}
logger.debug(`[${requestId}] Rate limit check passed for webhook`, {
provider: foundWebhook.provider,
remaining: rateLimitCheck.remaining,
resetAt: rateLimitCheck.resetAt,
})
} catch (rateLimitError) {
logger.error(`[${requestId}] Error checking webhook rate limits:`, rateLimitError)
// Continue processing - better to risk rate limit bypass than fail webhook
}
// --- PHASE 5: Provider-specific processing ---
// --- PHASE 4: Queue webhook execution via trigger.dev ---
try {
// Queue the webhook execution task
const handle = await tasks.trigger('webhook-execution', {
webhookId: foundWebhook.id,
workflowId: foundWorkflow.id,
userId: foundWorkflow.userId,
provider: foundWebhook.provider,
body,
headers: Object.fromEntries(request.headers.entries()),
path,
blockId: foundWebhook.blockId,
})
// For Airtable: Process synchronously without timeouts
if (isAirtableWebhook) {
try {
logger.info(`[${requestId}] Airtable webhook ping received for webhook: ${foundWebhook.id}`)
logger.info(
`[${requestId}] Queued webhook execution task ${handle.id} for ${foundWebhook.provider} webhook`
)
// Handle Airtable deduplication
const notificationId = body.notificationId || null
if (notificationId) {
try {
const processedKey = `airtable-webhook-${foundWebhook.id}-${notificationId}`
// Check if notification was already processed
const alreadyProcessed = await db
.select({ id: webhook.id })
.from(webhook)
.where(
and(
eq(webhook.id, foundWebhook.id),
sql`(webhook.provider_config->>'processedNotifications')::jsonb ? ${processedKey}`
)
)
.limit(1)
if (alreadyProcessed.length > 0) {
logger.info(
`[${requestId}] Duplicate Airtable notification detected: ${notificationId}`
)
return new NextResponse('Notification already processed', { status: 200 })
}
// Store notification ID for deduplication
const providerConfig = foundWebhook.providerConfig || {}
const processedNotifications = providerConfig.processedNotifications || []
processedNotifications.push(processedKey)
// Keep only the last 100 notifications to prevent unlimited growth
const limitedNotifications = processedNotifications.slice(-100)
// Update the webhook record
await db
.update(webhook)
.set({
providerConfig: {
...providerConfig,
processedNotifications: limitedNotifications,
},
updatedAt: new Date(),
})
.where(eq(webhook.id, foundWebhook.id))
} catch (error) {
logger.warn(`[${requestId}] Airtable deduplication check failed, continuing`, {
error: error instanceof Error ? error.message : String(error),
})
}
}
// Process Airtable payloads synchronously
try {
logger.info(`[${requestId}] Starting Airtable payload processing`)
await fetchAndProcessAirtablePayloads(foundWebhook, foundWorkflow, requestId)
return new NextResponse('Airtable ping processed successfully', { status: 200 })
} catch (error: any) {
logger.error(`[${requestId}] Error during Airtable processing`, {
error: error.message,
})
return new NextResponse(`Error processing Airtable webhook: ${error.message}`, {
status: 500,
})
}
} catch (error: any) {
logger.error(`[${requestId}] Error in Airtable processing`, error)
return new NextResponse(`Internal server error: ${error.message}`, { status: 500 })
// Return immediate acknowledgment with provider-specific format
if (foundWebhook.provider === 'microsoftteams') {
// Microsoft Teams requires specific response format
return NextResponse.json({
type: 'message',
text: 'Sim Studio',
})
}
return NextResponse.json({ message: 'Webhook processed' })
} catch (error: any) {
logger.error(`[${requestId}] Failed to queue webhook execution:`, error)
// Still return 200 to prevent webhook provider retries
if (foundWebhook.provider === 'microsoftteams') {
// Microsoft Teams requires specific response format
return NextResponse.json({
type: 'message',
text: 'Webhook processing failed',
})
}
return NextResponse.json({ message: 'Internal server error' }, { status: 200 })
}
// --- For all other webhook types: Use async processing with timeout ---
// Create timeout promise so the provider gets an initial response within 25 seconds
const timeoutDuration = 25000
const timeoutPromise = new Promise<NextResponse>((resolve) => {
setTimeout(() => {
logger.info(`[${requestId}] Fast response timeout activated`)
resolve(new NextResponse('Request received', { status: 200 }))
}, timeoutDuration)
})
// Create the processing promise for asynchronous execution
const processingPromise = (async () => {
try {
// Provider-specific deduplication
if (foundWebhook.provider === 'whatsapp') {
const data = body?.entry?.[0]?.changes?.[0]?.value
const messages = data?.messages || []
const whatsappDuplicateResponse = await processWhatsAppDeduplication(requestId, messages)
if (whatsappDuplicateResponse) {
return whatsappDuplicateResponse
}
} else if (foundWebhook.provider === 'gmail') {
// Gmail-specific validation and logging
logger.info(`[${requestId}] Gmail webhook request received for webhook: ${foundWebhook.id}`)
const webhookSecret = foundWebhook.secret
if (webhookSecret) {
const secretHeader = request.headers.get('X-Webhook-Secret')
if (secretHeader !== webhookSecret) {
logger.warn(`[${requestId}] Invalid webhook secret`)
return new NextResponse('Unauthorized', { status: 401 })
}
}
if (!body.email) {
logger.warn(`[${requestId}] Invalid Gmail webhook payload format`)
return new NextResponse('Invalid payload format', { status: 400 })
}
logger.info(`[${requestId}] Processing Gmail email`, {
emailId: body.email.id,
subject:
body.email?.payload?.headers?.find((h: any) => h.name === 'Subject')?.value ||
'No subject',
})
// Gmail deduplication using generic method
const genericDuplicateResponse = await processGenericDeduplication(requestId, path, body)
if (genericDuplicateResponse) {
return genericDuplicateResponse
}
} else if (foundWebhook.provider !== 'slack') {
// Generic deduplication for all other providers
const genericDuplicateResponse = await processGenericDeduplication(requestId, path, body)
if (genericDuplicateResponse) {
return genericDuplicateResponse
}
}
// Check rate limits for webhook execution
const [subscriptionRecord] = await db
.select({ plan: subscription.plan })
.from(subscription)
.where(eq(subscription.referenceId, foundWorkflow.userId))
.limit(1)
const subscriptionPlan = (subscriptionRecord?.plan || 'free') as SubscriptionPlan
const rateLimiter = new RateLimiter()
const rateLimitCheck = await rateLimiter.checkRateLimit(
foundWorkflow.userId,
subscriptionPlan,
'webhook',
false // webhooks are always sync
)
if (!rateLimitCheck.allowed) {
logger.warn(`[${requestId}] Rate limit exceeded for webhook user ${foundWorkflow.userId}`, {
remaining: rateLimitCheck.remaining,
resetAt: rateLimitCheck.resetAt,
})
// Return 200 to prevent webhook retries but indicate rate limit in response
return new NextResponse(
JSON.stringify({
status: 'error',
message: `Rate limit exceeded. You have ${rateLimitCheck.remaining} requests remaining. Resets at ${rateLimitCheck.resetAt.toISOString()}`,
}),
{
status: 200, // Use 200 to prevent webhook provider retries
headers: { 'Content-Type': 'application/json' },
}
)
}
// Check if the user has exceeded their usage limits
const usageCheck = await checkServerSideUsageLimits(foundWorkflow.userId)
if (usageCheck.isExceeded) {
logger.warn(
`[${requestId}] User ${foundWorkflow.userId} has exceeded usage limits. Skipping webhook execution.`,
{
currentUsage: usageCheck.currentUsage,
limit: usageCheck.limit,
workflowId: foundWorkflow.id,
}
)
// Return a successful response to avoid webhook retries, but don't execute the workflow
return new NextResponse(
JSON.stringify({
status: 'error',
message:
usageCheck.message ||
'Usage limit exceeded. Please upgrade your plan to continue using webhooks.',
}),
{
status: 200, // Use 200 to prevent webhook provider retries
headers: { 'Content-Type': 'application/json' },
}
)
}
// Execute workflow for the webhook event
logger.info(`[${requestId}] Executing workflow for ${foundWebhook.provider} webhook`)
const executionId = uuidv4()
return await processWebhook(
foundWebhook,
foundWorkflow,
body,
request,
executionId,
requestId
)
} catch (error: any) {
logger.error(`[${requestId}] Error processing webhook:`, error)
return new NextResponse(`Internal server error: ${error.message}`, { status: 500 })
}
})()
// Race processing against timeout to ensure fast response
return Promise.race([timeoutPromise, processingPromise])
}
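// Sketch of the consuming side (assumed shape, not part of this diff): a
// trigger.dev v3 task whose id matches the 'webhook-execution' string above
// and whose payload mirrors what this handler enqueues.
//
// import { task } from '@trigger.dev/sdk/v3'
//
// export const webhookExecution = task({
//   id: 'webhook-execution',
//   run: async (payload: {
//     webhookId: string
//     workflowId: string
//     userId: string
//     provider: string | null
//     body: unknown
//     headers: Record<string, string>
//     path: string
//     blockId: string | null
//   }) => {
//     // Deduplication, usage checks, and workflow execution now run here,
//     // off the HTTP request path.
//   },
// })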

View File

@@ -2,7 +2,9 @@ import { Analytics } from '@vercel/analytics/next'
import { SpeedInsights } from '@vercel/speed-insights/next'
import type { Metadata, Viewport } from 'next'
import { PublicEnvScript } from 'next-runtime-env'
import { isHosted } from '@/lib/environment'
import { createLogger } from '@/lib/logs/console/logger'
import { getAssetUrl } from '@/lib/utils'
import { TelemetryConsentDialog } from '@/app/telemetry-consent-dialog'
import '@/app/globals.css'
@@ -105,7 +107,7 @@ export const metadata: Metadata = {
siteName: 'Sim Studio',
images: [
{
url: 'https://simstudio.ai/social/facebook.png',
url: getAssetUrl('social/facebook.png'),
width: 1200,
height: 630,
alt: 'Sim Studio',
@@ -117,7 +119,7 @@ export const metadata: Metadata = {
title: 'Sim Studio',
description:
'Build and deploy AI agents using our Figma-like canvas. Build, write evals, and deploy AI agent workflows that automate workflows and streamline your business processes.',
images: ['https://simstudio.ai/social/twitter.png'],
images: [getAssetUrl('social/twitter.png')],
creator: '@simstudioai',
site: '@simstudioai',
},
@@ -218,16 +220,29 @@ export default function RootLayout({ children }: { children: React.ReactNode })
<meta name='twitter:domain' content='simstudio.ai' />
{/* Additional image sources */}
<link rel='image_src' href='https://simstudio.ai/social/facebook.png' />
<link rel='image_src' href={getAssetUrl('social/facebook.png')} />
<PublicEnvScript />
{/* RB2B Script - Only load on hosted version */}
{isHosted && (
<script
dangerouslySetInnerHTML={{
__html: `!function () {var reb2b = window.reb2b = window.reb2b || [];if (reb2b.invoked) return;reb2b.invoked = true;reb2b.methods = ["identify", "collect"];reb2b.factory = function (method) {return function () {var args = Array.prototype.slice.call(arguments);args.unshift(method);reb2b.push(args);return reb2b;};};for (var i = 0; i < reb2b.methods.length; i++) {var key = reb2b.methods[i];reb2b[key] = reb2b.factory(key);}reb2b.load = function (key) {var script = document.createElement("script");script.type = "text/javascript";script.async = true;script.src = "https://b2bjsstore.s3.us-west-2.amazonaws.com/b/" + key + "/DNXY8HX558O0.js.gz";var first = document.getElementsByTagName("script")[0];first.parentNode.insertBefore(script, first);};reb2b.SNIPPET_VERSION = "1.0.1";reb2b.load("DNXY8HX558O0");}();`,
}}
/>
)}
</head>
<body suppressHydrationWarning>
<ZoomPrevention />
<TelemetryConsentDialog />
{children}
<SpeedInsights />
<Analytics />
{isHosted && (
<>
<SpeedInsights />
<Analytics />
</>
)}
</body>
</html>
)
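// A plausible sketch of getAssetUrl (the real helper lives in '@/lib/utils';
// NEXT_PUBLIC_CDN_URL is an assumed variable name), assuming it prefixes a
// CDN base when configured and falls back to the static origin otherwise:
//
// export function getAssetUrl(path: string): string {
//   const cdn = process.env.NEXT_PUBLIC_CDN_URL
//   return cdn ? `${cdn}/${path}` : `https://simstudio.ai/${path}`
// }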

View File

@@ -11,6 +11,7 @@ import {
TooltipContent,
TooltipTrigger,
} from '@/components/ui'
import { TAG_SLOTS } from '@/lib/constants/knowledge'
import { createLogger } from '@/lib/logs/console/logger'
import { useUserPermissionsContext } from '@/app/workspace/[workspaceId]/components/providers/workspace-permissions-provider'
import {
@@ -21,7 +22,12 @@ import {
} from '@/app/workspace/[workspaceId]/knowledge/[id]/[documentId]/components'
import { ActionBar } from '@/app/workspace/[workspaceId]/knowledge/[id]/components'
import { KnowledgeHeader, SearchInput } from '@/app/workspace/[workspaceId]/knowledge/components'
import {
type DocumentTag,
DocumentTagEntry,
} from '@/app/workspace/[workspaceId]/knowledge/components/document-tag-entry/document-tag-entry'
import { useDocumentChunks } from '@/hooks/use-knowledge'
import { useTagDefinitions } from '@/hooks/use-tag-definitions'
import { type ChunkData, type DocumentData, useKnowledgeStore } from '@/stores/knowledge/store'
const logger = createLogger('Document')
@@ -50,7 +56,11 @@ export function Document({
knowledgeBaseName,
documentName,
}: DocumentProps) {
const { getCachedKnowledgeBase, getCachedDocuments } = useKnowledgeStore()
const {
getCachedKnowledgeBase,
getCachedDocuments,
updateDocument: updateDocumentInStore,
} = useKnowledgeStore()
const { workspaceId } = useParams()
const router = useRouter()
const searchParams = useSearchParams()
@@ -60,7 +70,6 @@ export function Document({
const {
chunks: paginatedChunks,
allChunks,
filteredChunks,
searchQuery,
setSearchQuery,
currentPage,
@@ -81,15 +90,102 @@ export function Document({
const [selectedChunks, setSelectedChunks] = useState<Set<string>>(new Set())
const [selectedChunk, setSelectedChunk] = useState<ChunkData | null>(null)
const [isModalOpen, setIsModalOpen] = useState(false)
const [documentTags, setDocumentTags] = useState<DocumentTag[]>([])
const [documentData, setDocumentData] = useState<DocumentData | null>(null)
const [isLoadingDocument, setIsLoadingDocument] = useState(true)
const [error, setError] = useState<string | null>(null)
// Use tag definitions hook for custom labels
const { tagDefinitions, fetchTagDefinitions } = useTagDefinitions(knowledgeBaseId, documentId)
// Function to build document tags from data and definitions
const buildDocumentTags = useCallback(
(docData: DocumentData, definitions: any[], currentTags?: DocumentTag[]) => {
const tags: DocumentTag[] = []
const tagSlots = TAG_SLOTS
tagSlots.forEach((slot) => {
const value = (docData as any)[slot] as string | null | undefined
const definition = definitions.find((def) => def.tagSlot === slot)
const currentTag = currentTags?.find((tag) => tag.slot === slot)
// Only include tag if the document actually has a value for it
if (value?.trim()) {
tags.push({
slot,
// Preserve existing displayName if definition is not found yet
displayName: definition?.displayName || currentTag?.displayName || '',
fieldType: definition?.fieldType || currentTag?.fieldType || 'text',
value: value.trim(),
})
}
})
return tags
},
[]
)
// Handle tag updates (local state only, no API calls)
const handleTagsChange = useCallback((newTags: DocumentTag[]) => {
// Only update local state, don't save to API
setDocumentTags(newTags)
}, [])
// Handle saving document tag values to the API
const handleSaveDocumentTags = useCallback(
async (tagsToSave: DocumentTag[]) => {
if (!documentData) return
try {
// Convert DocumentTag array to tag data for API
const tagData: Record<string, string> = {}
const tagSlots = TAG_SLOTS
// Clear all tags first
tagSlots.forEach((slot) => {
tagData[slot] = ''
})
// Set values from tagsToSave
tagsToSave.forEach((tag) => {
if (tag.value.trim()) {
tagData[tag.slot] = tag.value.trim()
}
})
// Update document via API
const response = await fetch(`/api/knowledge/${knowledgeBaseId}/documents/${documentId}`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(tagData),
})
if (!response.ok) {
throw new Error('Failed to update document tags')
}
// Update the document in the store and local state
updateDocumentInStore(knowledgeBaseId, documentId, tagData)
setDocumentData((prev) => (prev ? { ...prev, ...tagData } : null))
// Refresh tag definitions to update the display
await fetchTagDefinitions()
} catch (error) {
logger.error('Error updating document tags:', error)
throw error // Re-throw so the component can handle it
}
},
[documentData, knowledgeBaseId, documentId, updateDocumentInStore, fetchTagDefinitions]
)
const [isCreateChunkModalOpen, setIsCreateChunkModalOpen] = useState(false)
const [chunkToDelete, setChunkToDelete] = useState<ChunkData | null>(null)
const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false)
const [isBulkOperating, setIsBulkOperating] = useState(false)
const [document, setDocument] = useState<DocumentData | null>(null)
const [isLoadingDocument, setIsLoadingDocument] = useState(true)
const [error, setError] = useState<string | null>(null)
const combinedError = error || chunksError
// URL synchronization for pagination
@@ -121,7 +217,10 @@ export function Document({
const cachedDoc = cachedDocuments?.documents?.find((d) => d.id === documentId)
if (cachedDoc) {
setDocument(cachedDoc)
setDocumentData(cachedDoc)
// Initialize tags from cached document
const initialTags = buildDocumentTags(cachedDoc, tagDefinitions)
setDocumentTags(initialTags)
setIsLoadingDocument(false)
return
}
@@ -138,7 +237,10 @@ export function Document({
const result = await response.json()
if (result.success) {
setDocument(result.data)
setDocumentData(result.data)
// Initialize tags from fetched document
const initialTags = buildDocumentTags(result.data, tagDefinitions, [])
setDocumentTags(initialTags)
} else {
throw new Error(result.error || 'Failed to fetch document')
}
@@ -153,11 +255,19 @@ export function Document({
if (knowledgeBaseId && documentId) {
fetchDocument()
}
}, [knowledgeBaseId, documentId, getCachedDocuments])
}, [knowledgeBaseId, documentId, getCachedDocuments, buildDocumentTags])
// Separate effect to rebuild tags when tag definitions change (without re-fetching document)
useEffect(() => {
if (documentData) {
const rebuiltTags = buildDocumentTags(documentData, tagDefinitions, documentTags)
setDocumentTags(rebuiltTags)
}
}, [documentData, tagDefinitions, buildDocumentTags])
const knowledgeBase = getCachedKnowledgeBase(knowledgeBaseId)
const effectiveKnowledgeBaseName = knowledgeBase?.name || knowledgeBaseName || 'Knowledge Base'
const effectiveDocumentName = document?.filename || documentName || 'Document'
const effectiveDocumentName = documentData?.filename || documentName || 'Document'
const breadcrumbs = [
{ label: 'Knowledge', href: `/workspace/${workspaceId}/knowledge` },
@@ -254,7 +364,7 @@ export function Document({
}
}
const handleChunkCreated = async (newChunk: ChunkData) => {
const handleChunkCreated = async () => {
// Refresh the chunks list to include the new chunk
await refreshChunks()
}
@@ -396,16 +506,16 @@ export function Document({
value={searchQuery}
onChange={setSearchQuery}
placeholder={
document?.processingStatus === 'completed'
documentData?.processingStatus === 'completed'
? 'Search chunks...'
: 'Document processing...'
}
disabled={document?.processingStatus !== 'completed'}
disabled={documentData?.processingStatus !== 'completed'}
/>
<Button
onClick={() => setIsCreateChunkModalOpen(true)}
disabled={document?.processingStatus === 'failed' || !userPermissions.canEdit}
disabled={documentData?.processingStatus === 'failed' || !userPermissions.canEdit}
size='sm'
className='flex items-center gap-1 bg-[#701FFC] font-[480] text-white shadow-[0_0_0_0_#701FFC] transition-all duration-200 hover:bg-[#6518E6] hover:shadow-[0_0_0_4px_rgba(127,47,255,0.15)] disabled:cursor-not-allowed disabled:opacity-50'
>
@@ -414,36 +524,19 @@ export function Document({
</Button>
</div>
{/* Document Tags Display */}
{document &&
(() => {
const tags = [
{ label: 'Tag 1', value: document.tag1 },
{ label: 'Tag 2', value: document.tag2 },
{ label: 'Tag 3', value: document.tag3 },
{ label: 'Tag 4', value: document.tag4 },
{ label: 'Tag 5', value: document.tag5 },
{ label: 'Tag 6', value: document.tag6 },
{ label: 'Tag 7', value: document.tag7 },
].filter((tag) => tag.value?.trim())
return tags.length > 0 ? (
<div className='mb-4 rounded-md bg-muted/50 p-3'>
<p className='mb-2 text-muted-foreground text-xs'>Document Tags:</p>
<div className='flex flex-wrap gap-2'>
{tags.map((tag, index) => (
<span
key={index}
className='inline-flex items-center gap-1 rounded-md bg-primary/10 px-2 py-1 text-primary text-xs'
>
<span className='font-medium'>{tag.label}:</span>
<span>{tag.value}</span>
</span>
))}
</div>
</div>
) : null
})()}
{/* Document Tag Entry */}
{userPermissions.canEdit && (
<div className='mb-4 rounded-md border p-4'>
<DocumentTagEntry
tags={documentTags}
onTagsChange={handleTagsChange}
disabled={false}
knowledgeBaseId={knowledgeBaseId}
documentId={documentId}
onSave={handleSaveDocumentTags}
/>
</div>
)}
{/* Error State for chunks */}
{combinedError && !isLoadingAllChunks && (
@@ -472,7 +565,8 @@ export function Document({
checked={isAllSelected}
onCheckedChange={handleSelectAll}
disabled={
document?.processingStatus !== 'completed' || !userPermissions.canEdit
documentData?.processingStatus !== 'completed' ||
!userPermissions.canEdit
}
aria-label='Select all chunks'
className='h-3.5 w-3.5 border-gray-300 focus-visible:ring-[#701FFC]/20 data-[state=checked]:border-[#701FFC] data-[state=checked]:bg-[#701FFC] [&>*]:h-3 [&>*]:w-3'
@@ -514,7 +608,7 @@ export function Document({
<col className='w-[12%]' />
</colgroup>
<tbody>
{document?.processingStatus !== 'completed' ? (
{documentData?.processingStatus !== 'completed' ? (
<tr className='border-b transition-colors'>
<td className='px-4 py-3'>
<div className='h-3.5 w-3.5' />
@@ -526,13 +620,13 @@ export function Document({
<div className='flex items-center gap-2'>
<FileText className='h-5 w-5 text-muted-foreground' />
<span className='text-muted-foreground text-sm italic'>
{document?.processingStatus === 'pending' &&
{documentData?.processingStatus === 'pending' &&
'Document processing pending...'}
{document?.processingStatus === 'processing' &&
{documentData?.processingStatus === 'processing' &&
'Document processing in progress...'}
{document?.processingStatus === 'failed' &&
{documentData?.processingStatus === 'failed' &&
'Document processing failed'}
{!document?.processingStatus && 'Document not ready'}
{!documentData?.processingStatus && 'Document not ready'}
</span>
</div>
</td>
@@ -558,7 +652,7 @@ export function Document({
<div className='flex items-center gap-2'>
<FileText className='h-5 w-5 text-muted-foreground' />
<span className='text-muted-foreground text-sm italic'>
{document?.processingStatus === 'completed'
{documentData?.processingStatus === 'completed'
? searchQuery.trim()
? 'No chunks match your search'
: 'No chunks found'
@@ -708,7 +802,7 @@ export function Document({
</div>
{/* Pagination Controls */}
{document?.processingStatus === 'completed' && totalPages > 1 && (
{documentData?.processingStatus === 'completed' && totalPages > 1 && (
<div className='flex items-center justify-center border-t bg-background px-6 py-4'>
<div className='flex items-center gap-1'>
<Button
@@ -773,7 +867,7 @@ export function Document({
{/* Edit Chunk Modal */}
<EditChunkModal
chunk={selectedChunk}
document={document}
document={documentData}
knowledgeBaseId={knowledgeBaseId}
isOpen={isModalOpen}
onClose={handleCloseModal}
@@ -811,7 +905,7 @@ export function Document({
<CreateChunkModal
open={isCreateChunkModalOpen}
onOpenChange={setIsCreateChunkModalOpen}
document={document}
document={documentData}
knowledgeBaseId={knowledgeBaseId}
onChunkCreated={handleChunkCreated}
/>
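
A note on the save path above: `handleSaveDocumentTags` clears every slot before applying the new values, so a removed tag is erased server-side instead of silently keeping its old value. A minimal sketch of the PUT body it builds, assuming `TAG_SLOTS` is the const tuple `tag1`…`tag7` from `@/lib/constants/knowledge`:

```ts
// Sketch of the payload PUT to /api/knowledge/:kbId/documents/:docId.
type TagSlot = 'tag1' | 'tag2' | 'tag3' | 'tag4' | 'tag5' | 'tag6' | 'tag7'
const TAG_SLOTS: readonly TagSlot[] = ['tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag6', 'tag7']

interface DocumentTag {
  slot: TagSlot
  displayName: string
  fieldType: string
  value: string
}

function buildTagPayload(tagsToSave: DocumentTag[]): Record<string, string> {
  const tagData: Record<string, string> = {}
  // Clear all slots first so a deleted tag becomes '' on the server.
  for (const slot of TAG_SLOTS) tagData[slot] = ''
  for (const tag of tagsToSave) {
    if (tag.value.trim()) tagData[tag.slot] = tag.value.trim()
  }
  return tagData
}

// buildTagPayload([{ slot: 'tag2', displayName: 'Team', fieldType: 'text', value: 'infra' }])
// → { tag1: '', tag2: 'infra', tag3: '', tag4: '', tag5: '', tag6: '', tag7: '' }
```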

View File

@@ -6,7 +6,10 @@ import { Button } from '@/components/ui/button'
import { Dialog, DialogContent, DialogHeader, DialogTitle } from '@/components/ui/dialog'
import { Label } from '@/components/ui/label'
import { createLogger } from '@/lib/logs/console/logger'
import { type TagData, TagInput } from '@/app/workspace/[workspaceId]/knowledge/components'
import {
type DocumentTag,
DocumentTagEntry,
} from '@/app/workspace/[workspaceId]/knowledge/components/document-tag-entry/document-tag-entry'
import { useKnowledgeUpload } from '@/app/workspace/[workspaceId]/knowledge/hooks/use-knowledge-upload'
const logger = createLogger('UploadModal')
@@ -47,7 +50,7 @@ export function UploadModal({
}: UploadModalProps) {
const fileInputRef = useRef<HTMLInputElement>(null)
const [files, setFiles] = useState<FileWithPreview[]>([])
const [tags, setTags] = useState<TagData>({})
const [tags, setTags] = useState<DocumentTag[]>([])
const [fileError, setFileError] = useState<string | null>(null)
const [isDragging, setIsDragging] = useState(false)
@@ -63,7 +66,7 @@ export function UploadModal({
if (isUploading) return // Prevent closing during upload
setFiles([])
setTags({})
setTags([])
setFileError(null)
setIsDragging(false)
onOpenChange(false)
@@ -142,11 +145,19 @@ export function UploadModal({
if (files.length === 0) return
try {
// Convert DocumentTag array to TagData format
const tagData: Record<string, string> = {}
tags.forEach((tag) => {
if (tag.value.trim()) {
tagData[tag.slot] = tag.value.trim()
}
})
// Create files with tags for upload
const filesWithTags = files.map((file) => {
// Add tags as custom properties to the file object
const fileWithTags = file as File & TagData
Object.assign(fileWithTags, tags)
const fileWithTags = file as unknown as File & Record<string, string>
Object.assign(fileWithTags, tagData)
return fileWithTags
})
@@ -169,8 +180,14 @@ export function UploadModal({
</DialogHeader>
<div className='flex-1 space-y-6 overflow-auto'>
{/* Tag Input Section */}
<TagInput tags={tags} onTagsChange={setTags} disabled={isUploading} />
{/* Document Tag Entry Section */}
<DocumentTagEntry
tags={tags}
onTagsChange={setTags}
disabled={isUploading}
knowledgeBaseId={knowledgeBaseId}
documentId={null} // No specific document for upload
/>
{/* File Upload Section */}
<div className='space-y-3'>
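
The upload path attaches tag values directly to each `File` object via `Object.assign`, so they ride along to `useKnowledgeUpload` as own enumerable properties. A small sketch of the trick, with example values:

```ts
// Tag values become own properties of the File instance itself.
const tagData: Record<string, string> = { tag1: 'infra', tag2: '2025' } // example values

const file = new File(['hello'], 'notes.txt', { type: 'text/plain' })
const fileWithTags = file as unknown as File & Record<string, string>
Object.assign(fileWithTags, tagData)

console.log(fileWithTags.name) // 'notes.txt' — still a real File
console.log(fileWithTags.tag1) // 'infra' — tag travels with the file
```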

View File

@@ -13,11 +13,7 @@ import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Textarea } from '@/components/ui/textarea'
import { createLogger } from '@/lib/logs/console/logger'
import {
getDocumentIcon,
type TagData,
TagInput,
} from '@/app/workspace/[workspaceId]/knowledge/components'
import { getDocumentIcon } from '@/app/workspace/[workspaceId]/knowledge/components'
import { useKnowledgeUpload } from '@/app/workspace/[workspaceId]/knowledge/hooks/use-knowledge-upload'
import type { KnowledgeBaseData } from '@/stores/knowledge/store'
@@ -88,7 +84,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
const [fileError, setFileError] = useState<string | null>(null)
const [isDragging, setIsDragging] = useState(false)
const [dragCounter, setDragCounter] = useState(0) // Track drag events to handle nested elements
const [tags, setTags] = useState<TagData>({})
const scrollContainerRef = useRef<HTMLDivElement>(null)
const dropZoneRef = useRef<HTMLDivElement>(null)
@@ -283,14 +279,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
const newKnowledgeBase = result.data
if (files.length > 0) {
// Add tags to files before upload
const filesWithTags = files.map((file) => {
const fileWithTags = file as File & TagData
Object.assign(fileWithTags, tags)
return fileWithTags
})
const uploadedFiles = await uploadFiles(filesWithTags, newKnowledgeBase.id, {
const uploadedFiles = await uploadFiles(files, newKnowledgeBase.id, {
chunkSize: data.maxChunkSize,
minCharactersPerChunk: data.minChunkSize,
chunkOverlap: data.overlapSize,
@@ -314,7 +303,6 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
maxChunkSize: 1024,
overlapSize: 200,
})
setTags({})
// Clean up file previews
files.forEach((file) => URL.revokeObjectURL(file.preview))
@@ -490,11 +478,6 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
</div>
</div>
{/* Tag Input Section */}
<div className='mt-6'>
<TagInput tags={tags} onTagsChange={setTags} disabled={isSubmitting} />
</div>
{/* File Upload Section - Expands to fill remaining space */}
<div className='mt-6 flex flex-1 flex-col'>
<Label className='mb-2'>Upload Documents</Label>

View File

@@ -0,0 +1,455 @@
'use client'
import { useEffect, useRef, useState } from 'react'
import { ChevronDown, Plus, X } from 'lucide-react'
import { Button } from '@/components/ui/button'
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
} from '@/components/ui/dropdown-menu'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from '@/components/ui/select'
import { MAX_TAG_SLOTS, TAG_SLOTS, type TagSlot } from '@/lib/constants/knowledge'
import { useKnowledgeBaseTagDefinitions } from '@/hooks/use-knowledge-base-tag-definitions'
import { type TagDefinitionInput, useTagDefinitions } from '@/hooks/use-tag-definitions'
export interface DocumentTag {
slot: TagSlot
displayName: string
fieldType: string
value: string
}
interface DocumentTagEntryProps {
tags: DocumentTag[]
onTagsChange: (tags: DocumentTag[]) => void
disabled?: boolean
knowledgeBaseId?: string | null
documentId?: string | null
onSave?: (tags: DocumentTag[]) => Promise<void>
}
// TAG_SLOTS is now imported from constants
export function DocumentTagEntry({
tags,
onTagsChange,
disabled = false,
knowledgeBaseId = null,
documentId = null,
onSave,
}: DocumentTagEntryProps) {
const { saveTagDefinitions } = useTagDefinitions(knowledgeBaseId, documentId)
const { tagDefinitions: kbTagDefinitions, fetchTagDefinitions: refreshTagDefinitions } =
useKnowledgeBaseTagDefinitions(knowledgeBaseId)
const [editingTag, setEditingTag] = useState<{
index: number
value: string
tagName: string
isNew: boolean
} | null>(null)
const getNextAvailableSlot = (): DocumentTag['slot'] => {
const usedSlots = new Set(tags.map((tag) => tag.slot))
for (const slot of TAG_SLOTS) {
if (!usedSlots.has(slot)) {
return slot
}
}
return 'tag1' // fallback
}
const handleSaveDefinitions = async (tagsToSave?: DocumentTag[]) => {
if (!knowledgeBaseId || !documentId) return
const currentTags = tagsToSave || tags
// Create definitions for tags that have display names
const definitions: TagDefinitionInput[] = currentTags
.filter((tag) => tag?.displayName?.trim())
.map((tag) => ({
tagSlot: tag.slot as TagSlot,
displayName: tag.displayName.trim(),
fieldType: tag.fieldType || 'text',
}))
// Only save if we have valid definitions
if (definitions.length > 0) {
await saveTagDefinitions(definitions)
}
}
const handleCleanupUnusedTags = async () => {
if (!knowledgeBaseId || !documentId) return
try {
const response = await fetch(
`/api/knowledge/${knowledgeBaseId}/documents/${documentId}/tag-definitions?action=cleanup`,
{
method: 'DELETE',
}
)
if (!response.ok) {
throw new Error(`Cleanup failed: ${response.statusText}`)
}
const result = await response.json()
console.log('Cleanup result:', result)
} catch (error) {
console.error('Failed to cleanup unused tags:', error)
}
}
// Get available tag names that aren't already used in this document
const availableTagNames = kbTagDefinitions
.map((tag) => tag.displayName)
.filter((tagName) => !tags.some((tag) => tag.displayName === tagName))
// Check if we can add more tags (KB has less than MAX_TAG_SLOTS tag definitions)
const canAddMoreTags = kbTagDefinitions.length < MAX_TAG_SLOTS
const handleSuggestionClick = (tagName: string) => {
setEditingTag({ index: -1, value: '', tagName, isNew: false })
}
const handleCreateNewTag = async (tagName: string, value: string, fieldType = 'text') => {
if (!tagName.trim() || !value.trim()) return
// Check if tag name already exists in current document
const tagNameLower = tagName.trim().toLowerCase()
const existingTag = tags.find((tag) => tag.displayName.toLowerCase() === tagNameLower)
if (existingTag) {
alert(`Tag "${tagName}" already exists. Please choose a different name.`)
return
}
const newTag: DocumentTag = {
slot: getNextAvailableSlot(),
displayName: tagName.trim(),
fieldType: fieldType,
value: value.trim(),
}
const updatedTags = [...tags, newTag]
// SIMPLE ATOMIC OPERATION - NO CLEANUP
try {
// 1. Save tag definition first
await handleSaveDefinitions(updatedTags)
// 2. Save document values
if (onSave) {
await onSave(updatedTags)
}
// 3. Update UI
onTagsChange(updatedTags)
} catch (error) {
console.error('Failed to save tag:', error)
alert(`Failed to save tag "${tagName}". Please try again.`)
}
}
const handleUpdateTag = async (index: number, newValue: string) => {
if (!newValue.trim()) return
const updatedTags = tags.map((tag, i) =>
i === index ? { ...tag, value: newValue.trim() } : tag
)
// SIMPLE ATOMIC OPERATION - NO CLEANUP
try {
// 1. Save document values
if (onSave) {
await onSave(updatedTags)
}
// 2. Save tag definitions
await handleSaveDefinitions(updatedTags)
// 3. Update UI
onTagsChange(updatedTags)
} catch (error) {
console.error('Failed to update tag:', error)
}
}
const handleRemoveTag = async (index: number) => {
const updatedTags = tags.filter((_, i) => i !== index)
console.log('Removing tag, updated tags:', updatedTags)
// FULLY SYNCHRONOUS - DO NOT UPDATE UI UNTIL ALL OPERATIONS COMPLETE
try {
// 1. Save the document tag values
console.log('Saving document values after tag removal...')
if (onSave) {
await onSave(updatedTags)
}
// 2. Save the tag definitions
console.log('Saving tag definitions after tag removal...')
await handleSaveDefinitions(updatedTags)
// 3. Run cleanup to remove unused tag definitions
console.log('Running cleanup to remove unused tag definitions...')
await handleCleanupUnusedTags()
// 4. ONLY NOW update the UI
onTagsChange(updatedTags)
// 5. Refresh tag definitions for dropdown
await refreshTagDefinitions()
} catch (error) {
console.error('Failed to remove tag:', error)
}
}
return (
<div className='space-y-3'>
{/* Existing Tags as Chips */}
<div className='flex flex-wrap gap-2'>
{tags.map((tag, index) => (
<div
key={`${tag.slot}-${index}`}
className='inline-flex cursor-pointer items-center gap-1 rounded-full bg-gray-100 px-3 py-1 text-sm transition-colors hover:bg-gray-200'
onClick={() =>
setEditingTag({ index, value: tag.value, tagName: tag.displayName, isNew: false })
}
>
<span className='font-medium'>{tag.displayName}:</span>
<span className='text-muted-foreground'>{tag.value}</span>
<Button
type='button'
variant='ghost'
size='sm'
onClick={(e) => {
e.stopPropagation()
handleRemoveTag(index)
}}
disabled={disabled}
className='ml-1 h-4 w-4 p-0 text-muted-foreground hover:text-red-600'
>
<X className='h-3 w-3' />
</Button>
</div>
))}
</div>
{/* Add Tag Dropdown Selector */}
<DropdownMenu>
<DropdownMenuTrigger asChild>
<Button
type='button'
variant='outline'
size='sm'
disabled={disabled || (!canAddMoreTags && availableTagNames.length === 0)}
className='gap-1 text-muted-foreground hover:text-foreground'
>
<Plus className='h-4 w-4' />
<span>Add Tag</span>
<ChevronDown className='h-3 w-3' />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent align='start' className='w-48'>
{/* Existing tag names */}
{availableTagNames.length > 0 && (
<>
{availableTagNames.map((tagName) => {
const tagDefinition = kbTagDefinitions.find((def) => def.displayName === tagName)
return (
<DropdownMenuItem
key={tagName}
onClick={() => handleSuggestionClick(tagName)}
className='flex items-center justify-between'
>
<span>{tagName}</span>
<span className='text-muted-foreground text-xs'>
{tagDefinition?.fieldType || 'text'}
</span>
</DropdownMenuItem>
)
})}
<div className='my-1 h-px bg-border' />
</>
)}
{/* Create new tag option or disabled message */}
{canAddMoreTags ? (
<DropdownMenuItem
onClick={() => {
setEditingTag({ index: -1, value: '', tagName: '', isNew: true })
}}
className='flex items-center gap-2 text-blue-600'
>
<Plus className='h-4 w-4' />
<span>Create new tag</span>
</DropdownMenuItem>
) : (
<div className='px-2 py-1.5 text-muted-foreground text-sm'>
All {MAX_TAG_SLOTS} tag slots used in this knowledge base
</div>
)}
</DropdownMenuContent>
</DropdownMenu>
{/* Edit Tag Value Modal */}
{editingTag !== null && (
<EditTagModal
tagName={editingTag.tagName}
initialValue={editingTag.value}
isNew={editingTag.isNew}
existingType={
editingTag.isNew
? undefined
: kbTagDefinitions.find((t) => t.displayName === editingTag.tagName)?.fieldType
}
onSave={(value, type, newTagName) => {
if (editingTag.index === -1) {
// Creating new tag - use newTagName if provided, otherwise fall back to editingTag.tagName
const tagName = newTagName || editingTag.tagName
handleCreateNewTag(tagName, value, type)
} else {
// Updating existing tag
handleUpdateTag(editingTag.index, value)
}
setEditingTag(null)
}}
onCancel={() => {
setEditingTag(null)
}}
/>
)}
{/* Tag count display */}
{kbTagDefinitions.length > 0 && (
<div className='text-muted-foreground text-xs'>
{kbTagDefinitions.length} of {MAX_TAG_SLOTS} tag slots used in this knowledge base
</div>
)}
</div>
)
}
// Simple modal for editing tag values
interface EditTagModalProps {
tagName: string
initialValue: string
isNew: boolean
existingType?: string
onSave: (value: string, type?: string, newTagName?: string) => void
onCancel: () => void
}
function EditTagModal({
tagName,
initialValue,
isNew,
existingType,
onSave,
onCancel,
}: EditTagModalProps) {
const [value, setValue] = useState(initialValue)
const [fieldType, setFieldType] = useState(existingType || 'text')
const [newTagName, setNewTagName] = useState(tagName)
const inputRef = useRef<HTMLInputElement>(null)
useEffect(() => {
inputRef.current?.focus()
}, [])
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault()
if (value.trim() && (isNew ? newTagName.trim() : true)) {
onSave(value.trim(), fieldType, isNew ? newTagName.trim() : undefined)
}
}
const handleKeyDown = (e: React.KeyboardEvent) => {
if (e.key === 'Escape') {
onCancel()
}
}
return (
<div className='fixed inset-0 z-50 flex items-center justify-center bg-black/50'>
<div className='mx-4 w-96 max-w-sm rounded-lg bg-white p-4'>
<div className='mb-3 flex items-start justify-between'>
<h3 className='font-medium text-sm'>
{isNew ? 'Create new tag' : `Edit "${tagName}" value`}
</h3>
{/* Type Badge in Top Right */}
{!isNew && existingType && (
<span className='rounded bg-gray-100 px-2 py-1 font-medium text-gray-500 text-xs'>
{existingType.toUpperCase()}
</span>
)}
</div>
<form onSubmit={handleSubmit} className='space-y-3'>
{/* Tag Name Input for New Tags */}
{isNew && (
<div>
<Label className='font-medium text-muted-foreground text-xs'>Tag Name</Label>
<Input
value={newTagName}
onChange={(e) => setNewTagName(e.target.value)}
placeholder='Enter tag name'
className='mt-1 text-sm'
/>
</div>
)}
{/* Type Selection for New Tags */}
{isNew && (
<div>
<Label className='font-medium text-muted-foreground text-xs'>Type</Label>
<Select value={fieldType} onValueChange={setFieldType}>
<SelectTrigger className='mt-1 text-sm'>
<SelectValue />
</SelectTrigger>
<SelectContent>
<SelectItem value='text'>Text</SelectItem>
</SelectContent>
</Select>
</div>
)}
{/* Value Input */}
<div>
<Label className='font-medium text-muted-foreground text-xs'>Value</Label>
<Input
ref={inputRef}
value={value}
onChange={(e) => setValue(e.target.value)}
onKeyDown={handleKeyDown}
placeholder='Enter tag value'
className='mt-1 text-sm'
/>
</div>
<div className='flex justify-end gap-2'>
<Button type='button' variant='outline' size='sm' onClick={onCancel}>
Cancel
</Button>
<Button
type='submit'
size='sm'
disabled={!value.trim() || (isNew && !newTagName.trim())}
>
{isNew ? 'Create' : 'Save'}
</Button>
</div>
</form>
</div>
</div>
)
}
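
A minimal usage sketch for the component above (the parent component here is hypothetical): `DocumentTagEntry` is controlled, persists tag *definitions* itself via `useTagDefinitions`, and delegates persisting tag *values* to the `onSave` callback:

```tsx
import { useState } from 'react'
import {
  type DocumentTag,
  DocumentTagEntry,
} from '@/app/workspace/[workspaceId]/knowledge/components/document-tag-entry/document-tag-entry'

// Hypothetical parent: owns the tags array and persists values on save.
function DocumentTagsPanel({ kbId, docId }: { kbId: string; docId: string }) {
  const [tags, setTags] = useState<DocumentTag[]>([])
  return (
    <DocumentTagEntry
      tags={tags}
      onTagsChange={setTags}
      knowledgeBaseId={kbId}
      documentId={docId}
      onSave={async (next) => {
        // e.g. PUT the slot/value pairs, as the Document page's
        // handleSaveDocumentTags does above
      }}
    />
  )
}
```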

View File

@@ -6,15 +6,11 @@ import { Button } from '@/components/ui/button'
import { Collapsible, CollapsibleContent, CollapsibleTrigger } from '@/components/ui/collapsible'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { TAG_SLOTS, type TagSlot } from '@/lib/constants/knowledge'
import { useKnowledgeBaseTagDefinitions } from '@/hooks/use-knowledge-base-tag-definitions'
export interface TagData {
tag1?: string
tag2?: string
tag3?: string
tag4?: string
tag5?: string
tag6?: string
tag7?: string
export type TagData = {
[K in TagSlot]?: string
}
interface TagInputProps {
@@ -22,22 +18,30 @@ interface TagInputProps {
onTagsChange: (tags: TagData) => void
disabled?: boolean
className?: string
knowledgeBaseId?: string | null
documentId?: string | null
}
const TAG_LABELS = [
{ key: 'tag1' as keyof TagData, label: 'Tag 1', placeholder: 'Enter tag value' },
{ key: 'tag2' as keyof TagData, label: 'Tag 2', placeholder: 'Enter tag value' },
{ key: 'tag3' as keyof TagData, label: 'Tag 3', placeholder: 'Enter tag value' },
{ key: 'tag4' as keyof TagData, label: 'Tag 4', placeholder: 'Enter tag value' },
{ key: 'tag5' as keyof TagData, label: 'Tag 5', placeholder: 'Enter tag value' },
{ key: 'tag6' as keyof TagData, label: 'Tag 6', placeholder: 'Enter tag value' },
{ key: 'tag7' as keyof TagData, label: 'Tag 7', placeholder: 'Enter tag value' },
]
const TAG_LABELS = TAG_SLOTS.map((slot, index) => ({
key: slot as keyof TagData,
label: `Tag ${index + 1}`,
placeholder: 'Enter tag value',
}))
export function TagInput({ tags, onTagsChange, disabled = false, className = '' }: TagInputProps) {
export function TagInput({
tags,
onTagsChange,
disabled = false,
className = '',
knowledgeBaseId = null,
documentId = null,
}: TagInputProps) {
const [isOpen, setIsOpen] = useState(false)
const [showAllTags, setShowAllTags] = useState(false)
// Use custom tag definitions if available
const { getTagLabel } = useKnowledgeBaseTagDefinitions(knowledgeBaseId)
const handleTagChange = (tagKey: keyof TagData, value: string) => {
onTagsChange({
...tags,
@@ -53,7 +57,15 @@ export function TagInput({ tags, onTagsChange, disabled = false, className = ''
}
const hasAnyTags = Object.values(tags).some((tag) => tag?.trim())
const visibleTags = showAllTags ? TAG_LABELS : TAG_LABELS.slice(0, 2)
// Create tag labels using custom definitions or fallback to defaults
const tagLabels = TAG_LABELS.map(({ key, placeholder }) => ({
key,
label: getTagLabel(key),
placeholder,
}))
const visibleTags = showAllTags ? tagLabels : tagLabels.slice(0, 2)
return (
<div className={className}>
@@ -153,7 +165,7 @@ export function TagInput({ tags, onTagsChange, disabled = false, className = ''
<div className='flex flex-wrap gap-1'>
{Object.entries(tags).map(([key, value]) => {
if (!value?.trim()) return null
const tagLabel = TAG_LABELS.find((t) => t.key === key)?.label || key
const tagLabel = getTagLabel(key)
return (
<span
key={key}
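
The mapped type above collapses the seven hand-written optional fields into one definition derived from the constants. A quick sketch of the equivalence, assuming `TAG_SLOTS` is exported `as const`:

```ts
const TAG_SLOTS = ['tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag6', 'tag7'] as const
type TagSlot = (typeof TAG_SLOTS)[number] // 'tag1' | 'tag2' | … | 'tag7'

// Expands to exactly the interface it replaced:
type TagData = { [K in TagSlot]?: string }

const ok: TagData = { tag3: 'alpha' } // compiles
// const bad: TagData = { tag8: 'x' } // error: 'tag8' is not a TagSlot
```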

View File

@@ -274,14 +274,7 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
const processPayload = {
documents: uploadedFiles.map((file) => ({
...file,
// Extract tags from file if they exist (added by upload modal)
tag1: (file as any).tag1,
tag2: (file as any).tag2,
tag3: (file as any).tag3,
tag4: (file as any).tag4,
tag5: (file as any).tag5,
tag6: (file as any).tag6,
tag7: (file as any).tag7,
// Tags are already included in the file object from createUploadedFile
})),
processingOptions: {
chunkSize: processingOptions.chunkSize || 1024,
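
The seven explicit `(file as any).tagN` reads were redundant because the tag values are already own enumerable properties of each uploaded-file object (assuming `createUploadedFile` copies them through), so the plain `...file` spread carries them into the payload:

```ts
// Hypothetical uploaded-file shape — tags arrive as own properties.
const uploaded = { id: 'doc_1', filename: 'notes.txt', tag1: 'infra' }
const payloadDoc = { ...uploaded }
console.log(payloadDoc.tag1) // 'infra' — no per-slot copying needed
```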

View File

@@ -0,0 +1,218 @@
'use client'
import { Plus, X } from 'lucide-react'
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import { MAX_TAG_SLOTS } from '@/lib/constants/knowledge'
import type { SubBlockConfig } from '@/blocks/types'
import { useKnowledgeBaseTagDefinitions } from '@/hooks/use-knowledge-base-tag-definitions'
import { useSubBlockValue } from '../../hooks/use-sub-block-value'
interface DocumentTag {
id: string
tagName: string // This will be mapped to displayName for API
fieldType: string
value: string
}
interface DocumentTagEntryProps {
blockId: string
subBlock: SubBlockConfig
disabled?: boolean
isPreview?: boolean
previewValue?: any
isConnecting?: boolean
}
export function DocumentTagEntry({
blockId,
subBlock,
disabled = false,
isPreview = false,
previewValue,
isConnecting = false,
}: DocumentTagEntryProps) {
const [storeValue, setStoreValue] = useSubBlockValue(blockId, subBlock.id)
// Get the knowledge base ID from other sub-blocks
const [knowledgeBaseIdValue] = useSubBlockValue(blockId, 'knowledgeBaseId')
const knowledgeBaseId = knowledgeBaseIdValue || null
// Use KB tag definitions hook to get available tags
const { tagDefinitions, isLoading } = useKnowledgeBaseTagDefinitions(knowledgeBaseId)
// Parse the current value to extract tags
const parseTags = (tagValue: string): DocumentTag[] => {
if (!tagValue) return []
try {
return JSON.parse(tagValue)
} catch {
return []
}
}
const currentValue = isPreview ? previewValue : storeValue
const tags = parseTags(currentValue || '')
const updateTags = (newTags: DocumentTag[]) => {
if (isPreview) return
const value = newTags.length > 0 ? JSON.stringify(newTags) : null
setStoreValue(value)
}
const removeTag = (tagId: string) => {
updateTags(tags.filter((t) => t.id !== tagId))
}
const updateTag = (tagId: string, updates: Partial<DocumentTag>) => {
updateTags(tags.map((tag) => (tag.id === tagId ? { ...tag, ...updates } : tag)))
}
// Get available tag names that aren't already used
const usedTagNames = new Set(tags.map((tag) => tag.tagName).filter(Boolean))
const availableTagNames = tagDefinitions
.map((def) => def.displayName)
.filter((name) => !usedTagNames.has(name))
if (isLoading) {
return <div className='p-4 text-muted-foreground text-sm'>Loading tag definitions...</div>
}
return (
<div className='space-y-4'>
{/* Available Tags Section */}
{availableTagNames.length > 0 && (
<div>
<div className='mb-2 font-medium text-muted-foreground text-sm'>
Available Tags (click to add)
</div>
<div className='flex flex-wrap gap-2'>
{availableTagNames.map((tagName) => {
const tagDef = tagDefinitions.find((def) => def.displayName === tagName)
return (
<button
key={tagName}
onClick={() => {
// Check for duplicates before adding
if (!usedTagNames.has(tagName)) {
const newTag: DocumentTag = {
id: Date.now().toString(),
tagName,
fieldType: tagDef?.fieldType || 'text',
value: '',
}
updateTags([...tags, newTag])
}
}}
disabled={disabled || isConnecting}
className='inline-flex items-center gap-1 rounded-full border border-gray-300 border-dashed bg-gray-50 px-3 py-1 text-gray-600 text-sm transition-colors hover:border-blue-300 hover:bg-blue-50 hover:text-blue-700 disabled:opacity-50'
>
<Plus className='h-3 w-3' />
{tagName}
<span className='text-muted-foreground text-xs'>
({tagDef?.fieldType || 'text'})
</span>
</button>
)
})}
</div>
</div>
)}
{/* Selected Tags Section */}
{tags.length > 0 && (
<div>
<div className='space-y-2'>
{tags.map((tag) => (
<div key={tag.id} className='flex items-center gap-2 rounded-lg border bg-white p-3'>
{/* Tag Name */}
<div className='flex-1'>
<div className='font-medium text-gray-900 text-sm'>
{tag.tagName || 'Unnamed Tag'}
</div>
<div className='text-muted-foreground text-xs'>{tag.fieldType}</div>
</div>
{/* Value Input */}
<div className='flex-1'>
<Input
value={tag.value}
onChange={(e) => updateTag(tag.id, { value: e.target.value })}
placeholder='Value'
disabled={disabled || isConnecting}
className='h-9 placeholder:text-xs'
type={tag.fieldType === 'number' ? 'number' : 'text'}
/>
</div>
{/* Remove Button */}
<Button
onClick={() => removeTag(tag.id)}
variant='ghost'
size='sm'
disabled={disabled || isConnecting}
className='h-9 w-9 p-0 text-muted-foreground hover:text-red-600'
>
<X className='h-4 w-4' />
</Button>
</div>
))}
</div>
</div>
)}
{/* Create New Tag Section */}
<div>
<div className='mb-2 font-medium text-muted-foreground text-sm'>Create New Tag</div>
<div className='flex items-center gap-2 rounded-lg border border-gray-300 border-dashed bg-gray-50 p-3'>
<div className='flex-1'>
<Input
placeholder={tagDefinitions.length >= MAX_TAG_SLOTS ? '' : 'Tag name'}
disabled={disabled || isConnecting || tagDefinitions.length >= MAX_TAG_SLOTS}
className='h-9 border-0 bg-transparent p-0 placeholder:text-xs focus-visible:ring-0'
onKeyDown={(e) => {
if (e.key === 'Enter' && e.currentTarget.value.trim()) {
// Capture the element: React nulls e.currentTarget once the handler
// returns, so reading it inside setTimeout would fail.
const input = e.currentTarget
const tagName = input.value.trim()
// Check for duplicates
if (usedTagNames.has(tagName)) {
// Visual feedback for duplicate - could add toast notification here
input.style.borderColor = '#ef4444'
setTimeout(() => {
input.style.borderColor = ''
}, 1000)
return
}
const newTag: DocumentTag = {
id: Date.now().toString(),
tagName,
fieldType: 'text',
value: '',
}
updateTags([...tags, newTag])
e.currentTarget.value = ''
}
}}
/>
</div>
<div className='text-muted-foreground text-xs'>
{tagDefinitions.length >= MAX_TAG_SLOTS
? `All ${MAX_TAG_SLOTS} tag slots used in this knowledge base`
: usedTagNames.size > 0
? 'Press Enter (no duplicates)'
: 'Press Enter to add'}
</div>
</div>
</div>
{/* Empty State */}
{tags.length === 0 && availableTagNames.length === 0 && (
<div className='py-8 text-center text-muted-foreground'>
<div className='text-sm'>No tags available</div>
<div className='text-xs'>Create a new tag above to get started</div>
</div>
)}
</div>
)
}
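
This workflow-block variant of `DocumentTagEntry` persists its state through the sub-block store as a JSON string (or `null` when empty), with a deliberately forgiving parser. A sketch of the round-trip:

```ts
interface DocumentTag {
  id: string
  tagName: string
  fieldType: string
  value: string
}

const tags: DocumentTag[] = [
  { id: '1722200000000', tagName: 'Team', fieldType: 'text', value: 'infra' },
]
const stored = tags.length > 0 ? JSON.stringify(tags) : null

// Parsing never throws — malformed JSON just resets to an empty list.
function parseTags(tagValue: string): DocumentTag[] {
  if (!tagValue) return []
  try {
    return JSON.parse(tagValue)
  } catch {
    return []
  }
}
```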

View File

@@ -0,0 +1,118 @@
'use client'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from '@/components/ui/select'
import type { SubBlockConfig } from '@/blocks/types'
import { useKnowledgeBaseTagDefinitions } from '@/hooks/use-knowledge-base-tag-definitions'
import { useSubBlockValue } from '../../hooks/use-sub-block-value'
interface KnowledgeTagFilterProps {
blockId: string
subBlock: SubBlockConfig
disabled?: boolean
isPreview?: boolean
previewValue?: string | null
isConnecting?: boolean
}
export function KnowledgeTagFilter({
blockId,
subBlock,
disabled = false,
isPreview = false,
previewValue,
isConnecting = false,
}: KnowledgeTagFilterProps) {
const [storeValue, setStoreValue] = useSubBlockValue(blockId, subBlock.id)
// Get the knowledge base ID and document ID from other sub-blocks
const [knowledgeBaseIdValue] = useSubBlockValue(blockId, 'knowledgeBaseIds')
const [knowledgeBaseIdSingleValue] = useSubBlockValue(blockId, 'knowledgeBaseId')
const [documentIdValue] = useSubBlockValue(blockId, 'documentId')
// Determine which knowledge base ID to use
const knowledgeBaseId =
knowledgeBaseIdSingleValue ||
(typeof knowledgeBaseIdValue === 'string' ? knowledgeBaseIdValue.split(',')[0] : null)
// Use KB tag definitions hook to get available tags
const { tagDefinitions, isLoading, getTagLabel } = useKnowledgeBaseTagDefinitions(knowledgeBaseId)
// Parse the current value to extract tag name and value
const parseTagFilter = (filterValue: string) => {
if (!filterValue) return { tagName: '', tagValue: '' }
const [tagName, ...valueParts] = filterValue.split(':')
return { tagName: tagName?.trim() || '', tagValue: valueParts.join(':').trim() || '' }
}
const currentValue = isPreview ? previewValue : storeValue
const { tagName, tagValue } = parseTagFilter(currentValue || '')
const handleTagNameChange = (newTagName: string) => {
if (isPreview) return
const newValue =
newTagName && tagValue ? `${newTagName}:${tagValue}` : newTagName || tagValue || ''
setStoreValue(newValue.trim() || null)
}
const handleTagValueChange = (newTagValue: string) => {
if (isPreview) return
const newValue =
tagName && newTagValue ? `${tagName}:${newTagValue}` : tagName || newTagValue || ''
setStoreValue(newValue.trim() || null)
}
if (isPreview) {
return (
<div className='space-y-1'>
<Label className='font-medium text-muted-foreground text-xs'>Tag Filter</Label>
<Input
value={currentValue || ''}
disabled
placeholder='Tag filter preview'
className='text-sm'
/>
</div>
)
}
return (
<div className='space-y-2'>
{/* Tag Name Selector */}
<Select
value={tagName}
onValueChange={handleTagNameChange}
disabled={disabled || isConnecting || isLoading}
>
<SelectTrigger className='text-sm'>
<SelectValue placeholder='Select tag' />
</SelectTrigger>
<SelectContent>
{tagDefinitions.map((tag) => (
<SelectItem key={tag.id} value={tag.displayName}>
{tag.displayName}
</SelectItem>
))}
</SelectContent>
</Select>
{/* Tag Value Input - only show if tag is selected */}
{tagName && (
<Input
value={tagValue}
onChange={(e) => handleTagValueChange(e.target.value)}
placeholder={`Enter ${tagName} value`}
disabled={disabled || isConnecting}
className='text-sm'
/>
)}
</div>
)
}
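
The single-filter value is encoded as a `name:value` string. Note that `valueParts.join(':')` splits only on the first colon, so colons inside the value survive the round-trip:

```ts
function parseTagFilter(filterValue: string) {
  if (!filterValue) return { tagName: '', tagValue: '' }
  const [tagName, ...valueParts] = filterValue.split(':')
  return { tagName: tagName?.trim() || '', tagValue: valueParts.join(':').trim() || '' }
}

parseTagFilter('source:https://example.com')
// → { tagName: 'source', tagValue: 'https://example.com' }
```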

View File

@@ -0,0 +1,169 @@
'use client'
import { Plus, X } from 'lucide-react'
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from '@/components/ui/select'
import type { SubBlockConfig } from '@/blocks/types'
import { useKnowledgeBaseTagDefinitions } from '@/hooks/use-knowledge-base-tag-definitions'
import { useSubBlockValue } from '../../hooks/use-sub-block-value'
interface TagFilter {
id: string
tagName: string
tagValue: string
}
interface KnowledgeTagFiltersProps {
blockId: string
subBlock: SubBlockConfig
disabled?: boolean
isPreview?: boolean
previewValue?: string | null
isConnecting?: boolean
}
export function KnowledgeTagFilters({
blockId,
subBlock,
disabled = false,
isPreview = false,
previewValue,
isConnecting = false,
}: KnowledgeTagFiltersProps) {
const [storeValue, setStoreValue] = useSubBlockValue(blockId, subBlock.id)
// Get the knowledge base ID from other sub-blocks
const [knowledgeBaseIdValue] = useSubBlockValue(blockId, 'knowledgeBaseId')
const knowledgeBaseId = knowledgeBaseIdValue || null
// Use KB tag definitions hook to get available tags
const { tagDefinitions, isLoading } = useKnowledgeBaseTagDefinitions(knowledgeBaseId)
// Parse the current value to extract filters
const parseFilters = (filterValue: string): TagFilter[] => {
if (!filterValue) return []
try {
return JSON.parse(filterValue)
} catch {
return []
}
}
const currentValue = isPreview ? previewValue : storeValue
const filters = parseFilters(currentValue || '')
const updateFilters = (newFilters: TagFilter[]) => {
if (isPreview) return
const value = newFilters.length > 0 ? JSON.stringify(newFilters) : null
setStoreValue(value)
}
const addFilter = () => {
const newFilter: TagFilter = {
id: Date.now().toString(),
tagName: '',
tagValue: '',
}
updateFilters([...filters, newFilter])
}
const removeFilter = (filterId: string) => {
updateFilters(filters.filter((f) => f.id !== filterId))
}
const updateFilter = (filterId: string, field: keyof TagFilter, value: string) => {
updateFilters(filters.map((f) => (f.id === filterId ? { ...f, [field]: value } : f)))
}
if (isPreview) {
return (
<div className='space-y-1'>
<Label className='font-medium text-muted-foreground text-xs'>Tag Filters</Label>
<div className='text-muted-foreground text-sm'>
{filters.length > 0 ? `${filters.length} filter(s)` : 'No filters'}
</div>
</div>
)
}
return (
<div className='space-y-3'>
<div className='flex items-center justify-end'>
<Button
type='button'
variant='ghost'
size='sm'
onClick={addFilter}
disabled={disabled || isConnecting || isLoading}
className='h-6 px-2 text-xs'
>
<Plus className='mr-1 h-3 w-3' />
Add Filter
</Button>
</div>
{filters.length === 0 && (
<div className='py-4 text-center text-muted-foreground text-sm'>
No tag filters. Click "Add Filter" to add one.
</div>
)}
<div className='space-y-2'>
{filters.map((filter) => (
<div key={filter.id} className='flex items-center gap-2 rounded-md border p-2'>
{/* Tag Name Selector */}
<div className='flex-1'>
<Select
value={filter.tagName}
onValueChange={(value) => updateFilter(filter.id, 'tagName', value)}
disabled={disabled || isConnecting || isLoading}
>
<SelectTrigger className='h-8 text-sm'>
<SelectValue placeholder='Select tag' />
</SelectTrigger>
<SelectContent>
{tagDefinitions.map((tag) => (
<SelectItem key={tag.id} value={tag.displayName}>
{tag.displayName}
</SelectItem>
))}
</SelectContent>
</Select>
</div>
{/* Tag Value Input */}
<div className='flex-1'>
<Input
value={filter.tagValue}
onChange={(e) => updateFilter(filter.id, 'tagValue', e.target.value)}
placeholder={filter.tagName ? `Enter ${filter.tagName} value` : 'Enter value'}
disabled={disabled || isConnecting}
className='h-8 text-sm'
/>
</div>
{/* Remove Button */}
<Button
type='button'
variant='ghost'
size='sm'
onClick={() => removeFilter(filter.id)}
disabled={disabled || isConnecting}
className='h-8 w-8 p-0 text-muted-foreground hover:text-destructive'
>
<X className='h-3 w-3' />
</Button>
</div>
))}
</div>
</div>
)
}
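
Unlike the single-filter `name:value` string above, the multi-filter variant serializes a JSON array into the sub-block value — the same string the Knowledge block's `tagFilters` input receives:

```ts
interface TagFilter {
  id: string
  tagName: string
  tagValue: string
}

const filters: TagFilter[] = [
  { id: '1', tagName: 'team', tagValue: 'infra' },
  { id: '2', tagName: 'year', tagValue: '2025' },
]
const stored = JSON.stringify(filters)
// '[{"id":"1","tagName":"team","tagValue":"infra"},{"id":"2","tagName":"year","tagValue":"2025"}]'
```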

View File

@@ -1342,8 +1342,7 @@ export function ToolInput({
const requiresOAuth = !isCustomTool && toolRequiresOAuth(currentToolId)
const oauthConfig = !isCustomTool ? getToolOAuthConfig(currentToolId) : null
// Check if the tool has any expandable content
const hasExpandableContent = isCustomTool || displayParams.length > 0 || requiresOAuth
// Tools are always expandable so users can access the interface
return (
<div
@@ -1378,12 +1377,12 @@ export function ToolInput({
<div
className={cn(
'flex items-center justify-between bg-accent/50 p-2',
hasExpandableContent ? 'cursor-pointer' : 'cursor-default'
'cursor-pointer'
)}
onClick={() => {
if (isCustomTool) {
handleEditCustomTool(toolIndex)
} else if (hasExpandableContent) {
} else {
toggleToolExpansion(toolIndex)
}
}}
@@ -1492,7 +1491,7 @@ export function ToolInput({
</div>
</div>
{!isCustomTool && hasExpandableContent && tool.isExpanded && (
{!isCustomTool && tool.isExpanded && (
<div className='space-y-3 overflow-visible p-3'>
{/* Operation dropdown for tools with multiple operations */}
{(() => {

View File

@@ -1,4 +1,4 @@
import { useEffect, useState } from 'react'
import { useEffect, useMemo, useState } from 'react'
import { X } from 'lucide-react'
import { Button } from '@/components/ui/button'
import {
@@ -392,8 +392,9 @@ export function WebhookModal({
microsoftTeamsHmacSecret,
])
// Use the provided path or generate a UUID-based path
const formattedPath = webhookPath && webhookPath.trim() !== '' ? webhookPath : crypto.randomUUID()
const formattedPath = useMemo(() => {
return webhookPath && webhookPath.trim() !== '' ? webhookPath : crypto.randomUUID()
}, [webhookPath])
// Construct the full webhook URL
const baseUrl =
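
The `useMemo` here is the copy-button fix: previously `crypto.randomUUID()` ran on every render, so any state change (such as toggling a "copied" indicator) minted a new fallback path and rewrote the displayed webhook URL. A sketch of the idea as a standalone hook:

```ts
import { useMemo } from 'react'

// The fallback UUID is now computed once per webhookPath value,
// not once per render.
function useFormattedPath(webhookPath: string | undefined): string {
  return useMemo(
    () => (webhookPath && webhookPath.trim() !== '' ? webhookPath : crypto.randomUUID()),
    [webhookPath]
  )
}
```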

View File

@@ -32,6 +32,9 @@ import {
import { getBlock } from '@/blocks/index'
import type { SubBlockConfig } from '@/blocks/types'
import { useWorkflowStore } from '@/stores/workflows/workflow/store'
import { DocumentTagEntry } from './components/document-tag-entry/document-tag-entry'
import { KnowledgeTagFilter } from './components/knowledge-tag-filter/knowledge-tag-filter'
import { KnowledgeTagFilters } from './components/knowledge-tag-filters/knowledge-tag-filters'
interface SubBlockProps {
blockId: string
@@ -353,6 +356,40 @@ export function SubBlock({
previewValue={previewValue}
/>
)
case 'knowledge-tag-filter':
return (
<KnowledgeTagFilter
blockId={blockId}
subBlock={config}
disabled={isDisabled}
isPreview={isPreview}
previewValue={previewValue}
isConnecting={isConnecting}
/>
)
case 'knowledge-tag-filters':
return (
<KnowledgeTagFilters
blockId={blockId}
subBlock={config}
disabled={isDisabled}
isPreview={isPreview}
previewValue={previewValue}
isConnecting={isConnecting}
/>
)
case 'document-tag-entry':
return (
<DocumentTagEntry
blockId={blockId}
subBlock={config}
disabled={isDisabled}
isPreview={isPreview}
previewValue={previewValue}
isConnecting={isConnecting}
/>
)
case 'document-selector':
return (
<DocumentSelector

View File

@@ -0,0 +1,159 @@
import { ArxivIcon } from '@/components/icons'
import type { BlockConfig } from '@/blocks/types'
import type { ArxivResponse } from '@/tools/arxiv/types'
export const ArxivBlock: BlockConfig<ArxivResponse> = {
type: 'arxiv',
name: 'ArXiv',
description: 'Search and retrieve academic papers from ArXiv',
longDescription:
'Search for academic papers, retrieve metadata, download papers, and access the vast collection of scientific research on ArXiv.',
docsLink: 'https://docs.simstudio.ai/tools/arxiv',
category: 'tools',
bgColor: '#E0E0E0',
icon: ArxivIcon,
subBlocks: [
{
id: 'operation',
title: 'Operation',
type: 'dropdown',
layout: 'full',
options: [
{ label: 'Search Papers', id: 'arxiv_search' },
{ label: 'Get Paper Details', id: 'arxiv_get_paper' },
{ label: 'Get Author Papers', id: 'arxiv_get_author_papers' },
],
value: () => 'arxiv_search',
},
// Search operation inputs
{
id: 'query',
title: 'Search Query',
type: 'long-input',
layout: 'full',
placeholder: 'Enter search terms (e.g., "machine learning", "quantum physics")...',
condition: { field: 'operation', value: 'arxiv_search' },
},
{
id: 'searchField',
title: 'Search Field',
type: 'dropdown',
layout: 'full',
options: [
{ label: 'All Fields', id: 'all' },
{ label: 'Title', id: 'ti' },
{ label: 'Author', id: 'au' },
{ label: 'Abstract', id: 'abs' },
{ label: 'Comment', id: 'co' },
{ label: 'Journal Reference', id: 'jr' },
{ label: 'Category', id: 'cat' },
{ label: 'Report Number', id: 'rn' },
],
value: () => 'all',
condition: { field: 'operation', value: 'arxiv_search' },
},
{
id: 'maxResults',
title: 'Max Results',
type: 'short-input',
layout: 'full',
placeholder: '10',
condition: { field: 'operation', value: 'arxiv_search' },
},
{
id: 'sortBy',
title: 'Sort By',
type: 'dropdown',
layout: 'full',
options: [
{ label: 'Relevance', id: 'relevance' },
{ label: 'Last Updated Date', id: 'lastUpdatedDate' },
{ label: 'Submitted Date', id: 'submittedDate' },
],
value: () => 'relevance',
condition: { field: 'operation', value: 'arxiv_search' },
},
{
id: 'sortOrder',
title: 'Sort Order',
type: 'dropdown',
layout: 'full',
options: [
{ label: 'Descending', id: 'descending' },
{ label: 'Ascending', id: 'ascending' },
],
value: () => 'descending',
condition: { field: 'operation', value: 'arxiv_search' },
},
// Get Paper Details operation inputs
{
id: 'paperId',
title: 'Paper ID',
type: 'short-input',
layout: 'full',
placeholder: 'Enter ArXiv paper ID (e.g., 1706.03762, cs.AI/0001001)',
condition: { field: 'operation', value: 'arxiv_get_paper' },
},
// Get Author Papers operation inputs
{
id: 'authorName',
title: 'Author Name',
type: 'short-input',
layout: 'full',
placeholder: 'Enter author name (e.g., "John Smith")...',
condition: { field: 'operation', value: 'arxiv_get_author_papers' },
},
{
id: 'maxResults',
title: 'Max Results',
type: 'short-input',
layout: 'full',
placeholder: '10',
condition: { field: 'operation', value: 'arxiv_get_author_papers' },
},
],
tools: {
access: ['arxiv_search', 'arxiv_get_paper', 'arxiv_get_author_papers'],
config: {
tool: (params) => {
// Convert maxResults to a number for operations that use it
if (params.maxResults) {
params.maxResults = Number(params.maxResults)
}
switch (params.operation) {
case 'arxiv_search':
return 'arxiv_search'
case 'arxiv_get_paper':
return 'arxiv_get_paper'
case 'arxiv_get_author_papers':
return 'arxiv_get_author_papers'
default:
return 'arxiv_search'
}
},
},
},
inputs: {
operation: { type: 'string', required: true },
// Search operation
query: { type: 'string', required: false },
searchField: { type: 'string', required: false },
maxResults: { type: 'number', required: false },
sortBy: { type: 'string', required: false },
sortOrder: { type: 'string', required: false },
// Get Paper Details operation
paperId: { type: 'string', required: false },
// Get Author Papers operation
authorName: { type: 'string', required: false },
},
outputs: {
// Search output
papers: 'json',
totalResults: 'number',
// Get Paper Details output
paper: 'json',
// Get Author Papers output
authorPapers: 'json',
},
}
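
The block's tool router does two things with the raw sub-block params: it coerces `maxResults` from its string input to a number, and maps the operation to a tool id. Since each operation id doubles as the tool id, the switch is effectively identity with an `arxiv_search` default — a sketch:

```ts
const params: Record<string, any> = { operation: 'arxiv_search', query: 'quantum', maxResults: '25' }

if (params.maxResults) params.maxResults = Number(params.maxResults) // '25' → 25
const tool = ['arxiv_search', 'arxiv_get_paper', 'arxiv_get_author_papers'].includes(params.operation)
  ? params.operation
  : 'arxiv_search'
// tool === 'arxiv_search', params.maxResults === 25
```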

View File

@@ -28,8 +28,8 @@ export const KnowledgeBlock: BlockConfig = {
},
params: (params) => {
// Validate required fields for each operation
if (params.operation === 'search' && !params.knowledgeBaseIds) {
throw new Error('Knowledge base IDs are required for search operation')
if (params.operation === 'search' && !params.knowledgeBaseId) {
throw new Error('Knowledge base ID is required for search operation')
}
if (
(params.operation === 'upload_chunk' || params.operation === 'create_document') &&
@@ -49,21 +49,16 @@ export const KnowledgeBlock: BlockConfig = {
},
inputs: {
operation: { type: 'string', required: true },
knowledgeBaseIds: { type: 'string', required: false },
knowledgeBaseId: { type: 'string', required: false },
query: { type: 'string', required: false },
topK: { type: 'number', required: false },
documentId: { type: 'string', required: false },
content: { type: 'string', required: false },
name: { type: 'string', required: false },
// Tag filters for search
tag1: { type: 'string', required: false },
tag2: { type: 'string', required: false },
tag3: { type: 'string', required: false },
tag4: { type: 'string', required: false },
tag5: { type: 'string', required: false },
tag6: { type: 'string', required: false },
tag7: { type: 'string', required: false },
// Dynamic tag filters for search
tagFilters: { type: 'string', required: false },
// Document tags for create document (JSON string of tag objects)
documentTags: { type: 'string', required: false },
},
outputs: {
results: 'json',
@@ -83,15 +78,6 @@ export const KnowledgeBlock: BlockConfig = {
],
value: () => 'search',
},
{
id: 'knowledgeBaseIds',
title: 'Knowledge Bases',
type: 'knowledge-base-selector',
layout: 'full',
placeholder: 'Select knowledge bases',
multiSelect: true,
condition: { field: 'operation', value: 'search' },
},
{
id: 'knowledgeBaseId',
title: 'Knowledge Base',
@@ -99,7 +85,7 @@ export const KnowledgeBlock: BlockConfig = {
layout: 'full',
placeholder: 'Select knowledge base',
multiSelect: false,
condition: { field: 'operation', value: ['upload_chunk', 'create_document'] },
condition: { field: 'operation', value: ['search', 'upload_chunk', 'create_document'] },
},
{
id: 'query',
@@ -118,65 +104,11 @@ export const KnowledgeBlock: BlockConfig = {
condition: { field: 'operation', value: 'search' },
},
{
id: 'tag1',
title: 'Tag 1 Filter',
type: 'short-input',
layout: 'half',
placeholder: 'Filter by tag 1',
condition: { field: 'operation', value: 'search' },
mode: 'advanced',
},
{
id: 'tag2',
title: 'Tag 2 Filter',
type: 'short-input',
layout: 'half',
placeholder: 'Filter by tag 2',
condition: { field: 'operation', value: 'search' },
mode: 'advanced',
},
{
id: 'tag3',
title: 'Tag 3 Filter',
type: 'short-input',
layout: 'half',
placeholder: 'Filter by tag 3',
condition: { field: 'operation', value: 'search' },
mode: 'advanced',
},
{
id: 'tag4',
title: 'Tag 4 Filter',
type: 'short-input',
layout: 'half',
placeholder: 'Filter by tag 4',
condition: { field: 'operation', value: 'search' },
mode: 'advanced',
},
{
id: 'tag5',
title: 'Tag 5 Filter',
type: 'short-input',
layout: 'half',
placeholder: 'Filter by tag 5',
condition: { field: 'operation', value: 'search' },
mode: 'advanced',
},
{
id: 'tag6',
title: 'Tag 6 Filter',
type: 'short-input',
layout: 'half',
placeholder: 'Filter by tag 6',
condition: { field: 'operation', value: 'search' },
mode: 'advanced',
},
{
id: 'tag7',
title: 'Tag 7 Filter',
type: 'short-input',
layout: 'half',
placeholder: 'Filter by tag 7',
id: 'tagFilters',
title: 'Tag Filters',
type: 'knowledge-tag-filters',
layout: 'full',
placeholder: 'Add tag filters',
condition: { field: 'operation', value: 'search' },
mode: 'advanced',
},
@@ -214,69 +146,13 @@ export const KnowledgeBlock: BlockConfig = {
rows: 6,
condition: { field: 'operation', value: ['create_document'] },
},
// Tag inputs for Create Document (in advanced mode)
// Dynamic tag entry for Create Document
{
id: 'tag1',
title: 'Tag 1',
type: 'short-input',
layout: 'half',
placeholder: 'Enter tag 1 value',
id: 'documentTags',
title: 'Document Tags',
type: 'document-tag-entry',
layout: 'full',
condition: { field: 'operation', value: 'create_document' },
mode: 'advanced',
},
{
id: 'tag2',
title: 'Tag 2',
type: 'short-input',
layout: 'half',
placeholder: 'Enter tag 2 value',
condition: { field: 'operation', value: 'create_document' },
mode: 'advanced',
},
{
id: 'tag3',
title: 'Tag 3',
type: 'short-input',
layout: 'half',
placeholder: 'Enter tag 3 value',
condition: { field: 'operation', value: 'create_document' },
mode: 'advanced',
},
{
id: 'tag4',
title: 'Tag 4',
type: 'short-input',
layout: 'half',
placeholder: 'Enter tag 4 value',
condition: { field: 'operation', value: 'create_document' },
mode: 'advanced',
},
{
id: 'tag5',
title: 'Tag 5',
type: 'short-input',
layout: 'half',
placeholder: 'Enter tag 5 value',
condition: { field: 'operation', value: 'create_document' },
mode: 'advanced',
},
{
id: 'tag6',
title: 'Tag 6',
type: 'short-input',
layout: 'half',
placeholder: 'Enter tag 6 value',
condition: { field: 'operation', value: 'create_document' },
mode: 'advanced',
},
{
id: 'tag7',
title: 'Tag 7',
type: 'short-input',
layout: 'half',
placeholder: 'Enter tag 7 value',
condition: { field: 'operation', value: 'create_document' },
mode: 'advanced',
},
],
}
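
With the seven fixed `tagN` inputs gone, a search call now carries one `knowledgeBaseId` and a single `tagFilters` string — the JSON produced by the `knowledge-tag-filters` sub-block shown earlier. A hypothetical params object (the server-side mapping of display names back to tag slots is assumed, not shown in this diff):

```ts
const searchParams = {
  operation: 'search',
  knowledgeBaseId: 'kb_123', // single selector replaces knowledgeBaseIds
  query: 'incident postmortems',
  topK: 10,
  tagFilters: JSON.stringify([{ id: '1', tagName: 'team', tagValue: 'infra' }]),
}
```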

View File

@@ -0,0 +1,108 @@
import { WikipediaIcon } from '@/components/icons'
import type { BlockConfig } from '@/blocks/types'
import type { WikipediaResponse } from '@/tools/wikipedia/types'
export const WikipediaBlock: BlockConfig<WikipediaResponse> = {
type: 'wikipedia',
name: 'Wikipedia',
description: 'Search and retrieve content from Wikipedia',
longDescription:
"Access Wikipedia articles, search for pages, get summaries, retrieve full content, and discover random articles from the world's largest encyclopedia.",
docsLink: 'https://docs.simstudio.ai/tools/wikipedia',
category: 'tools',
bgColor: '#000000',
icon: WikipediaIcon,
subBlocks: [
{
id: 'operation',
title: 'Operation',
type: 'dropdown',
layout: 'full',
options: [
{ label: 'Get Page Summary', id: 'wikipedia_summary' },
{ label: 'Search Pages', id: 'wikipedia_search' },
{ label: 'Get Page Content', id: 'wikipedia_content' },
{ label: 'Random Page', id: 'wikipedia_random' },
],
value: () => 'wikipedia_summary',
},
// Page Summary operation inputs
{
id: 'pageTitle',
title: 'Page Title',
type: 'long-input',
layout: 'full',
placeholder: 'Enter Wikipedia page title (e.g., "Python programming language")...',
condition: { field: 'operation', value: 'wikipedia_summary' },
},
// Search Pages operation inputs
{
id: 'query',
title: 'Search Query',
type: 'long-input',
layout: 'full',
placeholder: 'Enter search terms...',
condition: { field: 'operation', value: 'wikipedia_search' },
},
{
id: 'searchLimit',
title: 'Max Results',
type: 'short-input',
layout: 'full',
placeholder: '10',
condition: { field: 'operation', value: 'wikipedia_search' },
},
// Get Page Content operation inputs
{
id: 'pageTitle',
title: 'Page Title',
type: 'long-input',
layout: 'full',
placeholder: 'Enter Wikipedia page title...',
condition: { field: 'operation', value: 'wikipedia_content' },
},
],
tools: {
access: ['wikipedia_summary', 'wikipedia_search', 'wikipedia_content', 'wikipedia_random'],
config: {
tool: (params) => {
// Convert searchLimit to a number for search operation
if (params.searchLimit) {
params.searchLimit = Number(params.searchLimit)
}
switch (params.operation) {
case 'wikipedia_summary':
return 'wikipedia_summary'
case 'wikipedia_search':
return 'wikipedia_search'
case 'wikipedia_content':
return 'wikipedia_content'
case 'wikipedia_random':
return 'wikipedia_random'
default:
return 'wikipedia_summary'
}
},
},
},
inputs: {
operation: { type: 'string', required: true },
// Page Summary & Content operations
pageTitle: { type: 'string', required: false },
// Search operation
query: { type: 'string', required: false },
searchLimit: { type: 'number', required: false },
},
outputs: {
// Page Summary output
summary: 'json',
// Search output
searchResults: 'json',
totalHits: 'number',
// Page Content output
content: 'json',
// Random Page output
randomPage: 'json',
},
}

View File

@@ -6,6 +6,7 @@
import { AgentBlock } from '@/blocks/blocks/agent'
import { AirtableBlock } from '@/blocks/blocks/airtable'
import { ApiBlock } from '@/blocks/blocks/api'
import { ArxivBlock } from '@/blocks/blocks/arxiv'
import { BrowserUseBlock } from '@/blocks/blocks/browser_use'
import { ClayBlock } from '@/blocks/blocks/clay'
import { ConditionBlock } from '@/blocks/blocks/condition'
@@ -63,6 +64,7 @@ import { VisionBlock } from '@/blocks/blocks/vision'
import { WealthboxBlock } from '@/blocks/blocks/wealthbox'
import { WebhookBlock } from '@/blocks/blocks/webhook'
import { WhatsAppBlock } from '@/blocks/blocks/whatsapp'
import { WikipediaBlock } from '@/blocks/blocks/wikipedia'
import { WorkflowBlock } from '@/blocks/blocks/workflow'
import { XBlock } from '@/blocks/blocks/x'
import { YouTubeBlock } from '@/blocks/blocks/youtube'
@@ -73,6 +75,7 @@ export const registry: Record<string, BlockConfig> = {
agent: AgentBlock,
airtable: AirtableBlock,
api: ApiBlock,
arxiv: ArxivBlock,
browser_use: BrowserUseBlock,
clay: ClayBlock,
condition: ConditionBlock,
@@ -130,6 +133,7 @@ export const registry: Record<string, BlockConfig> = {
wealthbox: WealthboxBlock,
webhook: WebhookBlock,
whatsapp: WhatsAppBlock,
wikipedia: WikipediaBlock,
workflow: WorkflowBlock,
x: XBlock,
youtube: YouTubeBlock,

View File

@@ -33,7 +33,10 @@ export type SubBlockType =
| 'channel-selector' // Channel selector for Slack, Discord, etc.
| 'folder-selector' // Folder selector for Gmail, etc.
| 'knowledge-base-selector' // Knowledge base selector
| 'knowledge-tag-filter' // Dynamic tag filter for knowledge bases
| 'knowledge-tag-filters' // Multiple tag filters for knowledge bases
| 'document-selector' // Document selector for knowledge bases
| 'document-tag-entry' // Document tag entry for creating documents
| 'input-format' // Input structure format
| 'response-format' // Response structure format
| 'file-upload' // File uploader

View File

@@ -73,7 +73,7 @@ export const BatchInvitationEmail = ({
src='https://simstudio.ai/logo.png'
width='120'
height='36'
alt='SimStudio'
alt='Sim Studio'
style={logo}
/>
</Section>
@@ -82,7 +82,7 @@ export const BatchInvitationEmail = ({
<Text style={text}>
<strong>{inviterName}</strong> has invited you to join{' '}
<strong>{organizationName}</strong> on SimStudio.
<strong>{organizationName}</strong> on Sim Studio.
</Text>
{/* Organization Invitation Details */}

View File

@@ -1,5 +1,6 @@
import { Container, Img, Link, Section, Text } from '@react-email/components'
import { env } from '@/lib/env'
import { getAssetUrl } from '@/lib/utils'
interface UnsubscribeOptions {
unsubscribeToken?: string
@@ -25,13 +26,13 @@ export const EmailFooter = ({
<tr>
<td align='center' style={{ padding: '0 8px' }}>
<Link href='https://x.com/simstudioai' rel='noopener noreferrer'>
<Img src={`${baseUrl}/static/x-icon.png`} width='24' height='24' alt='X' />
<Img src={getAssetUrl('static/x-icon.png')} width='24' height='24' alt='X' />
</Link>
</td>
<td align='center' style={{ padding: '0 8px' }}>
<Link href='https://discord.gg/Hr4UWYEcTT' rel='noopener noreferrer'>
<Img
src={`${baseUrl}/static/discord-icon.png`}
src={getAssetUrl('static/discord-icon.png')}
width='24'
height='24'
alt='Discord'
@@ -41,7 +42,7 @@ export const EmailFooter = ({
<td align='center' style={{ padding: '0 8px' }}>
<Link href='https://github.com/simstudioai/sim' rel='noopener noreferrer'>
<Img
src={`${baseUrl}/static/github-icon.png`}
src={getAssetUrl('static/github-icon.png')}
width='24'
height='24'
alt='GitHub'

View File

@@ -13,6 +13,7 @@ import {
} from '@react-email/components'
import { format } from 'date-fns'
import { env } from '@/lib/env'
import { getAssetUrl } from '@/lib/utils'
import { baseStyles } from './base-styles'
import EmailFooter from './footer'
@@ -59,7 +60,7 @@ export const InvitationEmail = ({
<Row>
<Column style={{ textAlign: 'center' }}>
<Img
src={`${baseUrl}/static/sim.png`}
src={getAssetUrl('static/sim.png')}
width='114'
alt='Sim Studio'
style={{

View File

@@ -11,6 +11,7 @@ import {
Text,
} from '@react-email/components'
import { env } from '@/lib/env'
import { getAssetUrl } from '@/lib/utils'
import { baseStyles } from './base-styles'
import EmailFooter from './footer'
@@ -68,7 +69,7 @@ export const OTPVerificationEmail = ({
<Row>
<Column style={{ textAlign: 'center' }}>
<Img
src={`${baseUrl}/static/sim.png`}
src={getAssetUrl('static/sim.png')}
width='114'
alt='Sim Studio'
style={{

View File

@@ -13,6 +13,7 @@ import {
} from '@react-email/components'
import { format } from 'date-fns'
import { env } from '@/lib/env'
import { getAssetUrl } from '@/lib/utils'
import { baseStyles } from './base-styles'
import EmailFooter from './footer'
@@ -39,7 +40,7 @@ export const ResetPasswordEmail = ({
<Row>
<Column style={{ textAlign: 'center' }}>
<Img
src={`${baseUrl}/static/sim.png`}
src={getAssetUrl('static/sim.png')}
width='114'
alt='Sim Studio'
style={{

View File

@@ -12,6 +12,7 @@ import {
Text,
} from '@react-email/components'
import { env } from '@/lib/env'
import { getAssetUrl } from '@/lib/utils'
import { baseStyles } from './base-styles'
import EmailFooter from './footer'
@@ -56,7 +57,7 @@ export const WorkspaceInvitationEmail = ({
<Row>
<Column style={{ textAlign: 'center' }}>
<Img
src={`${baseUrl}/static/sim.png`}
src={getAssetUrl('static/sim.png')}
width='114'
alt='Sim Studio'
style={{

View File

@@ -3107,3 +3107,59 @@ export function QdrantIcon(props: SVGProps<SVGSVGElement>) {
</svg>
)
}
export function ArxivIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg {...props} id='logomark' xmlns='http://www.w3.org/2000/svg' viewBox='0 0 17.732 24.269'>
<g id='tiny'>
<path
d='M573.549,280.916l2.266,2.738,6.674-7.84c.353-.47.52-.717.353-1.117a1.218,1.218,0,0,0-1.061-.748h0a.953.953,0,0,0-.712.262Z'
transform='translate(-566.984 -271.548)'
fill='#bdb9b4'
/>
<path
d='M579.525,282.225l-10.606-10.174a1.413,1.413,0,0,0-.834-.5,1.09,1.09,0,0,0-1.027.66c-.167.4-.047.681.319,1.206l8.44,10.242h0l-6.282,7.716a1.336,1.336,0,0,0-.323,1.3,1.114,1.114,0,0,0,1.04.69A.992.992,0,0,0,571,293l8.519-7.92A1.924,1.924,0,0,0,579.525,282.225Z'
transform='translate(-566.984 -271.548)'
fill='#b31b1b'
/>
<path
d='M584.32,293.912l-8.525-10.275,0,0L573.53,280.9l-1.389,1.254a2.063,2.063,0,0,0,0,2.965l10.812,10.419a.925.925,0,0,0,.742.282,1.039,1.039,0,0,0,.953-.667A1.261,1.261,0,0,0,584.32,293.912Z'
transform='translate(-566.984 -271.548)'
fill='#bdb9b4'
/>
</g>
</svg>
)
}
export function WikipediaIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg
{...props}
fill='currentColor'
version='1.1'
id='Capa_1'
xmlns='http://www.w3.org/2000/svg'
xmlnsXlink='http://www.w3.org/1999/xlink'
width='800px'
height='800px'
viewBox='0 0 98.05 98.05'
xmlSpace='preserve'
>
<g>
<path
d='M98.023,17.465l-19.584-0.056c-0.004,0.711-0.006,1.563-0.017,2.121c1.664,0.039,5.922,0.822,7.257,4.327L66.92,67.155
c-0.919-2.149-9.643-21.528-10.639-24.02l9.072-18.818c1.873-2.863,5.455-4.709,8.918-4.843l-0.01-1.968L55.42,17.489
c-0.045,0.499,0.001,1.548-0.068,2.069c5.315,0.144,7.215,1.334,5.941,4.508c-2.102,4.776-6.51,13.824-7.372,15.475
c-2.696-5.635-4.41-9.972-7.345-16.064c-1.266-2.823,1.529-3.922,4.485-4.004v-1.981l-21.82-0.067
c0.016,0.93-0.021,1.451-0.021,2.131c3.041,0.046,6.988,0.371,8.562,3.019c2.087,4.063,9.044,20.194,11.149,24.514
c-2.685,5.153-9.207,17.341-11.544,21.913c-3.348-7.43-15.732-36.689-19.232-44.241c-1.304-3.218,3.732-5.077,6.646-5.213
l0.019-2.148L0,17.398c0.005,0.646,0.027,1.71,0.029,2.187c4.025-0.037,9.908,6.573,11.588,10.683
c7.244,16.811,14.719,33.524,21.928,50.349c0.002,0.029,2.256,0.059,2.281,0.008c4.717-9.653,10.229-19.797,15.206-29.56
L63.588,80.64c0.005,0.004,2.082,0.016,2.093,0.007c7.962-18.196,19.892-46.118,23.794-54.933c1.588-3.767,4.245-6.064,8.543-6.194
l0.032-1.956L98.023,17.465z'
/>
</g>
</svg>
)
}

View File

@@ -171,7 +171,23 @@ export const TagDropdown: React.FC<TagDropdownProps> = ({
let blockTags: string[]
if (responseFormat) {
// Special handling for evaluator blocks
if (sourceBlock.type === 'evaluator') {
// Get the evaluation metrics for the evaluator block
const metricsValue = useSubBlockStore.getState().getValue(activeSourceBlockId, 'metrics')
if (metricsValue && Array.isArray(metricsValue) && metricsValue.length > 0) {
// Use the metric names as the available outputs
const validMetrics = metricsValue.filter((metric: any) => metric?.name)
blockTags = validMetrics.map(
(metric: any) => `${normalizedBlockName}.${metric.name.toLowerCase()}`
)
} else {
// Fallback to default evaluator outputs if no metrics are defined
const outputPaths = generateOutputPaths(blockConfig.outputs)
blockTags = outputPaths.map((path) => `${normalizedBlockName}.${path}`)
}
} else if (responseFormat) {
// Use custom schema properties if response format is specified
const schemaFields = extractFieldsFromSchema(responseFormat)
if (schemaFields.length > 0) {
@@ -430,7 +446,23 @@ export const TagDropdown: React.FC<TagDropdownProps> = ({
let blockTags: string[]
if (responseFormat) {
// Special handling for evaluator blocks
if (accessibleBlock.type === 'evaluator') {
// Get the evaluation metrics for the evaluator block
const metricsValue = useSubBlockStore.getState().getValue(accessibleBlockId, 'metrics')
if (metricsValue && Array.isArray(metricsValue) && metricsValue.length > 0) {
// Use the metric names as the available outputs
const validMetrics = metricsValue.filter((metric: any) => metric?.name)
blockTags = validMetrics.map(
(metric: any) => `${normalizedBlockName}.${metric.name.toLowerCase()}`
)
} else {
// Fallback to default evaluator outputs if no metrics are defined
const outputPaths = generateOutputPaths(blockConfig.outputs)
blockTags = outputPaths.map((path) => `${normalizedBlockName}.${path}`)
}
} else if (responseFormat) {
// Use custom schema properties if response format is specified
const schemaFields = extractFieldsFromSchema(responseFormat)
if (schemaFields.length > 0) {
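
Note: a hedged example of what the new evaluator branch produces; the metric values and block name below are hypothetical.

const metricsValue = [{ name: 'Accuracy' }, { name: 'Relevance' }, { name: '' }]
const normalizedBlockName = 'evaluator1'
const validMetrics = metricsValue.filter((metric: any) => metric?.name) // drops the empty name
const blockTags = validMetrics.map(
  (metric: any) => `${normalizedBlockName}.${metric.name.toLowerCase()}`
)
// blockTags === ['evaluator1.accuracy', 'evaluator1.relevance']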

View File

@@ -0,0 +1,13 @@
CREATE TABLE "knowledge_base_tag_definitions" (
"id" text PRIMARY KEY NOT NULL,
"knowledge_base_id" text NOT NULL,
"tag_slot" text NOT NULL,
"display_name" text NOT NULL,
"field_type" text DEFAULT 'text' NOT NULL,
"created_at" timestamp DEFAULT now() NOT NULL,
"updated_at" timestamp DEFAULT now() NOT NULL
);
--> statement-breakpoint
ALTER TABLE "knowledge_base_tag_definitions" ADD CONSTRAINT "knowledge_base_tag_definitions_knowledge_base_id_knowledge_base_id_fk" FOREIGN KEY ("knowledge_base_id") REFERENCES "public"."knowledge_base"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
CREATE UNIQUE INDEX "kb_tag_definitions_kb_slot_idx" ON "knowledge_base_tag_definitions" USING btree ("knowledge_base_id","tag_slot");--> statement-breakpoint
CREATE INDEX "kb_tag_definitions_kb_id_idx" ON "knowledge_base_tag_definitions" USING btree ("knowledge_base_id");

File diff suppressed because it is too large

View File

@@ -435,6 +435,13 @@
"when": 1753383446084,
"tag": "0062_previous_phantom_reporter",
"breakpoints": true
},
{
"idx": 63,
"version": "7",
"when": 1753558819517,
"tag": "0063_lame_sandman",
"breakpoints": true
}
]
}

View File

@@ -16,6 +16,7 @@ import {
uuid,
vector,
} from 'drizzle-orm/pg-core'
import { TAG_SLOTS } from '@/lib/constants/knowledge'
// Custom tsvector type for full-text search
export const tsvector = customType<{
@@ -794,6 +795,32 @@ export const document = pgTable(
})
)
export const knowledgeBaseTagDefinitions = pgTable(
'knowledge_base_tag_definitions',
{
id: text('id').primaryKey(),
knowledgeBaseId: text('knowledge_base_id')
.notNull()
.references(() => knowledgeBase.id, { onDelete: 'cascade' }),
tagSlot: text('tag_slot', {
enum: TAG_SLOTS,
}).notNull(),
displayName: text('display_name').notNull(),
fieldType: text('field_type').notNull().default('text'), // 'text', future: 'date', 'number', 'range'
createdAt: timestamp('created_at').notNull().defaultNow(),
updatedAt: timestamp('updated_at').notNull().defaultNow(),
},
(table) => ({
// Ensure unique tag slot per knowledge base
kbTagSlotIdx: uniqueIndex('kb_tag_definitions_kb_slot_idx').on(
table.knowledgeBaseId,
table.tagSlot
),
// Index for querying by knowledge base
kbIdIdx: index('kb_tag_definitions_kb_id_idx').on(table.knowledgeBaseId),
})
)
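
Note: a usage sketch against the new table, assuming the db export from '@/db' seen elsewhere in this diff; the id and values are placeholders.

import { db } from '@/db'
import { knowledgeBaseTagDefinitions } from '@/db/schema'

// The unique index on (knowledge_base_id, tag_slot) means a second insert
// for the same slot in the same knowledge base would fail
await db.insert(knowledgeBaseTagDefinitions).values({
  id: crypto.randomUUID(),
  knowledgeBaseId: 'kb_123',
  tagSlot: 'tag1',
  displayName: 'Author',
  // fieldType, createdAt, and updatedAt fall back to their column defaults
})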
export const embedding = pgTable(
'embedding',
{

View File

@@ -0,0 +1,88 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { TagSlot } from '@/lib/constants/knowledge'
import { createLogger } from '@/lib/logs/console/logger'
const logger = createLogger('useKnowledgeBaseTagDefinitions')
export interface TagDefinition {
id: string
tagSlot: TagSlot
displayName: string
fieldType: string
createdAt: string
updatedAt: string
}
/**
* Hook for fetching KB-scoped tag definitions (for filtering/selection)
* @param knowledgeBaseId - The knowledge base ID
*/
export function useKnowledgeBaseTagDefinitions(knowledgeBaseId: string | null) {
const [tagDefinitions, setTagDefinitions] = useState<TagDefinition[]>([])
const [isLoading, setIsLoading] = useState(false)
const [error, setError] = useState<string | null>(null)
const fetchTagDefinitions = useCallback(async () => {
if (!knowledgeBaseId) {
setTagDefinitions([])
return
}
setIsLoading(true)
setError(null)
try {
const response = await fetch(`/api/knowledge/${knowledgeBaseId}/tag-definitions`)
if (!response.ok) {
throw new Error(`Failed to fetch tag definitions: ${response.statusText}`)
}
const data = await response.json()
if (data.success && Array.isArray(data.data)) {
setTagDefinitions(data.data)
} else {
throw new Error('Invalid response format')
}
} catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Unknown error occurred'
logger.error('Error fetching tag definitions:', err)
setError(errorMessage)
setTagDefinitions([])
} finally {
setIsLoading(false)
}
}, [knowledgeBaseId])
const getTagLabel = useCallback(
(tagSlot: string): string => {
const definition = tagDefinitions.find((def) => def.tagSlot === tagSlot)
return definition?.displayName || tagSlot
},
[tagDefinitions]
)
const getTagDefinition = useCallback(
(tagSlot: string): TagDefinition | undefined => {
return tagDefinitions.find((def) => def.tagSlot === tagSlot)
},
[tagDefinitions]
)
// Auto-fetch on mount and when dependencies change
useEffect(() => {
fetchTagDefinitions()
}, [fetchTagDefinitions])
return {
tagDefinitions,
isLoading,
error,
fetchTagDefinitions,
getTagLabel,
getTagDefinition,
}
}
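
Note: a hedged usage sketch of this hook in a client component; the import path and component name are assumptions, not part of the diff.

'use client'
import { useKnowledgeBaseTagDefinitions } from '@/hooks/use-knowledge-base-tag-definitions'

export function TagFilterLabels({ knowledgeBaseId }: { knowledgeBaseId: string }) {
  const { tagDefinitions, isLoading, getTagLabel } = useKnowledgeBaseTagDefinitions(knowledgeBaseId)
  if (isLoading) return <span>Loading tags…</span>
  return (
    <ul>
      {/* getTagLabel falls back to the raw slot name (e.g. 'tag1') when no definition exists */}
      {tagDefinitions.map((def) => (
        <li key={def.id}>{getTagLabel(def.tagSlot)}</li>
      ))}
    </ul>
  )
}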

View File

@@ -0,0 +1,172 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import type { TagSlot } from '@/lib/constants/knowledge'
import { createLogger } from '@/lib/logs/console/logger'
const logger = createLogger('useTagDefinitions')
export interface TagDefinition {
id: string
tagSlot: TagSlot
displayName: string
fieldType: string
createdAt: string
updatedAt: string
}
export interface TagDefinitionInput {
tagSlot: TagSlot
displayName: string
fieldType: string
}
/**
* Hook for managing KB-scoped tag definitions
* @param knowledgeBaseId - The knowledge base ID
* @param documentId - The document ID (required for API calls)
*/
export function useTagDefinitions(
knowledgeBaseId: string | null,
documentId: string | null = null
) {
const [tagDefinitions, setTagDefinitions] = useState<TagDefinition[]>([])
const [isLoading, setIsLoading] = useState(false)
const [error, setError] = useState<string | null>(null)
const fetchTagDefinitions = useCallback(async () => {
if (!knowledgeBaseId || !documentId) {
setTagDefinitions([])
return
}
setIsLoading(true)
setError(null)
try {
const response = await fetch(
`/api/knowledge/${knowledgeBaseId}/documents/${documentId}/tag-definitions`
)
if (!response.ok) {
throw new Error(`Failed to fetch tag definitions: ${response.statusText}`)
}
const data = await response.json()
if (data.success && Array.isArray(data.data)) {
setTagDefinitions(data.data)
} else {
throw new Error('Invalid response format')
}
} catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Unknown error occurred'
logger.error('Error fetching tag definitions:', err)
setError(errorMessage)
setTagDefinitions([])
} finally {
setIsLoading(false)
}
}, [knowledgeBaseId, documentId])
const saveTagDefinitions = useCallback(
async (definitions: TagDefinitionInput[]) => {
if (!knowledgeBaseId || !documentId) {
throw new Error('Knowledge base ID and document ID are required')
}
// Simple validation
const validDefinitions = (definitions || []).filter(
(def) => def?.tagSlot && def.displayName && def.displayName.trim()
)
try {
const response = await fetch(
`/api/knowledge/${knowledgeBaseId}/documents/${documentId}/tag-definitions`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ definitions: validDefinitions }),
}
)
if (!response.ok) {
throw new Error(`Failed to save tag definitions: ${response.statusText}`)
}
const data = await response.json()
if (!data.success) {
throw new Error(data.error || 'Failed to save tag definitions')
}
// Refresh the definitions after saving
await fetchTagDefinitions()
return data.data
} catch (err) {
logger.error('Error saving tag definitions:', err)
throw err
}
},
[knowledgeBaseId, documentId, fetchTagDefinitions]
)
const deleteTagDefinitions = useCallback(async () => {
if (!knowledgeBaseId || !documentId) {
throw new Error('Knowledge base ID and document ID are required')
}
try {
const response = await fetch(
`/api/knowledge/${knowledgeBaseId}/documents/${documentId}/tag-definitions`,
{
method: 'DELETE',
}
)
if (!response.ok) {
throw new Error(`Failed to delete tag definitions: ${response.statusText}`)
}
// Refresh the definitions after deleting
await fetchTagDefinitions()
} catch (err) {
logger.error('Error deleting tag definitions:', err)
throw err
}
}, [knowledgeBaseId, documentId, fetchTagDefinitions])
const getTagLabel = useCallback(
(tagSlot: string): string => {
const definition = tagDefinitions.find((def) => def.tagSlot === tagSlot)
return definition?.displayName || tagSlot
},
[tagDefinitions]
)
const getTagDefinition = useCallback(
(tagSlot: string): TagDefinition | undefined => {
return tagDefinitions.find((def) => def.tagSlot === tagSlot)
},
[tagDefinitions]
)
// Auto-fetch on mount and when dependencies change
useEffect(() => {
fetchTagDefinitions()
}, [fetchTagDefinitions])
return {
tagDefinitions,
isLoading,
error,
fetchTagDefinitions,
saveTagDefinitions,
deleteTagDefinitions,
getTagLabel,
getTagDefinition,
}
}
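
Note: a sketch of the save path, which differs from the KB-scoped hook above by requiring a document ID; the component name, import path, and IDs are placeholders.

'use client'
import { useTagDefinitions } from '@/hooks/use-tag-definitions'

export function DocumentTagEditor({
  knowledgeBaseId,
  documentId,
}: { knowledgeBaseId: string; documentId: string }) {
  const { saveTagDefinitions } = useTagDefinitions(knowledgeBaseId, documentId)
  const handleSave = async () => {
    // Entries missing a tagSlot or a non-empty displayName are filtered out before the POST
    await saveTagDefinitions([
      { tagSlot: 'tag1', displayName: 'Author', fieldType: 'text' },
      { tagSlot: 'tag2', displayName: 'Topic', fieldType: 'text' },
    ])
  }
  return <button onClick={handleSave}>Save tags</button>
}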

View File

@@ -0,0 +1,15 @@
/**
* Knowledge base and document constants
*/
// Maximum number of tag slots allowed per knowledge base
export const MAX_TAG_SLOTS = 7
// Tag slot names (derived from MAX_TAG_SLOTS)
export const TAG_SLOTS = Array.from({ length: MAX_TAG_SLOTS }, (_, i) => `tag${i + 1}`) as [
string,
...string[],
]
// Type for tag slot names
export type TagSlot = (typeof TAG_SLOTS)[number]
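
Note: for reference, what these constants evaluate to (an annotation, not part of the diff).

// TAG_SLOTS === ['tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag6', 'tag7']
// TagSlot is the element type of TAG_SLOTS; because the Array.from result is
// cast to [string, ...string[]] rather than a literal tuple, TagSlot widens to string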

View File

@@ -8,142 +8,170 @@ import { z } from 'zod'
* - Server-side: Falls back to process.env when runtimeEnv returns undefined
* - Provides seamless Docker runtime variable support for NEXT_PUBLIC_ vars
*/
const getEnv = (variable: string): string | undefined => {
return runtimeEnv(variable) ?? process.env[variable]
}
const getEnv = (variable: string) => runtimeEnv(variable) ?? process.env[variable]
// biome-ignore format: keep alignment for readability
export const env = createEnv({
skipValidation: true,
server: {
DATABASE_URL: z.string().url(),
BETTER_AUTH_URL: z.string().url(),
BETTER_AUTH_SECRET: z.string().min(32),
DISABLE_REGISTRATION: z.boolean().optional(),
ENCRYPTION_KEY: z.string().min(32),
INTERNAL_API_SECRET: z.string().min(32),
// Core Database & Authentication
DATABASE_URL: z.string().url(), // Primary database connection string
BETTER_AUTH_URL: z.string().url(), // Base URL for Better Auth service
BETTER_AUTH_SECRET: z.string().min(32), // Secret key for Better Auth JWT signing
DISABLE_REGISTRATION: z.boolean().optional(), // Flag to disable new user registration
ENCRYPTION_KEY: z.string().min(32), // Key for encrypting sensitive data
INTERNAL_API_SECRET: z.string().min(32), // Secret for internal API authentication
POSTGRES_URL: z.string().url().optional(),
STRIPE_SECRET_KEY: z.string().min(1).optional(),
STRIPE_BILLING_WEBHOOK_SECRET: z.string().min(1).optional(),
STRIPE_WEBHOOK_SECRET: z.string().min(1).optional(),
STRIPE_FREE_PRICE_ID: z.string().min(1).optional(),
FREE_TIER_COST_LIMIT: z.number().optional(),
STRIPE_PRO_PRICE_ID: z.string().min(1).optional(),
PRO_TIER_COST_LIMIT: z.number().optional(),
STRIPE_TEAM_PRICE_ID: z.string().min(1).optional(),
TEAM_TIER_COST_LIMIT: z.number().optional(),
STRIPE_ENTERPRISE_PRICE_ID: z.string().min(1).optional(),
ENTERPRISE_TIER_COST_LIMIT: z.number().optional(),
RESEND_API_KEY: z.string().min(1).optional(),
EMAIL_DOMAIN: z.string().min(1).optional(),
OPENAI_API_KEY: z.string().min(1).optional(),
OPENAI_API_KEY_1: z.string().min(1).optional(),
OPENAI_API_KEY_2: z.string().min(1).optional(),
OPENAI_API_KEY_3: z.string().min(1).optional(),
MISTRAL_API_KEY: z.string().min(1).optional(),
ANTHROPIC_API_KEY_1: z.string().min(1).optional(),
ANTHROPIC_API_KEY_2: z.string().min(1).optional(),
ANTHROPIC_API_KEY_3: z.string().min(1).optional(),
FREESTYLE_API_KEY: z.string().min(1).optional(),
TELEMETRY_ENDPOINT: z.string().url().optional(),
COST_MULTIPLIER: z.number().optional(),
JWT_SECRET: z.string().min(1).optional(),
BROWSERBASE_API_KEY: z.string().min(1).optional(),
BROWSERBASE_PROJECT_ID: z.string().min(1).optional(),
OLLAMA_URL: z.string().url().optional(),
SENTRY_ORG: z.string().optional(),
SENTRY_PROJECT: z.string().optional(),
SENTRY_AUTH_TOKEN: z.string().optional(),
REDIS_URL: z.string().url().optional(),
NEXT_RUNTIME: z.string().optional(),
VERCEL_ENV: z.string().optional(),
// Database & Storage
POSTGRES_URL: z.string().url().optional(), // Alternative PostgreSQL connection string
REDIS_URL: z.string().url().optional(), // Redis connection string for caching/sessions
// Trigger.dev
TRIGGER_SECRET_KEY: z.string().min(1).optional(),
// Payment & Billing (Stripe)
STRIPE_SECRET_KEY: z.string().min(1).optional(), // Stripe secret key for payment processing
STRIPE_BILLING_WEBHOOK_SECRET: z.string().min(1).optional(), // Webhook secret for billing events
STRIPE_WEBHOOK_SECRET: z.string().min(1).optional(), // General Stripe webhook secret
STRIPE_FREE_PRICE_ID: z.string().min(1).optional(), // Stripe price ID for free tier
FREE_TIER_COST_LIMIT: z.number().optional(), // Cost limit for free tier users
STRIPE_PRO_PRICE_ID: z.string().min(1).optional(), // Stripe price ID for pro tier
PRO_TIER_COST_LIMIT: z.number().optional(), // Cost limit for pro tier users
STRIPE_TEAM_PRICE_ID: z.string().min(1).optional(), // Stripe price ID for team tier
TEAM_TIER_COST_LIMIT: z.number().optional(), // Cost limit for team tier users
STRIPE_ENTERPRISE_PRICE_ID: z.string().min(1).optional(), // Stripe price ID for enterprise tier
ENTERPRISE_TIER_COST_LIMIT: z.number().optional(), // Cost limit for enterprise tier users
// Storage
AWS_REGION: z.string().optional(),
AWS_ACCESS_KEY_ID: z.string().optional(),
AWS_SECRET_ACCESS_KEY: z.string().optional(),
S3_BUCKET_NAME: z.string().optional(),
S3_LOGS_BUCKET_NAME: z.string().optional(),
S3_KB_BUCKET_NAME: z.string().optional(),
AZURE_ACCOUNT_NAME: z.string().optional(),
AZURE_ACCOUNT_KEY: z.string().optional(),
AZURE_CONNECTION_STRING: z.string().optional(),
AZURE_STORAGE_CONTAINER_NAME: z.string().optional(),
AZURE_STORAGE_KB_CONTAINER_NAME: z.string().optional(),
// Email & Communication
RESEND_API_KEY: z.string().min(1).optional(), // Resend API key for transactional emails
EMAIL_DOMAIN: z.string().min(1).optional(), // Domain for sending emails
// Miscellaneous
CRON_SECRET: z.string().optional(),
FREE_PLAN_LOG_RETENTION_DAYS: z.string().optional(),
GITHUB_TOKEN: z.string().optional(),
ELEVENLABS_API_KEY: z.string().min(1).optional(),
AZURE_OPENAI_ENDPOINT: z.string().url().optional(),
AZURE_OPENAI_API_VERSION: z.string().optional(),
// AI/LLM Provider API Keys
OPENAI_API_KEY: z.string().min(1).optional(), // Primary OpenAI API key
OPENAI_API_KEY_1: z.string().min(1).optional(), // Additional OpenAI API key for load balancing
OPENAI_API_KEY_2: z.string().min(1).optional(), // Additional OpenAI API key for load balancing
OPENAI_API_KEY_3: z.string().min(1).optional(), // Additional OpenAI API key for load balancing
MISTRAL_API_KEY: z.string().min(1).optional(), // Mistral AI API key
ANTHROPIC_API_KEY_1: z.string().min(1).optional(), // Primary Anthropic Claude API key
ANTHROPIC_API_KEY_2: z.string().min(1).optional(), // Additional Anthropic API key for load balancing
ANTHROPIC_API_KEY_3: z.string().min(1).optional(), // Additional Anthropic API key for load balancing
FREESTYLE_API_KEY: z.string().min(1).optional(), // Freestyle AI API key
OLLAMA_URL: z.string().url().optional(), // Ollama local LLM server URL
ELEVENLABS_API_KEY: z.string().min(1).optional(), // ElevenLabs API key for text-to-speech in deployed chat
// OAuth blocks (all optional)
GOOGLE_CLIENT_ID: z.string().optional(),
GOOGLE_CLIENT_SECRET: z.string().optional(),
GITHUB_CLIENT_ID: z.string().optional(),
GITHUB_CLIENT_SECRET: z.string().optional(),
GITHUB_REPO_CLIENT_ID: z.string().optional(),
GITHUB_REPO_CLIENT_SECRET: z.string().optional(),
X_CLIENT_ID: z.string().optional(),
X_CLIENT_SECRET: z.string().optional(),
CONFLUENCE_CLIENT_ID: z.string().optional(),
CONFLUENCE_CLIENT_SECRET: z.string().optional(),
JIRA_CLIENT_ID: z.string().optional(),
JIRA_CLIENT_SECRET: z.string().optional(),
AIRTABLE_CLIENT_ID: z.string().optional(),
AIRTABLE_CLIENT_SECRET: z.string().optional(),
SUPABASE_CLIENT_ID: z.string().optional(),
SUPABASE_CLIENT_SECRET: z.string().optional(),
NOTION_CLIENT_ID: z.string().optional(),
NOTION_CLIENT_SECRET: z.string().optional(),
DISCORD_CLIENT_ID: z.string().optional(),
DISCORD_CLIENT_SECRET: z.string().optional(),
MICROSOFT_CLIENT_ID: z.string().optional(),
MICROSOFT_CLIENT_SECRET: z.string().optional(),
HUBSPOT_CLIENT_ID: z.string().optional(),
HUBSPOT_CLIENT_SECRET: z.string().optional(),
WEALTHBOX_CLIENT_ID: z.string().optional(),
WEALTHBOX_CLIENT_SECRET: z.string().optional(),
DOCKER_BUILD: z.boolean().optional(),
LINEAR_CLIENT_ID: z.string().optional(),
LINEAR_CLIENT_SECRET: z.string().optional(),
SLACK_CLIENT_ID: z.string().optional(),
SLACK_CLIENT_SECRET: z.string().optional(),
REDDIT_CLIENT_ID: z.string().optional(),
REDDIT_CLIENT_SECRET: z.string().optional(),
SOCKET_SERVER_URL: z.string().url().optional(),
SOCKET_PORT: z.number().optional(),
PORT: z.number().optional(),
ALLOWED_ORIGINS: z.string().optional(),
JOB_RETENTION_DAYS: z.string().optional().default('1'),
// Azure OpenAI Configuration
AZURE_OPENAI_ENDPOINT: z.string().url().optional(), // Azure OpenAI service endpoint
AZURE_OPENAI_API_VERSION: z.string().optional(), // Azure OpenAI API version
// Monitoring & Analytics
TELEMETRY_ENDPOINT: z.string().url().optional(), // Custom telemetry/analytics endpoint
COST_MULTIPLIER: z.number().optional(), // Multiplier for cost calculations
SENTRY_ORG: z.string().optional(), // Sentry organization for error tracking
SENTRY_PROJECT: z.string().optional(), // Sentry project for error tracking
SENTRY_AUTH_TOKEN: z.string().optional(), // Sentry authentication token
// External Services
JWT_SECRET: z.string().min(1).optional(), // JWT signing secret for custom tokens
BROWSERBASE_API_KEY: z.string().min(1).optional(), // Browserbase API key for browser automation
BROWSERBASE_PROJECT_ID: z.string().min(1).optional(), // Browserbase project ID
GITHUB_TOKEN: z.string().optional(), // GitHub personal access token for API access
// Infrastructure & Deployment
NEXT_RUNTIME: z.string().optional(), // Next.js runtime environment
VERCEL_ENV: z.string().optional(), // Vercel deployment environment
DOCKER_BUILD: z.boolean().optional(), // Flag indicating Docker build environment
// Background Jobs & Scheduling
TRIGGER_SECRET_KEY: z.string().min(1).optional(), // Trigger.dev secret key for background jobs
CRON_SECRET: z.string().optional(), // Secret for authenticating cron job requests
JOB_RETENTION_DAYS: z.string().optional().default('1'), // Days to retain job logs/data
// Cloud Storage - AWS S3
AWS_REGION: z.string().optional(), // AWS region for S3 buckets
AWS_ACCESS_KEY_ID: z.string().optional(), // AWS access key ID
AWS_SECRET_ACCESS_KEY: z.string().optional(), // AWS secret access key
S3_BUCKET_NAME: z.string().optional(), // S3 bucket for general file storage
S3_LOGS_BUCKET_NAME: z.string().optional(), // S3 bucket for storing logs
S3_KB_BUCKET_NAME: z.string().optional(), // S3 bucket for knowledge base files
// Cloud Storage - Azure Blob
AZURE_ACCOUNT_NAME: z.string().optional(), // Azure storage account name
AZURE_ACCOUNT_KEY: z.string().optional(), // Azure storage account key
AZURE_CONNECTION_STRING: z.string().optional(), // Azure storage connection string
AZURE_STORAGE_CONTAINER_NAME: z.string().optional(), // Azure container for general files
AZURE_STORAGE_KB_CONTAINER_NAME: z.string().optional(), // Azure container for knowledge base files
// Data Retention
FREE_PLAN_LOG_RETENTION_DAYS: z.string().optional(), // Log retention days for free plan users
// Real-time Communication
SOCKET_SERVER_URL: z.string().url().optional(), // WebSocket server URL for real-time features
SOCKET_PORT: z.number().optional(), // Port for WebSocket server
PORT: z.number().optional(), // Main application port
ALLOWED_ORIGINS: z.string().optional(), // CORS allowed origins
// OAuth Integration Credentials - All optional, enables third-party integrations
GOOGLE_CLIENT_ID: z.string().optional(), // Google OAuth client ID for Google services
GOOGLE_CLIENT_SECRET: z.string().optional(), // Google OAuth client secret
GITHUB_CLIENT_ID: z.string().optional(), // GitHub OAuth client ID for GitHub integration
GITHUB_CLIENT_SECRET: z.string().optional(), // GitHub OAuth client secret
GITHUB_REPO_CLIENT_ID: z.string().optional(), // GitHub OAuth client ID for repo access
GITHUB_REPO_CLIENT_SECRET: z.string().optional(), // GitHub OAuth client secret for repo access
X_CLIENT_ID: z.string().optional(), // X (Twitter) OAuth client ID
X_CLIENT_SECRET: z.string().optional(), // X (Twitter) OAuth client secret
CONFLUENCE_CLIENT_ID: z.string().optional(), // Atlassian Confluence OAuth client ID
CONFLUENCE_CLIENT_SECRET: z.string().optional(), // Atlassian Confluence OAuth client secret
JIRA_CLIENT_ID: z.string().optional(), // Atlassian Jira OAuth client ID
JIRA_CLIENT_SECRET: z.string().optional(), // Atlassian Jira OAuth client secret
AIRTABLE_CLIENT_ID: z.string().optional(), // Airtable OAuth client ID
AIRTABLE_CLIENT_SECRET: z.string().optional(), // Airtable OAuth client secret
SUPABASE_CLIENT_ID: z.string().optional(), // Supabase OAuth client ID
SUPABASE_CLIENT_SECRET: z.string().optional(), // Supabase OAuth client secret
NOTION_CLIENT_ID: z.string().optional(), // Notion OAuth client ID
NOTION_CLIENT_SECRET: z.string().optional(), // Notion OAuth client secret
DISCORD_CLIENT_ID: z.string().optional(), // Discord OAuth client ID
DISCORD_CLIENT_SECRET: z.string().optional(), // Discord OAuth client secret
MICROSOFT_CLIENT_ID: z.string().optional(), // Microsoft OAuth client ID for Office 365/Teams
MICROSOFT_CLIENT_SECRET: z.string().optional(), // Microsoft OAuth client secret
HUBSPOT_CLIENT_ID: z.string().optional(), // HubSpot OAuth client ID
HUBSPOT_CLIENT_SECRET: z.string().optional(), // HubSpot OAuth client secret
WEALTHBOX_CLIENT_ID: z.string().optional(), // WealthBox OAuth client ID
WEALTHBOX_CLIENT_SECRET: z.string().optional(), // WealthBox OAuth client secret
LINEAR_CLIENT_ID: z.string().optional(), // Linear OAuth client ID
LINEAR_CLIENT_SECRET: z.string().optional(), // Linear OAuth client secret
SLACK_CLIENT_ID: z.string().optional(), // Slack OAuth client ID
SLACK_CLIENT_SECRET: z.string().optional(), // Slack OAuth client secret
REDDIT_CLIENT_ID: z.string().optional(), // Reddit OAuth client ID
REDDIT_CLIENT_SECRET: z.string().optional(), // Reddit OAuth client secret
},
client: {
NEXT_PUBLIC_APP_URL: z.string().url(),
NEXT_PUBLIC_VERCEL_URL: z.string().optional(),
NEXT_PUBLIC_SENTRY_DSN: z.string().url().optional(),
NEXT_PUBLIC_GOOGLE_CLIENT_ID: z.string().optional(),
NEXT_PUBLIC_GOOGLE_API_KEY: z.string().optional(),
NEXT_PUBLIC_GOOGLE_PROJECT_NUMBER: z.string().optional(),
NEXT_PUBLIC_SOCKET_URL: z.string().url().optional(),
// Core Application URLs - Required for frontend functionality
NEXT_PUBLIC_APP_URL: z.string().url(), // Base URL of the application (e.g., https://app.simstudio.ai)
NEXT_PUBLIC_VERCEL_URL: z.string().optional(), // Vercel deployment URL for preview/production
// Client-side Services
NEXT_PUBLIC_SENTRY_DSN: z.string().url().optional(), // Sentry DSN for client-side error tracking
NEXT_PUBLIC_SOCKET_URL: z.string().url().optional(), // WebSocket server URL for real-time features
// Asset Storage
NEXT_PUBLIC_BLOB_BASE_URL: z.string().url().optional(), // Base URL for Vercel Blob storage (CDN assets)
// Google Services - For client-side Google integrations
NEXT_PUBLIC_GOOGLE_CLIENT_ID: z.string().optional(), // Google OAuth client ID for browser auth
NEXT_PUBLIC_GOOGLE_API_KEY: z.string().optional(), // Google API key for client-side API calls
NEXT_PUBLIC_GOOGLE_PROJECT_NUMBER: z.string().optional(), // Google project number for Drive picker
},
// Variables available on both server and client
shared: {
NODE_ENV: z.enum(['development', 'test', 'production']).optional(),
NEXT_TELEMETRY_DISABLED: z.string().optional(),
NODE_ENV: z.enum(['development', 'test', 'production']).optional(), // Runtime environment
NEXT_TELEMETRY_DISABLED: z.string().optional(), // Disable Next.js telemetry collection
},
experimental__runtimeEnv: {
NEXT_PUBLIC_APP_URL: process.env.NEXT_PUBLIC_APP_URL,
NEXT_PUBLIC_VERCEL_URL: process.env.NEXT_PUBLIC_VERCEL_URL,
NEXT_PUBLIC_SENTRY_DSN: process.env.NEXT_PUBLIC_SENTRY_DSN,
NEXT_PUBLIC_BLOB_BASE_URL: process.env.NEXT_PUBLIC_BLOB_BASE_URL,
NEXT_PUBLIC_GOOGLE_CLIENT_ID: process.env.NEXT_PUBLIC_GOOGLE_CLIENT_ID,
NEXT_PUBLIC_GOOGLE_API_KEY: process.env.NEXT_PUBLIC_GOOGLE_API_KEY,
NEXT_PUBLIC_GOOGLE_PROJECT_NUMBER: process.env.NEXT_PUBLIC_GOOGLE_PROJECT_NUMBER,
@@ -153,7 +181,7 @@ export const env = createEnv({
},
})
// Needing this utility because t3-env is returning string for boolean values.
// Need this utility because t3-env is returning string for boolean values.
export const isTruthy = (value: string | boolean | number | undefined) =>
typeof value === 'string' ? value === 'true' || value === '1' : Boolean(value)
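
Note: a few sample evaluations of isTruthy, since Docker runtime env delivers booleans as strings.

isTruthy('true')    // true
isTruthy('1')       // true
isTruthy('false')   // false ('false' matches neither 'true' nor '1')
isTruthy(undefined) // false
isTruthy(true)      // true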

View File

@@ -49,6 +49,7 @@ export const cspDirectives: CSPDirectives = {
'https://*.atlassian.com',
'https://cdn.discordapp.com',
'https://*.githubusercontent.com',
'https://*.public.blob.vercel-storage.com',
],
'media-src': ["'self'", 'blob:'],

View File

@@ -379,6 +379,19 @@ export function getInvalidCharacters(name: string): string[] {
return invalidChars ? [...new Set(invalidChars)] : []
}
/**
* Get the full URL for an asset stored in Vercel Blob or local fallback
* - If CDN is configured (NEXT_PUBLIC_BLOB_BASE_URL), uses CDN URL
* - Otherwise falls back to local static assets served from root path
*/
export function getAssetUrl(filename: string) {
const cdnBaseUrl = env.NEXT_PUBLIC_BLOB_BASE_URL
if (cdnBaseUrl) {
return `${cdnBaseUrl}/${filename}`
}
return `/${filename}`
}
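
Note: sample outputs of getAssetUrl, with a hypothetical CDN base URL.

// With NEXT_PUBLIC_BLOB_BASE_URL = 'https://cdn.example.com':
getAssetUrl('static/sim.png') // => 'https://cdn.example.com/static/sim.png'
// With the variable unset, the local static fallback is used:
getAssetUrl('static/sim.png') // => '/static/sim.png'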
/**
* No-operation function for use as default callback
*/

View File

@@ -1,18 +1,9 @@
import { and, eq, sql } from 'drizzle-orm'
import { and, eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { v4 as uuidv4 } from 'uuid'
import { createLogger } from '@/lib/logs/console/logger'
import { LoggingSession } from '@/lib/logs/execution/logging-session'
import { hasProcessedMessage, markMessageAsProcessed } from '@/lib/redis'
import { decryptSecret } from '@/lib/utils'
import { loadWorkflowFromNormalizedTables } from '@/lib/workflows/db-helpers'
import { updateWorkflowRunCounts } from '@/lib/workflows/utils'
import { getOAuthToken } from '@/app/api/auth/oauth/utils'
import { db } from '@/db'
import { environment as environmentTable, userStats, webhook } from '@/db/schema'
import { Executor } from '@/executor'
import { Serializer } from '@/serializer'
import { mergeSubblockStateAsync } from '@/stores/workflows/server-utils'
import { webhook } from '@/db/schema'
const logger = createLogger('WebhookUtils')
@@ -148,67 +139,6 @@ export async function validateSlackSignature(
}
}
/**
* Process WhatsApp message deduplication
*/
export async function processWhatsAppDeduplication(
requestId: string,
messages: any[]
): Promise<NextResponse | null> {
if (messages.length > 0) {
const message = messages[0]
const messageId = message.id
if (messageId) {
const whatsappMsgKey = `whatsapp:msg:${messageId}`
try {
const isDuplicate = await hasProcessedMessage(whatsappMsgKey)
if (isDuplicate) {
logger.info(`[${requestId}] Duplicate WhatsApp message detected: ${messageId}`)
return new NextResponse('Duplicate message', { status: 200 })
}
// Mark as processed BEFORE processing
await markMessageAsProcessed(whatsappMsgKey, 60 * 60 * 24)
} catch (error) {
logger.error(`[${requestId}] Error in WhatsApp deduplication`, error)
// Continue processing
}
}
}
return null
}
/**
* Process generic deduplication using request hash
*/
export async function processGenericDeduplication(
requestId: string,
path: string,
body: any
): Promise<NextResponse | null> {
try {
const requestHash = await generateRequestHash(path, body)
const genericMsgKey = `generic:${requestHash}`
const isDuplicate = await hasProcessedMessage(genericMsgKey)
if (isDuplicate) {
logger.info(`[${requestId}] Duplicate request detected with hash: ${requestHash}`)
return new NextResponse('Duplicate request', { status: 200 })
}
// Mark as processed
await markMessageAsProcessed(genericMsgKey, 60 * 60 * 24)
} catch (error) {
logger.error(`[${requestId}] Error in generic deduplication`, error)
// Continue processing
}
return null
}
/**
* Format webhook input based on provider
*/
@@ -471,375 +401,6 @@ export function formatWebhookInput(
}
}
/**
* Execute workflow with the provided input
*/
export async function executeWorkflowFromPayload(
foundWorkflow: any,
input: any,
executionId: string,
requestId: string,
startBlockId?: string | null
): Promise<void> {
// Add log at the beginning of this function for clarity
logger.info(`[${requestId}] Preparing to execute workflow`, {
workflowId: foundWorkflow.id,
executionId,
triggerSource: 'webhook-payload',
})
const loggingSession = new LoggingSession(foundWorkflow.id, executionId, 'webhook', requestId)
try {
// Load workflow data from normalized tables
logger.debug(`[${requestId}] Loading workflow ${foundWorkflow.id} from normalized tables`)
const normalizedData = await loadWorkflowFromNormalizedTables(foundWorkflow.id)
if (!normalizedData) {
logger.error(`[${requestId}] TRACE: No normalized data found for workflow`, {
workflowId: foundWorkflow.id,
hasNormalizedData: false,
})
throw new Error(`Workflow ${foundWorkflow.id} data not found in normalized tables`)
}
// Use normalized data for execution
const { blocks, edges, loops, parallels } = normalizedData
logger.info(`[${requestId}] Loaded workflow ${foundWorkflow.id} from normalized tables`)
// DEBUG: Log state information
logger.debug(`[${requestId}] TRACE: Retrieved workflow state from normalized tables`, {
workflowId: foundWorkflow.id,
blockCount: Object.keys(blocks || {}).length,
edgeCount: (edges || []).length,
loopCount: Object.keys(loops || {}).length,
})
logger.debug(
`[${requestId}] Merging subblock states for workflow ${foundWorkflow.id} (Execution: ${executionId})`
)
const mergeStartTime = Date.now()
const mergedStates = await mergeSubblockStateAsync(blocks, foundWorkflow.id)
logger.debug(`[${requestId}] TRACE: State merging complete`, {
duration: `${Date.now() - mergeStartTime}ms`,
mergedBlockCount: Object.keys(mergedStates).length,
})
// Retrieve and decrypt environment variables
const [userEnv] = await db
.select()
.from(environmentTable)
.where(eq(environmentTable.userId, foundWorkflow.userId))
.limit(1)
let decryptedEnvVars: Record<string, string> = {}
if (userEnv) {
// Decryption logic
const decryptionPromises = Object.entries((userEnv.variables as any) || {}).map(
async ([key, encryptedValue]) => {
try {
const { decrypted } = await decryptSecret(encryptedValue as string)
return [key, decrypted] as const
} catch (error: any) {
logger.error(
`[${requestId}] Failed to decrypt environment variable "${key}" (Execution: ${executionId})`,
error
)
throw new Error(`Failed to decrypt environment variable "${key}": ${error.message}`)
}
}
)
const decryptedEntries = await Promise.all(decryptionPromises)
decryptedEnvVars = Object.fromEntries(decryptedEntries)
} else {
logger.debug(`[${requestId}] TRACE: No environment variables found for user`, {
userId: foundWorkflow.userId,
})
}
await loggingSession.safeStart({
userId: foundWorkflow.userId,
workspaceId: foundWorkflow.workspaceId,
variables: decryptedEnvVars,
})
// Process block states (extract subBlock values, parse responseFormat)
const blockStatesStartTime = Date.now()
const currentBlockStates = Object.entries(mergedStates).reduce(
(acc, [id, block]) => {
acc[id] = Object.entries(block.subBlocks).reduce(
(subAcc, [key, subBlock]) => {
subAcc[key] = subBlock.value
return subAcc
},
{} as Record<string, any>
)
return acc
},
{} as Record<string, Record<string, any>>
)
const processedBlockStates = Object.entries(currentBlockStates).reduce(
(acc, [blockId, blockState]) => {
const processedState = { ...blockState }
if (processedState.responseFormat) {
try {
if (typeof processedState.responseFormat === 'string') {
processedState.responseFormat = JSON.parse(processedState.responseFormat)
}
if (
processedState.responseFormat &&
typeof processedState.responseFormat === 'object'
) {
if (!processedState.responseFormat.schema && !processedState.responseFormat.name) {
processedState.responseFormat = {
name: 'response_schema',
schema: processedState.responseFormat,
strict: true,
}
}
}
acc[blockId] = processedState
} catch (error) {
logger.warn(
`[${requestId}] Failed to parse responseFormat for block ${blockId} (Execution: ${executionId})`,
error
)
acc[blockId] = blockState
}
} else {
acc[blockId] = blockState
}
return acc
},
{} as Record<string, Record<string, any>>
)
// DEBUG: Log block state processing
logger.debug(`[${requestId}] TRACE: Block states processed`, {
duration: `${Date.now() - blockStatesStartTime}ms`,
blockCount: Object.keys(processedBlockStates).length,
})
// Serialize and get workflow variables
const serializeStartTime = Date.now()
const serializedWorkflow = new Serializer().serializeWorkflow(
mergedStates as any,
edges,
loops,
parallels
)
let workflowVariables = {}
if (foundWorkflow.variables) {
try {
if (typeof foundWorkflow.variables === 'string') {
workflowVariables = JSON.parse(foundWorkflow.variables)
} else {
workflowVariables = foundWorkflow.variables
}
} catch (error) {
logger.error(
`[${requestId}] Failed to parse workflow variables: ${foundWorkflow.id} (Execution: ${executionId})`,
error
)
}
}
// DEBUG: Log serialization completion
logger.debug(`[${requestId}] TRACE: Workflow serialized`, {
duration: `${Date.now() - serializeStartTime}ms`,
hasWorkflowVars: Object.keys(workflowVariables).length > 0,
})
logger.debug(`[${requestId}] Starting workflow execution`, {
executionId,
blockCount: Object.keys(processedBlockStates).length,
})
// Log blocks for debugging (if any missing or invalid)
if (Object.keys(processedBlockStates).length === 0) {
logger.error(`[${requestId}] No blocks found in workflow state - this will likely fail`)
} else {
logger.debug(`[${requestId}] Block IDs for execution:`, {
blockIds: Object.keys(processedBlockStates).slice(0, 5), // Log just a few block IDs for debugging
totalBlocks: Object.keys(processedBlockStates).length,
})
}
// Ensure workflow variables exist
if (!workflowVariables || Object.keys(workflowVariables).length === 0) {
logger.debug(`[${requestId}] No workflow variables defined, using empty object`)
workflowVariables = {}
}
// Validate input format for Airtable webhooks to prevent common errors
if (
input?.airtableChanges &&
(!Array.isArray(input.airtableChanges) || input.airtableChanges.length === 0)
) {
logger.warn(
`[${requestId}] Invalid Airtable input format - airtableChanges should be a non-empty array`
)
}
// DEBUG: Log critical moment before executor creation
logger.info(`[${requestId}] TRACE: Creating workflow executor`, {
workflowId: foundWorkflow.id,
hasSerializedWorkflow: !!serializedWorkflow,
blockCount: Object.keys(processedBlockStates).length,
timestamp: new Date().toISOString(),
})
const executor = new Executor(
serializedWorkflow,
processedBlockStates,
decryptedEnvVars,
input,
workflowVariables
)
// Set up logging on the executor
loggingSession.setupExecutor(executor)
// Log workflow execution start time for tracking
const executionStartTime = Date.now()
logger.info(`[${requestId}] TRACE: Executor instantiated, starting workflow execution now`, {
workflowId: foundWorkflow.id,
timestamp: new Date().toISOString(),
})
// Add direct detailed logging right before executing
logger.info(
`[${requestId}] EXECUTION_MONITOR: About to call executor.execute() - CRITICAL POINT`,
{
workflowId: foundWorkflow.id,
executionId: executionId,
timestamp: new Date().toISOString(),
}
)
// This is THE critical line where the workflow actually executes
const result = await executor.execute(foundWorkflow.id, startBlockId || undefined)
// Check if we got a StreamingExecution result (with stream + execution properties)
// For webhook executions, we only care about the ExecutionResult part, not the stream
const executionResult = 'stream' in result && 'execution' in result ? result.execution : result
// Add direct detailed logging right after executing
logger.info(`[${requestId}] EXECUTION_MONITOR: executor.execute() completed with result`, {
workflowId: foundWorkflow.id,
executionId: executionId,
success: executionResult.success,
resultType: result ? typeof result : 'undefined',
timestamp: new Date().toISOString(),
})
// Log completion and timing
const executionDuration = Date.now() - executionStartTime
logger.info(`[${requestId}] TRACE: Workflow execution completed`, {
workflowId: foundWorkflow.id,
success: executionResult.success,
duration: `${executionDuration}ms`,
actualDurationMs: executionDuration,
timestamp: new Date().toISOString(),
})
logger.info(`[${requestId}] Workflow execution finished`, {
executionId,
success: executionResult.success,
durationMs: executionResult.metadata?.duration || executionDuration,
actualDurationMs: executionDuration,
})
// Update counts and stats if successful
if (executionResult.success) {
await updateWorkflowRunCounts(foundWorkflow.id)
await db
.update(userStats)
.set({
totalWebhookTriggers: sql`total_webhook_triggers + 1`,
lastActive: new Date(),
})
.where(eq(userStats.userId, foundWorkflow.userId))
}
// Calculate total duration for logging
const totalDuration = executionResult.metadata?.duration || 0
const traceSpans = (executionResult.logs || []).map((blockLog: any, index: number) => {
let output = blockLog.output
if (!blockLog.success && blockLog.error) {
output = {
error: blockLog.error,
success: false,
...(blockLog.output || {}),
}
}
return {
id: blockLog.blockId,
name: `Block ${blockLog.blockName || blockLog.blockType} (${blockLog.blockType || 'unknown'})`,
type: blockLog.blockType || 'unknown',
duration: blockLog.durationMs || 0,
startTime: blockLog.startedAt,
endTime: blockLog.endedAt || blockLog.startedAt,
status: blockLog.success ? 'success' : 'error',
blockId: blockLog.blockId,
input: blockLog.input,
output: output,
tokens: blockLog.output?.tokens?.total || 0,
relativeStartMs: index * 100,
children: [],
toolCalls: (blockLog as any).toolCalls || [],
}
})
await loggingSession.safeComplete({
endedAt: new Date().toISOString(),
totalDurationMs: totalDuration || 0,
finalOutput: executionResult.output || {},
traceSpans: (traceSpans || []) as any,
})
// DEBUG: Final success log
logger.info(`[${requestId}] TRACE: Execution logs persisted successfully`, {
workflowId: foundWorkflow.id,
executionId,
timestamp: new Date().toISOString(),
})
} catch (error: any) {
// DEBUG: Detailed error information
logger.error(`[${requestId}] TRACE: Error during workflow execution`, {
workflowId: foundWorkflow.id,
executionId,
errorType: error.constructor.name,
errorMessage: error.message,
stack: error.stack,
timestamp: new Date().toISOString(),
})
logger.error(`[${requestId}] Error executing workflow`, {
workflowId: foundWorkflow.id,
executionId,
error: error.message,
stack: error.stack,
})
// Error logging handled by logging session
await loggingSession.safeCompleteWithError({
endedAt: new Date().toISOString(),
totalDurationMs: 0,
error: {
message: error.message || 'Webhook workflow execution failed',
stackTrace: error.stack,
},
})
// Re-throw the error so the caller knows it failed
throw error
}
}
/**
* Validates a Microsoft Teams outgoing webhook request signature using HMAC SHA-256
* @param hmacSecret - Microsoft Teams HMAC secret (base64 encoded)
@@ -1378,26 +939,23 @@ export async function fetchAndProcessAirtablePayloads(
}
)
await executeWorkflowFromPayload(workflowData, input, requestId, requestId, null)
// COMPLETION LOG - This will only appear if execution succeeds
logger.info(`[${requestId}] CRITICAL_TRACE: Workflow execution completed successfully`, {
// Return the processed input for the trigger.dev task to handle
logger.info(`[${requestId}] CRITICAL_TRACE: Airtable changes processed, returning input`, {
workflowId: workflowData.id,
timestamp: new Date().toISOString(),
})
} catch (executionError: any) {
// Errors logged within executeWorkflowFromPayload
logger.error(`[${requestId}] CRITICAL_TRACE: Workflow execution failed with error`, {
workflowId: workflowData.id,
error: executionError.message,
stack: executionError.stack,
recordCount: finalConsolidatedChanges.length,
timestamp: new Date().toISOString(),
})
logger.error(
`[${requestId}] Error during workflow execution triggered by Airtable polling`,
executionError
)
return input
} catch (processingError: any) {
logger.error(`[${requestId}] CRITICAL_TRACE: Error processing Airtable changes`, {
workflowId: workflowData.id,
error: processingError.message,
stack: processingError.stack,
timestamp: new Date().toISOString(),
})
throw processingError
}
} else {
// DEBUG: Log when no changes are found
@@ -1429,166 +987,6 @@ export async function fetchAndProcessAirtablePayloads(
})
}
/**
* Process webhook verification and authorization
*/
/**
* Handle standard webhooks with synchronous execution
*/
async function processStandardWebhook(
foundWebhook: any,
foundWorkflow: any,
input: any,
executionId: string,
requestId: string
): Promise<NextResponse> {
logger.info(
`[${requestId}] Executing workflow ${foundWorkflow.id} for webhook ${foundWebhook.id} (Execution: ${executionId})`
)
await executeWorkflowFromPayload(
foundWorkflow,
input,
executionId,
requestId,
foundWebhook.blockId
)
// Since executeWorkflowFromPayload handles logging and errors internally,
// we just need to return a success response for synchronous webhooks.
// Microsoft Teams requires a specific response format.
if (foundWebhook.provider === 'microsoftteams') {
return NextResponse.json(
{
type: 'message',
text: 'Sim Studio',
},
{ status: 200 }
)
}
return NextResponse.json({ message: 'Webhook processed' }, { status: 200 })
}
/**
* Handle webhook processing errors with provider-specific responses
*/
function handleWebhookError(
error: any,
foundWebhook: any,
executionId: string,
requestId: string
): NextResponse {
logger.error(
`[${requestId}] Error in processWebhook for ${foundWebhook.id} (Execution: ${executionId})`,
error
)
// For Microsoft Teams outgoing webhooks, return the expected error format
if (foundWebhook.provider === 'microsoftteams') {
return NextResponse.json(
{
type: 'message',
text: 'Webhook processing failed',
},
{ status: 200 }
) // Still return 200 to prevent Teams from showing additional error messages
}
return new NextResponse(`Internal Server Error: ${error.message}`, {
status: 500,
})
}
export async function processWebhook(
foundWebhook: any,
foundWorkflow: any,
body: any,
request: NextRequest,
executionId: string,
requestId: string
): Promise<NextResponse> {
try {
// --- Handle Airtable differently - it should always use fetchAndProcessAirtablePayloads ---
if (foundWebhook.provider === 'airtable') {
logger.info(`[${requestId}] Routing Airtable webhook through dedicated processor`)
await fetchAndProcessAirtablePayloads(foundWebhook, foundWorkflow, requestId)
return NextResponse.json({ message: 'Airtable webhook processed' }, { status: 200 })
}
// --- Provider-specific Auth/Verification (excluding Airtable/WhatsApp/Slack/MicrosoftTeams handled earlier) ---
if (
foundWebhook.provider &&
!['airtable', 'whatsapp', 'slack', 'microsoftteams'].includes(foundWebhook.provider)
) {
const verificationResponse = verifyProviderWebhook(foundWebhook, request, requestId)
if (verificationResponse) {
return verificationResponse
}
}
// --- Format Input based on provider (excluding Airtable) ---
const input = formatWebhookInput(foundWebhook, foundWorkflow, body, request)
if (!input && foundWebhook.provider === 'whatsapp') {
return new NextResponse('No messages in WhatsApp payload', { status: 200 })
}
// --- Route to standard processor for all providers ---
return await processStandardWebhook(foundWebhook, foundWorkflow, input, executionId, requestId)
} catch (error: any) {
return handleWebhookError(error, foundWebhook, executionId, requestId)
}
}
/**
* Generate a hash for request deduplication
*/
export async function generateRequestHash(path: string, body: any): Promise<string> {
try {
const normalizedBody = normalizeBody(body)
const requestString = `${path}:${JSON.stringify(normalizedBody)}`
let hash = 0
for (let i = 0; i < requestString.length; i++) {
const char = requestString.charCodeAt(i)
hash = (hash << 5) - hash + char
hash = hash & hash // Convert to 32bit integer
}
return `request:${path}:${hash}`
} catch (_error) {
return `request:${path}:${uuidv4()}`
}
}
/**
* Normalize the body for consistent hashing
*/
export function normalizeBody(body: any): any {
if (!body || typeof body !== 'object') return body
const result = Array.isArray(body) ? [...body] : { ...body }
const fieldsToRemove = [
'timestamp',
'random',
'nonce',
'requestId',
'event_id',
'event_time' /* Add other volatile fields */,
] // Made case-insensitive check below
if (Array.isArray(result)) {
return result.map((item) => normalizeBody(item))
}
for (const key in result) {
// Use lowercase check for broader matching
if (fieldsToRemove.includes(key.toLowerCase())) {
delete result[key]
} else if (typeof result[key] === 'object' && result[key] !== null) {
result[key] = normalizeBody(result[key])
}
}
return result
}
// Define an interface for AirtableChange
export interface AirtableChange {
tableId: string

View File

@@ -0,0 +1,76 @@
import type { ArxivGetAuthorPapersParams, ArxivGetAuthorPapersResponse } from '@/tools/arxiv/types'
import { extractTotalResults, parseArxivXML } from '@/tools/arxiv/utils'
import type { ToolConfig } from '@/tools/types'
export const getAuthorPapersTool: ToolConfig<
ArxivGetAuthorPapersParams,
ArxivGetAuthorPapersResponse
> = {
id: 'arxiv_get_author_papers',
name: 'ArXiv Get Author Papers',
description: 'Search for papers by a specific author on ArXiv.',
version: '1.0.0',
params: {
authorName: {
type: 'string',
required: true,
visibility: 'user-or-llm',
description: 'Author name to search for',
},
maxResults: {
type: 'number',
required: false,
visibility: 'user-only',
description: 'Maximum number of results to return (default: 10, max: 2000)',
},
},
request: {
url: (params: ArxivGetAuthorPapersParams) => {
const baseUrl = 'http://export.arxiv.org/api/query'
const searchParams = new URLSearchParams()
searchParams.append('search_query', `au:"${params.authorName}"`)
searchParams.append(
'max_results',
(params.maxResults ? Math.min(params.maxResults, 2000) : 10).toString()
)
searchParams.append('sortBy', 'submittedDate')
searchParams.append('sortOrder', 'descending')
return `${baseUrl}?${searchParams.toString()}`
},
method: 'GET',
headers: () => ({
'Content-Type': 'application/xml',
}),
},
transformResponse: async (response: Response) => {
if (!response.ok) {
throw new Error(`ArXiv API error: ${response.status} ${response.statusText}`)
}
const xmlText = await response.text()
// Parse XML response
const papers = parseArxivXML(xmlText)
const totalResults = extractTotalResults(xmlText)
return {
success: true,
output: {
authorPapers: papers,
totalResults,
authorName: '', // Will be filled by the calling code
},
}
},
transformError: (error) => {
return error instanceof Error
? error.message
: 'An error occurred while searching for author papers on ArXiv'
},
}
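
Note: for illustration, the request URL the url() builder above would produce; the author name is hypothetical, and the encoding follows URLSearchParams.

// authorName: 'Yoshua Bengio', maxResults: 5
// => http://export.arxiv.org/api/query?search_query=au%3A%22Yoshua+Bengio%22&max_results=5&sortBy=submittedDate&sortOrder=descending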

View File

@@ -0,0 +1,67 @@
import type { ArxivGetPaperParams, ArxivGetPaperResponse } from '@/tools/arxiv/types'
import { parseArxivXML } from '@/tools/arxiv/utils'
import type { ToolConfig } from '@/tools/types'
export const getPaperTool: ToolConfig<ArxivGetPaperParams, ArxivGetPaperResponse> = {
id: 'arxiv_get_paper',
name: 'ArXiv Get Paper',
description: 'Get detailed information about a specific ArXiv paper by its ID.',
version: '1.0.0',
params: {
paperId: {
type: 'string',
required: true,
visibility: 'user-or-llm',
description: 'ArXiv paper ID (e.g., "1706.03762", "cs.AI/0001001")',
},
},
request: {
url: (params: ArxivGetPaperParams) => {
// Clean paper ID - remove arxiv.org URLs if present
let paperId = params.paperId
if (paperId.includes('arxiv.org/abs/')) {
paperId = paperId.split('arxiv.org/abs/')[1]
}
const baseUrl = 'http://export.arxiv.org/api/query'
const searchParams = new URLSearchParams()
searchParams.append('id_list', paperId)
return `${baseUrl}?${searchParams.toString()}`
},
method: 'GET',
headers: () => ({
'Content-Type': 'application/xml',
}),
},
transformResponse: async (response: Response) => {
if (!response.ok) {
throw new Error(`ArXiv API error: ${response.status} ${response.statusText}`)
}
const xmlText = await response.text()
// Parse XML response
const papers = parseArxivXML(xmlText)
if (papers.length === 0) {
throw new Error('Paper not found')
}
return {
success: true,
output: {
paper: papers[0],
},
}
},
transformError: (error) => {
return error instanceof Error
? error.message
: 'An error occurred while retrieving the ArXiv paper'
},
}
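A sketch of the ID-cleaning branch above, using an illustrative abstract URL as input:

let paperId = 'https://arxiv.org/abs/1706.03762'
if (paperId.includes('arxiv.org/abs/')) {
  // Keep everything after the abs/ prefix so bare IDs and full URLs both work
  paperId = paperId.split('arxiv.org/abs/')[1]
}
console.log(paperId) // "1706.03762"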

View File

@@ -0,0 +1,7 @@
import { getAuthorPapersTool } from '@/tools/arxiv/get_author_papers'
import { getPaperTool } from '@/tools/arxiv/get_paper'
import { searchTool } from '@/tools/arxiv/search'
export const arxivSearchTool = searchTool
export const arxivGetPaperTool = getPaperTool
export const arxivGetAuthorPapersTool = getAuthorPapersTool

View File

@@ -0,0 +1,104 @@
import type { ArxivSearchParams, ArxivSearchResponse } from '@/tools/arxiv/types'
import { extractTotalResults, parseArxivXML } from '@/tools/arxiv/utils'
import type { ToolConfig } from '@/tools/types'
export const searchTool: ToolConfig<ArxivSearchParams, ArxivSearchResponse> = {
id: 'arxiv_search',
name: 'ArXiv Search',
description: 'Search for academic papers on ArXiv by keywords, authors, titles, or other fields.',
version: '1.0.0',
params: {
query: {
type: 'string',
required: true,
visibility: 'user-or-llm',
description: 'The search query to execute',
},
searchField: {
type: 'string',
required: false,
visibility: 'user-only',
description:
'Field to search in: all, ti (title), au (author), abs (abstract), co (comment), jr (journal), cat (category), rn (report number)',
},
maxResults: {
type: 'number',
required: false,
visibility: 'user-only',
description: 'Maximum number of results to return (default: 10, max: 2000)',
},
sortBy: {
type: 'string',
required: false,
visibility: 'user-only',
description: 'Sort by: relevance, lastUpdatedDate, submittedDate (default: relevance)',
},
sortOrder: {
type: 'string',
required: false,
visibility: 'user-only',
description: 'Sort order: ascending, descending (default: descending)',
},
},
request: {
url: (params: ArxivSearchParams) => {
const baseUrl = 'http://export.arxiv.org/api/query'
const searchParams = new URLSearchParams()
// Build search query
let searchQuery = params.query
if (params.searchField && params.searchField !== 'all') {
searchQuery = `${params.searchField}:${params.query}`
}
searchParams.append('search_query', searchQuery)
// Add optional parameters
if (params.maxResults) {
searchParams.append('max_results', Math.min(params.maxResults, 2000).toString())
} else {
searchParams.append('max_results', '10')
}
if (params.sortBy) {
searchParams.append('sortBy', params.sortBy)
}
if (params.sortOrder) {
searchParams.append('sortOrder', params.sortOrder)
}
return `${baseUrl}?${searchParams.toString()}`
},
method: 'GET',
headers: () => ({
'Content-Type': 'application/xml',
}),
},
transformResponse: async (response: Response) => {
if (!response.ok) {
throw new Error(`ArXiv API error: ${response.status} ${response.statusText}`)
}
const xmlText = await response.text()
// Parse XML response
const papers = parseArxivXML(xmlText)
const totalResults = extractTotalResults(xmlText)
return {
success: true,
output: {
papers,
totalResults,
query: '', // Will be filled by the calling code
},
}
},
transformError: (error) => {
return error instanceof Error ? error.message : 'An error occurred while searching ArXiv'
},
}
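A sketch of the query string the url builder above produces for a title-field search; the query and limits are illustrative values:

const searchParams = new URLSearchParams()
searchParams.append('search_query', 'ti:attention is all you need') // searchField 'ti' prefixes the query
searchParams.append('max_results', '5')
searchParams.append('sortBy', 'submittedDate')
searchParams.append('sortOrder', 'descending')
console.log(`http://export.arxiv.org/api/query?${searchParams.toString()}`)
// http://export.arxiv.org/api/query?search_query=ti%3Aattention+is+all+you+need&max_results=5&sortBy=submittedDate&sortOrder=descending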

View File

@@ -0,0 +1,65 @@
// Common types for ArXiv tools
import type { ToolResponse } from '@/tools/types'
// Search tool types
export interface ArxivSearchParams {
query: string
searchField?: 'all' | 'ti' | 'au' | 'abs' | 'co' | 'jr' | 'cat' | 'rn'
maxResults?: number
sortBy?: 'relevance' | 'lastUpdatedDate' | 'submittedDate'
sortOrder?: 'ascending' | 'descending'
}
export interface ArxivPaper {
id: string
title: string
summary: string
authors: string[]
published: string
updated: string
link: string
pdfLink: string
categories: string[]
primaryCategory: string
comment?: string
journalRef?: string
doi?: string
}
export interface ArxivSearchResponse extends ToolResponse {
output: {
papers: ArxivPaper[]
totalResults: number
query: string
}
}
// Get Paper Details tool types
export interface ArxivGetPaperParams {
paperId: string
}
export interface ArxivGetPaperResponse extends ToolResponse {
output: {
paper: ArxivPaper
}
}
// Get Author Papers tool types
export interface ArxivGetAuthorPapersParams {
authorName: string
maxResults?: number
}
export interface ArxivGetAuthorPapersResponse extends ToolResponse {
output: {
authorPapers: ArxivPaper[]
totalResults: number
authorName: string
}
}
export type ArxivResponse =
| ArxivSearchResponse
| ArxivGetPaperResponse
| ArxivGetAuthorPapersResponse

View File

@@ -0,0 +1,90 @@
import type { ArxivPaper } from '@/tools/arxiv/types'
export function parseArxivXML(xmlText: string): ArxivPaper[] {
const papers: ArxivPaper[] = []
  // Extract entries using regex, since no XML parser is available in this environment
const entryRegex = /<entry>([\s\S]*?)<\/entry>/g
let match
while ((match = entryRegex.exec(xmlText)) !== null) {
const entryXml = match[1]
const paper: ArxivPaper = {
id: extractXmlValue(entryXml, 'id')?.replace('http://arxiv.org/abs/', '') || '',
title: cleanText(extractXmlValue(entryXml, 'title') || ''),
summary: cleanText(extractXmlValue(entryXml, 'summary') || ''),
authors: extractAuthors(entryXml),
published: extractXmlValue(entryXml, 'published') || '',
updated: extractXmlValue(entryXml, 'updated') || '',
link: extractXmlValue(entryXml, 'id') || '',
pdfLink: extractPdfLink(entryXml),
categories: extractCategories(entryXml),
primaryCategory: extractXmlAttribute(entryXml, 'arxiv:primary_category', 'term') || '',
comment: extractXmlValue(entryXml, 'arxiv:comment'),
journalRef: extractXmlValue(entryXml, 'arxiv:journal_ref'),
doi: extractXmlValue(entryXml, 'arxiv:doi'),
}
papers.push(paper)
}
return papers
}
export function extractTotalResults(xmlText: string): number {
const totalResultsMatch = xmlText.match(
/<opensearch:totalResults[^>]*>(\d+)<\/opensearch:totalResults>/
)
return totalResultsMatch ? Number.parseInt(totalResultsMatch[1], 10) : 0
}
export function extractXmlValue(xml: string, tagName: string): string | undefined {
const regex = new RegExp(`<${tagName}[^>]*>([\\s\\S]*?)<\/${tagName}>`)
const match = xml.match(regex)
return match ? match[1].trim() : undefined
}
export function extractXmlAttribute(
xml: string,
tagName: string,
attrName: string
): string | undefined {
const regex = new RegExp(`<${tagName}[^>]*${attrName}="([^"]*)"[^>]*>`)
const match = xml.match(regex)
return match ? match[1] : undefined
}
export function extractAuthors(entryXml: string): string[] {
const authors: string[] = []
const authorRegex = /<author[^>]*>[\s\S]*?<name>([^<]+)<\/name>[\s\S]*?<\/author>/g
let match
while ((match = authorRegex.exec(entryXml)) !== null) {
authors.push(match[1].trim())
}
return authors
}
export function extractPdfLink(entryXml: string): string {
  // Match the <link> element carrying title="pdf" regardless of attribute
  // order (the ArXiv Atom feed lists title before href on this element)
  const linkRegex = /<link(?=[^>]*title="pdf")[^>]*href="([^"]*)"[^>]*>/
  const match = entryXml.match(linkRegex)
  return match ? match[1] : ''
}
export function extractCategories(entryXml: string): string[] {
const categories: string[] = []
const categoryRegex = /<category[^>]*term="([^"]*)"[^>]*>/g
let match
while ((match = categoryRegex.exec(entryXml)) !== null) {
categories.push(match[1])
}
return categories
}
export function cleanText(text: string): string {
return text.replace(/\s+/g, ' ').trim()
}
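A minimal smoke test for the regex-based parser above, assuming the helpers are importable from '@/tools/arxiv/utils'; the trimmed-down Atom entry is illustrative (real ArXiv responses carry namespaces and more attributes):

import { extractTotalResults, parseArxivXML } from '@/tools/arxiv/utils'

const sampleXml = `
<feed>
  <opensearch:totalResults>1</opensearch:totalResults>
  <entry>
    <id>http://arxiv.org/abs/1706.03762v7</id>
    <title>Attention Is All You Need</title>
    <summary>The dominant sequence transduction models are based on...</summary>
    <author><name>Ashish Vaswani</name></author>
    <published>2017-06-12T17:57:34Z</published>
    <updated>2023-08-02T00:41:18Z</updated>
    <link title="pdf" href="http://arxiv.org/pdf/1706.03762v7" rel="related"/>
    <category term="cs.CL"/>
    <arxiv:primary_category term="cs.CL"/>
  </entry>
</feed>`

const [paper] = parseArxivXML(sampleXml)
console.log(paper.id) // "1706.03762v7"
console.log(paper.authors) // ["Ashish Vaswani"]
console.log(paper.pdfLink) // "http://arxiv.org/pdf/1706.03762v7"
console.log(extractTotalResults(sampleXml)) // 1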

View File

@@ -57,6 +57,11 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
required: false,
description: 'Tag 7 value for the document',
},
documentTagsData: {
type: 'array',
required: false,
description: 'Structured tag data with names, types, and values',
},
},
request: {
url: (params) => `/api/knowledge/${params.knowledgeBaseId}/documents`,
@@ -95,20 +100,32 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
const dataUri = `data:text/plain;base64,${base64Content}`
const tagData: Record<string, string> = {}
if (params.documentTags) {
let parsedTags = params.documentTags
// Handle both string (JSON) and array formats
if (typeof params.documentTags === 'string') {
try {
parsedTags = JSON.parse(params.documentTags)
} catch (error) {
parsedTags = []
}
}
if (Array.isArray(parsedTags)) {
tagData.documentTagsData = JSON.stringify(parsedTags)
}
}
const documents = [
{
filename: documentName.endsWith('.txt') ? documentName : `${documentName}.txt`,
fileUrl: dataUri,
fileSize: contentBytes,
mimeType: 'text/plain',
// Include tags if provided
tag1: params.tag1 || undefined,
tag2: params.tag2 || undefined,
tag3: params.tag3 || undefined,
tag4: params.tag4 || undefined,
tag5: params.tag5 || undefined,
tag6: params.tag6 || undefined,
tag7: params.tag7 || undefined,
...tagData,
},
]

View File

@@ -4,14 +4,13 @@ import type { ToolConfig } from '@/tools/types'
export const knowledgeSearchTool: ToolConfig<any, KnowledgeSearchResponse> = {
id: 'knowledge_search',
name: 'Knowledge Search',
-  description: 'Search for similar content in one or more knowledge bases using vector similarity',
+  description: 'Search for similar content in a knowledge base using vector similarity',
version: '1.0.0',
params: {
-    knowledgeBaseIds: {
+    knowledgeBaseId: {
type: 'string',
required: true,
-      description:
-        'ID of the knowledge base to search in, or comma-separated IDs for multiple knowledge bases',
+      description: 'ID of the knowledge base to search in',
},
query: {
type: 'string',
@@ -23,40 +22,10 @@ export const knowledgeSearchTool: ToolConfig<any, KnowledgeSearchResponse> = {
required: false,
description: 'Number of most similar results to return (1-100)',
},
-    tag1: {
-      type: 'string',
-      required: false,
-      description: 'Filter by tag 1 value',
-    },
-    tag2: {
-      type: 'string',
-      required: false,
-      description: 'Filter by tag 2 value',
-    },
-    tag3: {
-      type: 'string',
-      required: false,
-      description: 'Filter by tag 3 value',
-    },
-    tag4: {
-      type: 'string',
-      required: false,
-      description: 'Filter by tag 4 value',
-    },
-    tag5: {
-      type: 'string',
-      required: false,
-      description: 'Filter by tag 5 value',
-    },
-    tag6: {
-      type: 'string',
-      required: false,
-      description: 'Filter by tag 6 value',
-    },
-    tag7: {
-      type: 'string',
-      required: false,
-      description: 'Filter by tag 7 value',
+    tagFilters: {
+      type: 'any',
+      required: false,
+      description: 'Array of tag filters with tagName and tagValue properties',
},
},
request: {
@@ -68,25 +37,41 @@ export const knowledgeSearchTool: ToolConfig<any, KnowledgeSearchResponse> = {
body: (params) => {
const workflowId = params._context?.workflowId
-      // Handle multiple knowledge base IDs
-      let knowledgeBaseIds = params.knowledgeBaseIds
-      if (typeof knowledgeBaseIds === 'string' && knowledgeBaseIds.includes(',')) {
-        // Split comma-separated string into array
-        knowledgeBaseIds = knowledgeBaseIds
-          .split(',')
-          .map((id) => id.trim())
-          .filter((id) => id.length > 0)
-      }
+      // Use single knowledge base ID
+      const knowledgeBaseIds = [params.knowledgeBaseId]
-      // Build filters object from tag parameters
+      // Parse dynamic tag filters and send display names to API
      const filters: Record<string, string> = {}
-      if (params.tag1) filters.tag1 = params.tag1.toString()
-      if (params.tag2) filters.tag2 = params.tag2.toString()
-      if (params.tag3) filters.tag3 = params.tag3.toString()
-      if (params.tag4) filters.tag4 = params.tag4.toString()
-      if (params.tag5) filters.tag5 = params.tag5.toString()
-      if (params.tag6) filters.tag6 = params.tag6.toString()
-      if (params.tag7) filters.tag7 = params.tag7.toString()
if (params.tagFilters) {
let tagFilters = params.tagFilters
// Handle both string (JSON) and array formats
if (typeof tagFilters === 'string') {
try {
tagFilters = JSON.parse(tagFilters)
} catch (error) {
tagFilters = []
}
}
if (Array.isArray(tagFilters)) {
// Group filters by tag name for OR logic within same tag
const groupedFilters: Record<string, string[]> = {}
tagFilters.forEach((filter: any) => {
if (filter.tagName && filter.tagValue) {
if (!groupedFilters[filter.tagName]) {
groupedFilters[filter.tagName] = []
}
groupedFilters[filter.tagName].push(filter.tagValue)
}
})
// Convert to filters format - for now, join multiple values with OR separator
Object.entries(groupedFilters).forEach(([tagName, values]) => {
filters[tagName] = values.join('|OR|') // Use special separator for OR logic
})
}
}
const requestBody = {
knowledgeBaseIds,
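A sketch of the grouping step above with hypothetical filters; repeated tag names are OR-ed together via the special separator:

const tagFilters = [
  { tagName: 'region', tagValue: 'us' },
  { tagName: 'region', tagValue: 'eu' },
  { tagName: 'language', tagValue: 'en' },
]
const groupedFilters: Record<string, string[]> = {}
tagFilters.forEach((filter) => {
  if (!groupedFilters[filter.tagName]) {
    groupedFilters[filter.tagName] = []
  }
  groupedFilters[filter.tagName].push(filter.tagValue)
})
const filters: Record<string, string> = {}
Object.entries(groupedFilters).forEach(([tagName, values]) => {
  filters[tagName] = values.join('|OR|')
})
console.log(filters) // { region: 'us|OR|eu', language: 'en' }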

View File

@@ -4,6 +4,7 @@ import {
airtableListRecordsTool,
airtableUpdateRecordTool,
} from '@/tools/airtable'
import { arxivGetAuthorPapersTool, arxivGetPaperTool, arxivSearchTool } from '@/tools/arxiv'
import { browserUseRunTaskTool } from '@/tools/browser_use'
import { clayPopulateTool } from '@/tools/clay'
import { confluenceRetrieveTool, confluenceUpdateTool } from '@/tools/confluence'
@@ -125,12 +126,21 @@ import {
wealthboxWriteTaskTool,
} from '@/tools/wealthbox'
import { whatsappSendMessageTool } from '@/tools/whatsapp'
import {
wikipediaPageContentTool,
wikipediaPageSummaryTool,
wikipediaRandomPageTool,
wikipediaSearchTool,
} from '@/tools/wikipedia'
import { workflowExecutorTool } from '@/tools/workflow'
import { xReadTool, xSearchTool, xUserTool, xWriteTool } from '@/tools/x'
import { youtubeSearchTool } from '@/tools/youtube'
// Registry of all available tools
export const tools: Record<string, ToolConfig> = {
arxiv_search: arxivSearchTool,
arxiv_get_paper: arxivGetPaperTool,
arxiv_get_author_papers: arxivGetAuthorPapersTool,
browser_use_run_task: browserUseRunTaskTool,
openai_embeddings: openAIEmbeddings,
http_request: httpRequest,
@@ -262,6 +272,10 @@ export const tools: Record<string, ToolConfig> = {
wealthbox_write_task: wealthboxWriteTaskTool,
wealthbox_read_note: wealthboxReadNoteTool,
wealthbox_write_note: wealthboxWriteNoteTool,
wikipedia_summary: wikipediaPageSummaryTool,
wikipedia_search: wikipediaSearchTool,
wikipedia_content: wikipediaPageContentTool,
wikipedia_random: wikipediaRandomPageTool,
qdrant_fetch_points: qdrantFetchTool,
qdrant_search_vector: qdrantSearchTool,
qdrant_upsert_points: qdrantUpsertTool,

View File

@@ -0,0 +1,73 @@
import type { ToolConfig } from '@/tools/types'
import type {
WikipediaPageContentParams,
WikipediaPageContentResponse,
} from '@/tools/wikipedia/types'
export const pageContentTool: ToolConfig<WikipediaPageContentParams, WikipediaPageContentResponse> =
{
id: 'wikipedia_content',
name: 'Wikipedia Page Content',
description: 'Get the full HTML content of a Wikipedia page.',
version: '1.0.0',
params: {
pageTitle: {
type: 'string',
required: true,
visibility: 'user-or-llm',
description: 'Title of the Wikipedia page to get content for',
},
},
request: {
url: (params: WikipediaPageContentParams) => {
const encodedTitle = encodeURIComponent(params.pageTitle.replace(/ /g, '_'))
return `https://en.wikipedia.org/api/rest_v1/page/html/${encodedTitle}`
},
method: 'GET',
headers: () => ({
'User-Agent': 'SimStudio/1.0 (https://simstudio.ai)',
Accept:
'text/html; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/HTML/2.1.0"',
}),
isInternalRoute: false,
},
transformResponse: async (response: Response) => {
if (!response.ok) {
if (response.status === 404) {
throw new Error('Wikipedia page not found')
}
throw new Error(`Wikipedia API error: ${response.status} ${response.statusText}`)
}
const html = await response.text()
// Extract metadata from response headers
const revision = response.headers.get('etag')?.match(/^"(\d+)/)?.[1] || '0'
const timestamp = response.headers.get('last-modified') || new Date().toISOString()
return {
success: true,
output: {
content: {
title: '', // Will be filled by the calling code
pageid: 0, // Not available from this endpoint
html: html,
revision: Number.parseInt(revision, 10),
tid: response.headers.get('etag') || '',
timestamp: timestamp,
content_model: 'wikitext',
content_format: 'text/html',
},
},
}
},
transformError: (error) => {
return error instanceof Error
? error.message
: 'An error occurred while retrieving the Wikipedia page content'
},
}
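A sketch of the title encoding and etag parsing used above; the page title and etag value are illustrative:

const pageTitle = 'Alan Turing'
const encodedTitle = encodeURIComponent(pageTitle.replace(/ /g, '_'))
console.log(`https://en.wikipedia.org/api/rest_v1/page/html/${encodedTitle}`)
// https://en.wikipedia.org/api/rest_v1/page/html/Alan_Turing

const etag = '"1285758886/f73ae80c-89ad-11ee-8b0c-4f2f7b5f01a9"' // shape of a REST v1 ETag header
console.log(etag.match(/^"(\d+)/)?.[1]) // "1285758886"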

View File

@@ -0,0 +1,9 @@
import { pageContentTool } from '@/tools/wikipedia/content'
import { randomPageTool } from '@/tools/wikipedia/random'
import { searchTool } from '@/tools/wikipedia/search'
import { pageSummaryTool } from '@/tools/wikipedia/summary'
export const wikipediaPageSummaryTool = pageSummaryTool
export const wikipediaSearchTool = searchTool
export const wikipediaPageContentTool = pageContentTool
export const wikipediaRandomPageTool = randomPageTool

Some files were not shown because too many files have changed in this diff.