KB checkpoint

This commit is contained in:
Siddharth Ganesan
2026-02-26 14:59:56 -08:00
parent 7276136398
commit cc5e592c46
8 changed files with 264 additions and 147 deletions

View File

@@ -6,6 +6,7 @@ import { resolveOrCreateChat } from '@/lib/copilot/chat-lifecycle'
import { buildCopilotRequestPayload } from '@/lib/copilot/chat-payload'
import { createSSEStream, SSE_RESPONSE_HEADERS } from '@/lib/copilot/chat-streaming'
import { createRequestTracker, createUnauthorizedResponse } from '@/lib/copilot/request-helpers'
import { generateWorkspaceContext } from '@/lib/copilot/workspace-context'
const logger = createLogger('MothershipChatAPI')
@@ -106,6 +107,8 @@ export async function POST(req: NextRequest) {
: []
}
const workspaceContext = await generateWorkspaceContext(workspaceId, authenticatedUserId)
const requestPayload = await buildCopilotRequestPayload(
{
message,
@@ -117,6 +120,7 @@ export async function POST(req: NextRequest) {
contexts: agentContexts,
fileAttachments,
chatId: actualChatId,
workspaceContext,
},
{ selectedModel: '' }
)

View File

@@ -142,6 +142,7 @@ import { StarterBlock } from '@/blocks/blocks/starter'
import { StripeBlock } from '@/blocks/blocks/stripe'
import { SttBlock, SttV2Block } from '@/blocks/blocks/stt'
import { SupabaseBlock } from '@/blocks/blocks/supabase'
import { TableBlock } from '@/blocks/blocks/table'
import { TavilyBlock } from '@/blocks/blocks/tavily'
import { TelegramBlock } from '@/blocks/blocks/telegram'
import { TextractBlock, TextractV2Block } from '@/blocks/blocks/textract'
@@ -336,8 +337,7 @@ export const registry: Record<string, BlockConfig> = {
stt: SttBlock,
stt_v2: SttV2Block,
supabase: SupabaseBlock,
// TODO: Uncomment when working on tables
// table: TableBlock,
table: TableBlock,
tavily: TavilyBlock,
telegram: TelegramBlock,
textract: TextractBlock,

View File

@@ -22,6 +22,7 @@ export interface BuildPayloadParams {
chatId?: string
prefetch?: boolean
implicitFeedback?: string
workspaceContext?: string
}
interface ToolSchema {
@@ -121,6 +122,7 @@ export async function buildCopilotRequestPayload(
...(processedFileContents.length > 0 ? { fileAttachments: processedFileContents } : {}),
...(integrationTools.length > 0 ? { integrationTools } : {}),
...(commands && commands.length > 0 ? { commands } : {}),
...(params.workspaceContext ? { workspaceContext: params.workspaceContext } : {}),
isHosted,
}
}

View File

@@ -1 +0,0 @@
export const AGENT_MODE_SYSTEM_PROMPT = `You are a helpful AI assistant for Sim, a powerful workflow automation platform.`

View File

@@ -2,6 +2,10 @@ import { createLogger } from '@sim/logger'
import type { BaseServerTool } from '@/lib/copilot/tools/server/base-tool'
import type { KnowledgeBaseArgs, KnowledgeBaseResult } from '@/lib/copilot/tools/shared/schemas'
import { generateSearchEmbedding } from '@/lib/knowledge/embeddings'
import {
createSingleDocument,
processDocumentAsync,
} from '@/lib/knowledge/documents/service'
import {
createKnowledgeBase,
deleteKnowledgeBase,
@@ -16,6 +20,8 @@ import {
getTagUsageStats,
updateTagDefinition,
} from '@/lib/knowledge/tags/service'
import { StorageService } from '@/lib/uploads'
import { listWorkspaceFiles } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
import { getQueryStrategy, handleVectorOnlySearch } from '@/app/api/knowledge/search/utils'
const logger = createLogger('KnowledgeBaseServerTool')
@@ -197,6 +203,95 @@ export const knowledgeBaseServerTool: BaseServerTool<KnowledgeBaseArgs, Knowledg
}
}
case 'add_file': {
  // Attach an existing workspace file to a knowledge base: create a document
  // row, then kick off asynchronous chunking + embedding in the background.
  if (!args.knowledgeBaseId) {
    return {
      success: false,
      message: 'Knowledge base ID is required for add_file operation',
    }
  }
  if (!args.filePath) {
    return {
      success: false,
      message: 'filePath is required (e.g. "files/report.pdf")',
    }
  }
  const targetKb = await getKnowledgeBaseById(args.knowledgeBaseId)
  // workspaceId is needed below to scope the file lookup, so a KB without one
  // is reported the same as a missing KB.
  if (!targetKb || !targetKb.workspaceId) {
    return {
      success: false,
      message: `Knowledge base with ID "${args.knowledgeBaseId}" not found`,
    }
  }
  // Accept either a bare name ("report.pdf") or a VFS-style "files/…" path.
  const match = args.filePath.match(/^files\/(.+)$/)
  const fileName = match ? match[1] : args.filePath
  const kbWorkspaceId: string = targetKb.workspaceId
  const files = await listWorkspaceFiles(kbWorkspaceId)
  // Also compare NFC-normalized names so Unicode filenames match regardless of
  // composed/decomposed form.
  const fileRecord = files.find(
    (f) => f.name === fileName || f.name.normalize('NFC') === fileName.normalize('NFC')
  )
  if (!fileRecord) {
    return {
      success: false,
      message: `Workspace file not found: "${args.filePath}"`,
    }
  }
  // 5 * 60 — presumably the URL TTL in seconds (confirm against StorageService);
  // it only needs to outlive document creation and the start of processing.
  const presignedUrl = await StorageService.generatePresignedDownloadUrl(
    fileRecord.key,
    'workspace',
    5 * 60
  )
  // Short correlation id (8 chars) passed through to the document service.
  const requestId = crypto.randomUUID().slice(0, 8)
  const doc = await createSingleDocument(
    {
      filename: fileRecord.name,
      fileUrl: presignedUrl,
      fileSize: fileRecord.size,
      mimeType: fileRecord.type,
    },
    args.knowledgeBaseId,
    requestId
  )
  // Fire-and-forget: processing continues after this response returns; the
  // .catch prevents an unhandled promise rejection.
  processDocumentAsync(args.knowledgeBaseId, doc.id, {
    filename: fileRecord.name,
    fileUrl: presignedUrl,
    fileSize: fileRecord.size,
    mimeType: fileRecord.type,
  }, {}).catch((err) => {
    logger.error('Background document processing failed', {
      documentId: doc.id,
      error: err instanceof Error ? err.message : String(err),
    })
  })
  logger.info('Workspace file added to knowledge base via copilot', {
    knowledgeBaseId: args.knowledgeBaseId,
    documentId: doc.id,
    fileName: fileRecord.name,
    userId: context.userId,
  })
  return {
    success: true,
    message: `File "${fileRecord.name}" added to knowledge base "${targetKb.name}". Processing started (chunking + embedding).`,
    data: {
      documentId: doc.id,
      knowledgeBaseId: args.knowledgeBaseId,
      knowledgeBaseName: targetKb.name,
      filename: fileRecord.name,
      fileSize: fileRecord.size,
      mimeType: fileRecord.type,
    },
  }
}
case 'update': {
if (!args.knowledgeBaseId) {
return {
@@ -449,7 +544,7 @@ export const knowledgeBaseServerTool: BaseServerTool<KnowledgeBaseArgs, Knowledg
default:
return {
success: false,
message: `Unknown operation: ${operation}. Supported operations: create, list, get, query, update, delete, list_tags, create_tag, update_tag, delete_tag, get_tag_usage`,
message: `Unknown operation: ${operation}. Supported operations: create, get, query, add_file, update, delete, list_tags, create_tag, update_tag, delete_tag, get_tag_usage`,
}
}
} catch (error) {

View File

@@ -27,6 +27,7 @@ export const KnowledgeBaseArgsSchema = z.object({
'query',
'update',
'delete',
'add_file',
'list_tags',
'create_tag',
'update_tag',
@@ -41,8 +42,10 @@ export const KnowledgeBaseArgsSchema = z.object({
description: z.string().optional(),
/** Workspace ID to associate with (required for create, optional for list) */
workspaceId: z.string().optional(),
/** Knowledge base ID (required for get, query, list_tags, create_tag, get_tag_usage) */
/** Knowledge base ID (required for get, query, add_file, list_tags, create_tag, get_tag_usage) */
knowledgeBaseId: z.string().optional(),
/** Workspace file path to add as a document (required for add_file). Example: "files/report.pdf" */
filePath: z.string().optional(),
/** Search query text (required for query) */
query: z.string().optional(),
/** Number of results to return (optional for query, defaults to 5) */

View File

@@ -0,0 +1,156 @@
import { db } from '@sim/db'
import {
account,
knowledgeBase,
userTableDefinitions,
userTableRows,
workflow,
workspaceFiles,
} from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, count, eq, isNull } from 'drizzle-orm'
const logger = createLogger('WorkspaceContext')
/**
 * Generate WORKSPACE.md content from actual database state.
 * This is injected into the system prompt — the LLM never writes it directly.
 *
 * @param workspaceId - Workspace whose workflows, KBs, tables and files are inventoried.
 * @param userId - User whose connected OAuth accounts populate the Credentials section.
 * @returns Markdown with Workflows / Knowledge Bases / Tables / Files / Credentials
 *   sections. Never throws: on any failure it returns "(unavailable)" placeholders
 *   so prompt assembly cannot break.
 */
export async function generateWorkspaceContext(
  workspaceId: string,
  userId: string
): Promise<string> {
  try {
    // The five inventories are independent queries — run them in parallel.
    const [workflows, kbs, tables, files, credentials] = await Promise.all([
      db
        .select({
          id: workflow.id,
          name: workflow.name,
          description: workflow.description,
          isDeployed: workflow.isDeployed,
          lastRunAt: workflow.lastRunAt,
        })
        .from(workflow)
        .where(eq(workflow.workspaceId, workspaceId)),
      db
        .select({
          id: knowledgeBase.id,
          name: knowledgeBase.name,
          description: knowledgeBase.description,
        })
        .from(knowledgeBase)
        // Exclude soft-deleted knowledge bases.
        .where(and(eq(knowledgeBase.workspaceId, workspaceId), isNull(knowledgeBase.deletedAt))),
      db
        .select({
          id: userTableDefinitions.id,
          name: userTableDefinitions.name,
          description: userTableDefinitions.description,
        })
        .from(userTableDefinitions)
        .where(eq(userTableDefinitions.workspaceId, workspaceId)),
      db
        .select({
          id: workspaceFiles.id,
          originalName: workspaceFiles.originalName,
          contentType: workspaceFiles.contentType,
          size: workspaceFiles.size,
        })
        .from(workspaceFiles)
        .where(eq(workspaceFiles.workspaceId, workspaceId)),
      db
        .select({
          providerId: account.providerId,
        })
        .from(account)
        .where(eq(account.userId, userId)),
    ])

    const sections: string[] = []

    // Workflows
    if (workflows.length > 0) {
      const lines = workflows.map((wf) => {
        const parts = [`- **${wf.name}** (${wf.id})`]
        if (wf.description) parts.push(` ${wf.description}`)
        const flags: string[] = []
        if (wf.isDeployed) flags.push('deployed')
        if (wf.lastRunAt) flags.push(`last run: ${wf.lastRunAt.toISOString().split('T')[0]}`)
        // Fix: delimit flags from the "**name** (id)" prefix with " — ";
        // previously they were concatenated with no separator.
        if (flags.length > 0) parts[0] += ` — ${flags.join(', ')}`
        return parts.join('\n')
      })
      sections.push(`## Workflows\n${lines.join('\n')}`)
    } else {
      sections.push('## Workflows\n(none)')
    }

    // Knowledge Bases
    if (kbs.length > 0) {
      const lines = kbs.map((kb) => {
        let line = `- **${kb.name}** (${kb.id})`
        // Fix: delimit the description with " — " (was concatenated directly),
        // matching the Tables section style.
        if (kb.description) line += ` — ${kb.description}`
        return line
      })
      sections.push(`## Knowledge Bases\n${lines.join('\n')}`)
    } else {
      sections.push('## Knowledge Bases\n(none)')
    }

    // Tables (live row counts, one count query per table, run in parallel)
    if (tables.length > 0) {
      const rowCounts = await Promise.all(
        tables.map(async (t) => {
          const [row] = await db
            .select({ count: count() })
            .from(userTableRows)
            .where(eq(userTableRows.tableId, t.id))
          return row?.count ?? 0
        })
      )
      const lines = tables.map((t, i) => {
        let line = `- **${t.name}** (${t.id}) — ${rowCounts[i]} rows`
        if (t.description) line += `, ${t.description}`
        return line
      })
      sections.push(`## Tables\n${lines.join('\n')}`)
    } else {
      sections.push('## Tables\n(none)')
    }

    // Files
    if (files.length > 0) {
      const lines = files.map(
        (f) => `- **${f.originalName}** (${f.contentType}, ${formatSize(f.size)})`
      )
      sections.push(`## Files\n${lines.join('\n')}`)
    } else {
      sections.push('## Files\n(none)')
    }

    // Credentials — dedupe provider ids across the user's OAuth accounts.
    if (credentials.length > 0) {
      const providers = [...new Set(credentials.map((c) => c.providerId))]
      sections.push(`## Credentials\nConnected: ${providers.join(', ')}`)
    } else {
      sections.push('## Credentials\n(none)')
    }

    return sections.join('\n\n')
  } catch (err) {
    // Degrade gracefully: the chat route must still be able to build a prompt.
    logger.error('Failed to generate workspace context', {
      workspaceId,
      error: err instanceof Error ? err.message : String(err),
    })
    return '## Workflows\n(unavailable)\n\n## Knowledge Bases\n(unavailable)\n\n## Tables\n(unavailable)\n\n## Files\n(unavailable)\n\n## Credentials\n(unavailable)'
  }
}
/**
 * Render a byte count as a compact human-readable size.
 * Exact bytes below 1 KiB; one decimal place for KB and MB tiers.
 */
function formatSize(bytes: number): string {
  const KB = 1024
  const MB = KB * 1024
  if (bytes >= MB) return `${(bytes / MB).toFixed(1)}MB`
  if (bytes >= KB) return `${(bytes / KB).toFixed(1)}KB`
  return `${bytes}B`
}

View File

@@ -1,142 +0,0 @@
/**
 * System prompt for workspace-level chat.
 *
 * Sent as `systemPrompt` in the Go request payload, which overrides the
 * default agent prompt (see copilot/internal/chat/service.go:300-303).
 *
 * Only references subagents available in agent mode (build and discovery
 * are excluded from agent mode tools in the Go backend).
 *
 * @returns The fully rendered system prompt, date-stamped with today's date.
 */
export function getWorkspaceChatSystemPrompt(): string {
// YYYY-MM-DD (UTC) — lets the model anchor relative dates like "yesterday".
const currentDate = new Date().toISOString().split('T')[0]
// NOTE: the template below IS the runtime system prompt. Every character,
// including whitespace inside the literal, is sent to the model verbatim —
// do not reformat it.
return `# Sim Workspace Assistant
Current Date: ${currentDate}
You are the Sim workspace assistant — a helpful AI that manages an entire workspace of workflows. The user is chatting from the workspace level, not from within a specific workflow.
## Your Role
You help users with their workspace: answering questions, building and debugging workflows, managing integrations, and providing guidance. You delegate complex tasks to specialized subagents.
## Platform Knowledge
Sim is a workflow automation platform. Workflows are visual pipelines of blocks (Agent, Function, Condition, Router, API, etc.). Workflows can be triggered manually, via API, webhooks, or schedules. They can be deployed as APIs, Chat UIs, or MCP tools.
## Subagents
You have access to these specialized subagents. Call them by name to delegate tasks:
| Subagent | Purpose | When to Use |
|----------|---------|-------------|
| **plan** | Gather info, create execution plans | Building new workflows, planning fixes |
| **edit** | Execute plans, make workflow changes | ONLY after plan returns steps |
| **debug** | Investigate errors, provide diagnosis | User reports something broken |
| **test** | Run workflow, verify results | After edits to validate |
| **deploy** | Deploy/undeploy workflows | Publish as API, Chat, or MCP |
| **workflow** | Env vars, settings, list workflows | Configuration and workflow discovery |
| **auth** | Connect OAuth integrations | Slack, Gmail, Google Sheets, etc. |
| **knowledge** | Create/query knowledge bases | RAG, document search |
| **research** | External API docs, best practices | Stripe, Twilio, etc. |
| **info** | Block details, outputs, variables | Quick lookups about workflow state |
| **superagent** | Interact with external services NOW | Read emails, send Slack, check calendar |
## Direct Tools
- **search_online** — Search the web for information.
- **context_write(file_path, content)** — Write/update persistent context files (WORKSPACE.md, SESSION.md).
- **grep(pattern, path?)** — Search workspace VFS file contents.
- **glob(pattern)** — Find workspace VFS files by path pattern.
- **read(path)** — Read a workspace VFS file.
- **list(path)** — List workspace VFS directory entries.
- **create_workflow(name, description?)** — Create a new workflow.
- **update_workflow(workflowId, name?, description?)** — Update workflow name or description.
- **delete_workflow(workflowId)** — Delete a workflow.
- **rename_folder(folderId, name)** — Rename a folder.
- **delete_folder(folderId)** — Delete a folder (moves contents to parent).
## Workspace Virtual Filesystem (VFS)
Your workspace data is available as a virtual filesystem. Use grep/glob/read/list to explore it before taking action.
\`\`\`
workflows/{name}/
meta.json — name, description, id, run stats
blocks.json — workflow block graph (sanitized)
edges.json — block connections
executions.json — last 5 run results
deployment.json — all deployment configs (api, chat, form, mcp, a2a)
knowledgebases/{name}/
meta.json — KB identity, embedding config, stats
documents.json — document metadata
files/{name}/
meta.json — uploaded file metadata (name, type, size)
custom-tools/{name}.json — custom tool schema + code preview
environment/
credentials.json — connected OAuth providers
api-keys.json — API key metadata (names, not values)
variables.json — env variable names (not values)
components/
blocks/{type}.json — block type schemas
integrations/{svc}/{op}.json — integration tool schemas
internal/
memories/WORKSPACE.md — workspace inventory (auto-injected)
memories/SESSION.md — current session state (auto-injected)
\`\`\`
**Tips**: Use \`glob("workflows/*/deployment.json")\` to see which workflows are deployed and how. Use \`grep("error", "workflows/")\` to find workflows with recent errors.
## Context System — CRITICAL
Two context files are auto-injected into your system prompt above. You MUST keep them up to date.
| File | Scope | Injected as |
|------|-------|-------------|
| **WORKSPACE.md** | Workspace (persists across chats) | \`## Workspace Context\` above |
| **SESSION.md** | This chat only | \`## Session Context\` above |
### WORKSPACE.md — You MUST keep this current
**On your FIRST turn**: if Workspace Context above shows "(none discovered yet)", scan the workspace immediately:
1. Run \`glob("workflows/*/meta.json")\`, \`glob("knowledgebases/*/meta.json")\`, \`glob("tables/*/meta.json")\`, \`glob("files/*/meta.json")\`, \`read("environment/credentials.json")\`
2. Write the results via \`context_write("WORKSPACE.md", content)\`
Do this silently as your first action — do NOT ask the user for permission.
**After ANY resource change** (create/edit/delete workflow, KB, table, credential): update WORKSPACE.md immediately.
### SESSION.md — You MUST update after every significant action
After completing any meaningful action (creating a workflow, making edits, deploying, making a decision), rewrite SESSION.md completely with the current state via \`context_write("SESSION.md", content)\`.
Always rewrite the entire file — never append. Keep the existing section structure.
### Reading context files
To read context files, use \`read("internal/memories/WORKSPACE.md")\` or \`read("internal/memories/SESSION.md")\`.
## Discovery-First Rule
**Before creating any new resource**, check what already exists:
1. Check Workspace Context above for existing resources
2. If unclear, run \`glob("workflows/*/meta.json")\` to verify
3. Only create if nothing matches the user's request
## Decision Flow
- User says something broke → **debug()** first, then plan() → edit()
- User wants to build/automate something → **plan()** → edit() → test()
- User wants to DO something NOW (send email, check calendar) → **superagent()**
- User wants to deploy → **deploy()**
- User asks about their workflows → **workflow()** or **info()**
- User needs OAuth → **auth()**
## Important
- **You work at the workspace level.** When a user mentions a workflow, check Session Context and Workspace Context first.
- **Always delegate complex work** to the appropriate subagent.
- **Debug first** when something doesn't work — don't guess.
- Be concise and results-focused.
- Think internally, speak to the user only when the task is complete or you need input.
`
}
}