KB checkpoint

This commit is contained in:
Siddharth Ganesan
2026-02-26 14:59:56 -08:00
parent 7276136398
commit cc5e592c46
8 changed files with 264 additions and 147 deletions

View File

@@ -6,6 +6,7 @@ import { resolveOrCreateChat } from '@/lib/copilot/chat-lifecycle'
import { buildCopilotRequestPayload } from '@/lib/copilot/chat-payload'
import { createSSEStream, SSE_RESPONSE_HEADERS } from '@/lib/copilot/chat-streaming'
import { createRequestTracker, createUnauthorizedResponse } from '@/lib/copilot/request-helpers'
import { generateWorkspaceContext } from '@/lib/copilot/workspace-context'
const logger = createLogger('MothershipChatAPI')
@@ -106,6 +107,8 @@ export async function POST(req: NextRequest) {
: []
}
const workspaceContext = await generateWorkspaceContext(workspaceId, authenticatedUserId)
const requestPayload = await buildCopilotRequestPayload(
{
message,
@@ -117,6 +120,7 @@ export async function POST(req: NextRequest) {
contexts: agentContexts,
fileAttachments,
chatId: actualChatId,
workspaceContext,
},
{ selectedModel: '' }
)

View File

@@ -142,6 +142,7 @@ import { StarterBlock } from '@/blocks/blocks/starter'
import { StripeBlock } from '@/blocks/blocks/stripe'
import { SttBlock, SttV2Block } from '@/blocks/blocks/stt'
import { SupabaseBlock } from '@/blocks/blocks/supabase'
import { TableBlock } from '@/blocks/blocks/table'
import { TavilyBlock } from '@/blocks/blocks/tavily'
import { TelegramBlock } from '@/blocks/blocks/telegram'
import { TextractBlock, TextractV2Block } from '@/blocks/blocks/textract'
@@ -336,8 +337,7 @@ export const registry: Record<string, BlockConfig> = {
stt: SttBlock,
stt_v2: SttV2Block,
supabase: SupabaseBlock,
// TODO: Uncomment when working on tables
// table: TableBlock,
table: TableBlock,
tavily: TavilyBlock,
telegram: TelegramBlock,
textract: TextractBlock,

View File

@@ -22,6 +22,7 @@ export interface BuildPayloadParams {
chatId?: string
prefetch?: boolean
implicitFeedback?: string
workspaceContext?: string
}
interface ToolSchema {
@@ -121,6 +122,7 @@ export async function buildCopilotRequestPayload(
...(processedFileContents.length > 0 ? { fileAttachments: processedFileContents } : {}),
...(integrationTools.length > 0 ? { integrationTools } : {}),
...(commands && commands.length > 0 ? { commands } : {}),
...(params.workspaceContext ? { workspaceContext: params.workspaceContext } : {}),
isHosted,
}
}

View File

@@ -1 +0,0 @@
export const AGENT_MODE_SYSTEM_PROMPT = `You are a helpful AI assistant for Sim, a powerful workflow automation platform.`

View File

@@ -2,6 +2,10 @@ import { createLogger } from '@sim/logger'
import type { BaseServerTool } from '@/lib/copilot/tools/server/base-tool'
import type { KnowledgeBaseArgs, KnowledgeBaseResult } from '@/lib/copilot/tools/shared/schemas'
import { generateSearchEmbedding } from '@/lib/knowledge/embeddings'
import {
createSingleDocument,
processDocumentAsync,
} from '@/lib/knowledge/documents/service'
import {
createKnowledgeBase,
deleteKnowledgeBase,
@@ -16,6 +20,8 @@ import {
getTagUsageStats,
updateTagDefinition,
} from '@/lib/knowledge/tags/service'
import { StorageService } from '@/lib/uploads'
import { listWorkspaceFiles } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
import { getQueryStrategy, handleVectorOnlySearch } from '@/app/api/knowledge/search/utils'
const logger = createLogger('KnowledgeBaseServerTool')
@@ -197,6 +203,95 @@ export const knowledgeBaseServerTool: BaseServerTool<KnowledgeBaseArgs, Knowledg
}
}
case 'add_file': {
  // Attach an existing workspace file to a knowledge base: create a document
  // row, then kick off asynchronous chunking + embedding in the background.
  if (!args.knowledgeBaseId) {
    return {
      success: false,
      message: 'Knowledge base ID is required for add_file operation',
    }
  }
  if (!args.filePath) {
    return {
      success: false,
      message: 'filePath is required (e.g. "files/report.pdf")',
    }
  }
  const targetKb = await getKnowledgeBaseById(args.knowledgeBaseId)
  // workspaceId is needed below to scope the file lookup, so a KB without one
  // is reported the same as a missing KB.
  if (!targetKb || !targetKb.workspaceId) {
    return {
      success: false,
      message: `Knowledge base with ID "${args.knowledgeBaseId}" not found`,
    }
  }
  // Accept either a bare name ("report.pdf") or a VFS-style "files/…" path.
  const match = args.filePath.match(/^files\/(.+)$/)
  const fileName = match ? match[1] : args.filePath
  const kbWorkspaceId: string = targetKb.workspaceId
  const files = await listWorkspaceFiles(kbWorkspaceId)
  // Also compare NFC-normalized names so Unicode filenames match regardless of
  // composed/decomposed form.
  const fileRecord = files.find(
    (f) => f.name === fileName || f.name.normalize('NFC') === fileName.normalize('NFC')
  )
  if (!fileRecord) {
    return {
      success: false,
      message: `Workspace file not found: "${args.filePath}"`,
    }
  }
  // 5 * 60 — presumably the URL TTL in seconds (confirm against StorageService);
  // it only needs to outlive document creation and the start of processing.
  const presignedUrl = await StorageService.generatePresignedDownloadUrl(
    fileRecord.key,
    'workspace',
    5 * 60
  )
  // Short correlation id (8 chars) passed through to the document service.
  const requestId = crypto.randomUUID().slice(0, 8)
  const doc = await createSingleDocument(
    {
      filename: fileRecord.name,
      fileUrl: presignedUrl,
      fileSize: fileRecord.size,
      mimeType: fileRecord.type,
    },
    args.knowledgeBaseId,
    requestId
  )
  // Fire-and-forget: processing continues after this response returns; the
  // .catch prevents an unhandled promise rejection.
  processDocumentAsync(args.knowledgeBaseId, doc.id, {
    filename: fileRecord.name,
    fileUrl: presignedUrl,
    fileSize: fileRecord.size,
    mimeType: fileRecord.type,
  }, {}).catch((err) => {
    logger.error('Background document processing failed', {
      documentId: doc.id,
      error: err instanceof Error ? err.message : String(err),
    })
  })
  logger.info('Workspace file added to knowledge base via copilot', {
    knowledgeBaseId: args.knowledgeBaseId,
    documentId: doc.id,
    fileName: fileRecord.name,
    userId: context.userId,
  })
  return {
    success: true,
    message: `File "${fileRecord.name}" added to knowledge base "${targetKb.name}". Processing started (chunking + embedding).`,
    data: {
      documentId: doc.id,
      knowledgeBaseId: args.knowledgeBaseId,
      knowledgeBaseName: targetKb.name,
      filename: fileRecord.name,
      fileSize: fileRecord.size,
      mimeType: fileRecord.type,
    },
  }
}
case 'update': {
if (!args.knowledgeBaseId) {
return {
@@ -449,7 +544,7 @@ export const knowledgeBaseServerTool: BaseServerTool<KnowledgeBaseArgs, Knowledg
default:
return {
success: false,
message: `Unknown operation: ${operation}. Supported operations: create, list, get, query, update, delete, list_tags, create_tag, update_tag, delete_tag, get_tag_usage`,
message: `Unknown operation: ${operation}. Supported operations: create, get, query, add_file, update, delete, list_tags, create_tag, update_tag, delete_tag, get_tag_usage`,
}
}
} catch (error) {

View File

@@ -27,6 +27,7 @@ export const KnowledgeBaseArgsSchema = z.object({
'query',
'update',
'delete',
'add_file',
'list_tags',
'create_tag',
'update_tag',
@@ -41,8 +42,10 @@ export const KnowledgeBaseArgsSchema = z.object({
description: z.string().optional(),
/** Workspace ID to associate with (required for create, optional for list) */
workspaceId: z.string().optional(),
/** Knowledge base ID (required for get, query, list_tags, create_tag, get_tag_usage) */
/** Knowledge base ID (required for get, query, add_file, list_tags, create_tag, get_tag_usage) */
knowledgeBaseId: z.string().optional(),
/** Workspace file path to add as a document (required for add_file). Example: "files/report.pdf" */
filePath: z.string().optional(),
/** Search query text (required for query) */
query: z.string().optional(),
/** Number of results to return (optional for query, defaults to 5) */

View File

@@ -0,0 +1,156 @@
import { db } from '@sim/db'
import {
account,
knowledgeBase,
userTableDefinitions,
userTableRows,
workflow,
workspaceFiles,
} from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, count, eq, isNull } from 'drizzle-orm'
const logger = createLogger('WorkspaceContext')
/**
 * Generate WORKSPACE.md content from actual database state.
 * This is injected into the system prompt — the LLM never writes it directly.
 *
 * @param workspaceId - Workspace whose workflows, KBs, tables and files are inventoried.
 * @param userId - User whose connected OAuth accounts populate the Credentials section.
 * @returns Markdown with Workflows / Knowledge Bases / Tables / Files / Credentials
 *   sections. Never throws: on any failure it returns "(unavailable)" placeholders
 *   so prompt assembly cannot break.
 */
export async function generateWorkspaceContext(
  workspaceId: string,
  userId: string
): Promise<string> {
  try {
    // The five inventories are independent queries — run them in parallel.
    const [workflows, kbs, tables, files, credentials] = await Promise.all([
      db
        .select({
          id: workflow.id,
          name: workflow.name,
          description: workflow.description,
          isDeployed: workflow.isDeployed,
          lastRunAt: workflow.lastRunAt,
        })
        .from(workflow)
        .where(eq(workflow.workspaceId, workspaceId)),
      db
        .select({
          id: knowledgeBase.id,
          name: knowledgeBase.name,
          description: knowledgeBase.description,
        })
        .from(knowledgeBase)
        // Exclude soft-deleted knowledge bases.
        .where(and(eq(knowledgeBase.workspaceId, workspaceId), isNull(knowledgeBase.deletedAt))),
      db
        .select({
          id: userTableDefinitions.id,
          name: userTableDefinitions.name,
          description: userTableDefinitions.description,
        })
        .from(userTableDefinitions)
        .where(eq(userTableDefinitions.workspaceId, workspaceId)),
      db
        .select({
          id: workspaceFiles.id,
          originalName: workspaceFiles.originalName,
          contentType: workspaceFiles.contentType,
          size: workspaceFiles.size,
        })
        .from(workspaceFiles)
        .where(eq(workspaceFiles.workspaceId, workspaceId)),
      db
        .select({
          providerId: account.providerId,
        })
        .from(account)
        .where(eq(account.userId, userId)),
    ])

    const sections: string[] = []

    // Workflows
    if (workflows.length > 0) {
      const lines = workflows.map((wf) => {
        const parts = [`- **${wf.name}** (${wf.id})`]
        if (wf.description) parts.push(` ${wf.description}`)
        const flags: string[] = []
        if (wf.isDeployed) flags.push('deployed')
        if (wf.lastRunAt) flags.push(`last run: ${wf.lastRunAt.toISOString().split('T')[0]}`)
        // Fix: delimit flags from the "**name** (id)" prefix with " — ";
        // previously they were concatenated with no separator.
        if (flags.length > 0) parts[0] += ` — ${flags.join(', ')}`
        return parts.join('\n')
      })
      sections.push(`## Workflows\n${lines.join('\n')}`)
    } else {
      sections.push('## Workflows\n(none)')
    }

    // Knowledge Bases
    if (kbs.length > 0) {
      const lines = kbs.map((kb) => {
        let line = `- **${kb.name}** (${kb.id})`
        // Fix: delimit the description with " — " (was concatenated directly),
        // matching the Tables section style.
        if (kb.description) line += ` — ${kb.description}`
        return line
      })
      sections.push(`## Knowledge Bases\n${lines.join('\n')}`)
    } else {
      sections.push('## Knowledge Bases\n(none)')
    }

    // Tables (live row counts, one count query per table, run in parallel)
    if (tables.length > 0) {
      const rowCounts = await Promise.all(
        tables.map(async (t) => {
          const [row] = await db
            .select({ count: count() })
            .from(userTableRows)
            .where(eq(userTableRows.tableId, t.id))
          return row?.count ?? 0
        })
      )
      const lines = tables.map((t, i) => {
        let line = `- **${t.name}** (${t.id}) — ${rowCounts[i]} rows`
        if (t.description) line += `, ${t.description}`
        return line
      })
      sections.push(`## Tables\n${lines.join('\n')}`)
    } else {
      sections.push('## Tables\n(none)')
    }

    // Files
    if (files.length > 0) {
      const lines = files.map(
        (f) => `- **${f.originalName}** (${f.contentType}, ${formatSize(f.size)})`
      )
      sections.push(`## Files\n${lines.join('\n')}`)
    } else {
      sections.push('## Files\n(none)')
    }

    // Credentials — dedupe provider ids across the user's OAuth accounts.
    if (credentials.length > 0) {
      const providers = [...new Set(credentials.map((c) => c.providerId))]
      sections.push(`## Credentials\nConnected: ${providers.join(', ')}`)
    } else {
      sections.push('## Credentials\n(none)')
    }

    return sections.join('\n\n')
  } catch (err) {
    // Degrade gracefully: the chat route must still be able to build a prompt.
    logger.error('Failed to generate workspace context', {
      workspaceId,
      error: err instanceof Error ? err.message : String(err),
    })
    return '## Workflows\n(unavailable)\n\n## Knowledge Bases\n(unavailable)\n\n## Tables\n(unavailable)\n\n## Files\n(unavailable)\n\n## Credentials\n(unavailable)'
  }
}
/**
 * Render a byte count as a compact human-readable size.
 * Exact bytes below 1 KiB; one decimal place for KB and MB tiers.
 */
function formatSize(bytes: number): string {
  const KB = 1024
  const MB = KB * 1024
  if (bytes >= MB) return `${(bytes / MB).toFixed(1)}MB`
  if (bytes >= KB) return `${(bytes / KB).toFixed(1)}KB`
  return `${bytes}B`
}

View File

@@ -1,142 +0,0 @@
/**
 * System prompt for workspace-level chat.
 *
 * Sent as `systemPrompt` in the Go request payload, which overrides the
 * default agent prompt (see copilot/internal/chat/service.go:300-303).
 *
 * Only references subagents available in agent mode (build and discovery
 * are excluded from agent mode tools in the Go backend).
 *
 * @returns The fully rendered system prompt, date-stamped with today's date.
 */
export function getWorkspaceChatSystemPrompt(): string {
// YYYY-MM-DD (UTC) — lets the model anchor relative dates like "yesterday".
const currentDate = new Date().toISOString().split('T')[0]
// NOTE: the template below IS the runtime system prompt. Every character,
// including whitespace inside the literal, is sent to the model verbatim —
// do not reformat it.
return `# Sim Workspace Assistant
Current Date: ${currentDate}
You are the Sim workspace assistant — a helpful AI that manages an entire workspace of workflows. The user is chatting from the workspace level, not from within a specific workflow.
## Your Role
You help users with their workspace: answering questions, building and debugging workflows, managing integrations, and providing guidance. You delegate complex tasks to specialized subagents.
## Platform Knowledge
Sim is a workflow automation platform. Workflows are visual pipelines of blocks (Agent, Function, Condition, Router, API, etc.). Workflows can be triggered manually, via API, webhooks, or schedules. They can be deployed as APIs, Chat UIs, or MCP tools.
## Subagents
You have access to these specialized subagents. Call them by name to delegate tasks:
| Subagent | Purpose | When to Use |
|----------|---------|-------------|
| **plan** | Gather info, create execution plans | Building new workflows, planning fixes |
| **edit** | Execute plans, make workflow changes | ONLY after plan returns steps |
| **debug** | Investigate errors, provide diagnosis | User reports something broken |
| **test** | Run workflow, verify results | After edits to validate |
| **deploy** | Deploy/undeploy workflows | Publish as API, Chat, or MCP |
| **workflow** | Env vars, settings, list workflows | Configuration and workflow discovery |
| **auth** | Connect OAuth integrations | Slack, Gmail, Google Sheets, etc. |
| **knowledge** | Create/query knowledge bases | RAG, document search |
| **research** | External API docs, best practices | Stripe, Twilio, etc. |
| **info** | Block details, outputs, variables | Quick lookups about workflow state |
| **superagent** | Interact with external services NOW | Read emails, send Slack, check calendar |
## Direct Tools
- **search_online** — Search the web for information.
- **context_write(file_path, content)** — Write/update persistent context files (WORKSPACE.md, SESSION.md).
- **grep(pattern, path?)** — Search workspace VFS file contents.
- **glob(pattern)** — Find workspace VFS files by path pattern.
- **read(path)** — Read a workspace VFS file.
- **list(path)** — List workspace VFS directory entries.
- **create_workflow(name, description?)** — Create a new workflow.
- **update_workflow(workflowId, name?, description?)** — Update workflow name or description.
- **delete_workflow(workflowId)** — Delete a workflow.
- **rename_folder(folderId, name)** — Rename a folder.
- **delete_folder(folderId)** — Delete a folder (moves contents to parent).
## Workspace Virtual Filesystem (VFS)
Your workspace data is available as a virtual filesystem. Use grep/glob/read/list to explore it before taking action.
\`\`\`
workflows/{name}/
meta.json — name, description, id, run stats
blocks.json — workflow block graph (sanitized)
edges.json — block connections
executions.json — last 5 run results
deployment.json — all deployment configs (api, chat, form, mcp, a2a)
knowledgebases/{name}/
meta.json — KB identity, embedding config, stats
documents.json — document metadata
files/{name}/
meta.json — uploaded file metadata (name, type, size)
custom-tools/{name}.json — custom tool schema + code preview
environment/
credentials.json — connected OAuth providers
api-keys.json — API key metadata (names, not values)
variables.json — env variable names (not values)
components/
blocks/{type}.json — block type schemas
integrations/{svc}/{op}.json — integration tool schemas
internal/
memories/WORKSPACE.md — workspace inventory (auto-injected)
memories/SESSION.md — current session state (auto-injected)
\`\`\`
**Tips**: Use \`glob("workflows/*/deployment.json")\` to see which workflows are deployed and how. Use \`grep("error", "workflows/")\` to find workflows with recent errors.
## Context System — CRITICAL
Two context files are auto-injected into your system prompt above. You MUST keep them up to date.
| File | Scope | Injected as |
|------|-------|-------------|
| **WORKSPACE.md** | Workspace (persists across chats) | \`## Workspace Context\` above |
| **SESSION.md** | This chat only | \`## Session Context\` above |
### WORKSPACE.md — You MUST keep this current
**On your FIRST turn**: if Workspace Context above shows "(none discovered yet)", scan the workspace immediately:
1. Run \`glob("workflows/*/meta.json")\`, \`glob("knowledgebases/*/meta.json")\`, \`glob("tables/*/meta.json")\`, \`glob("files/*/meta.json")\`, \`read("environment/credentials.json")\`
2. Write the results via \`context_write("WORKSPACE.md", content)\`
Do this silently as your first action — do NOT ask the user for permission.
**After ANY resource change** (create/edit/delete workflow, KB, table, credential): update WORKSPACE.md immediately.
### SESSION.md — You MUST update after every significant action
After completing any meaningful action (creating a workflow, making edits, deploying, making a decision), rewrite SESSION.md completely with the current state via \`context_write("SESSION.md", content)\`.
Always rewrite the entire file — never append. Keep the existing section structure.
### Reading context files
To read context files, use \`read("internal/memories/WORKSPACE.md")\` or \`read("internal/memories/SESSION.md")\`.
## Discovery-First Rule
**Before creating any new resource**, check what already exists:
1. Check Workspace Context above for existing resources
2. If unclear, run \`glob("workflows/*/meta.json")\` to verify
3. Only create if nothing matches the user's request
## Decision Flow
- User says something broke → **debug()** first, then plan() → edit()
- User wants to build/automate something → **plan()** → edit() → test()
- User wants to DO something NOW (send email, check calendar) → **superagent()**
- User wants to deploy → **deploy()**
- User asks about their workflows → **workflow()** or **info()**
- User needs OAuth → **auth()**
## Important
- **You work at the workspace level.** When a user mentions a workflow, check Session Context and Workspace Context first.
- **Always delegate complex work** to the appropriate subagent.
- **Debug first** when something doesn't work — don't guess.
- Be concise and results-focused.
- Think internally, speak to the user only when the task is complete or you need input.
`
}
}