improvement(kb): added client-side store, shared utils, background processing, retry with exponential backoff to knowledge base (#453)

* added background processing for file upload to knowledge base, added retry with exponential backoff, individual file retry, statuses

* added knowledge store

* added knowledge base selector and docs for knowledge block

* fixed build

* fix contributors page

* significantly improved error handling, typing, fault tolerance

* standardize file icon size
This commit is contained in:
Waleed Latif
2025-06-02 23:12:47 -07:00
committed by GitHub
parent 282345b983
commit 1eda44d605
39 changed files with 6884 additions and 1378 deletions

View File

@@ -0,0 +1,108 @@
---
title: Knowledge
description: Search knowledge
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="knowledge"
color="#00B0B0"
icon={true}
iconSvg={`<svg className="block-icon"
xmlns='http://www.w3.org/2000/svg'
viewBox='0 0 24 24'
fill='none'
stroke='currentColor'
strokeWidth='1.5'
strokeLinecap='round'
strokeLinejoin='round'
>
<path d='M21 10V8a2 2 0 0 0-1-1.73l-7-4a2 2 0 0 0-2 0l-7 4A2 2 0 0 0 3 8v8a2 2 0 0 0 1 1.73l7 4a2 2 0 0 0 2 0l2-1.14' />
<path d='m7.5 4.27 9 5.15' />
<polyline points='3.29 7 12 12 20.71 7' />
<line x1='12' x2='12' y1='22' y2='12' />
<circle cx='18.5' cy='15.5' r='2.5' />
<path d='M20.27 17.27 22 19' />
</svg>`}
/>
{/* MANUAL-CONTENT-START:intro */}
Sim Studio's Knowledge Base is a powerful native feature that enables you to create, manage, and query custom knowledge bases directly within the platform. Using advanced AI embeddings and vector search technology, the Knowledge Base block allows you to build intelligent search capabilities into your workflows, making it easy to find and utilize relevant information across your organization.
The Knowledge Base system provides a comprehensive solution for managing organizational knowledge through its flexible and scalable architecture. With its built-in vector search capabilities, teams can perform semantic searches that understand meaning and context, going beyond traditional keyword matching.
Key features of the Knowledge Base include:
- Semantic Search: Advanced AI-powered search that understands meaning and context, not just keywords
- Vector Embeddings: Automatic conversion of text into high-dimensional vectors for intelligent similarity matching
- Custom Knowledge Bases: Create and manage multiple knowledge bases for different purposes or departments
- Flexible Content Types: Support for various document formats and content types
- Real-time Updates: Immediate indexing of new content for instant searchability
In Sim Studio, the Knowledge Base block enables your agents to perform intelligent semantic searches across your custom knowledge bases. This creates opportunities for automated information retrieval, content recommendations, and knowledge discovery as part of your AI workflows. The integration allows agents to search and retrieve relevant information programmatically, facilitating automated knowledge management tasks and ensuring that important information is easily accessible. By leveraging the Knowledge Base block, you can build intelligent agents that enhance information discovery while automating routine knowledge management tasks, improving team efficiency and ensuring consistent access to organizational knowledge.
{/* MANUAL-CONTENT-END */}
## Usage Instructions
Perform semantic vector search across your knowledge base to find the most relevant content. Uses advanced AI embeddings to understand meaning and context, returning the most similar documents to your search query.
## Tools
### `knowledge_search`
Search for similar content in a knowledge base using vector similarity
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `knowledgeBaseId` | string | Yes | ID of the knowledge base to search in |
| `query` | string | Yes | Search query text |
| `topK` | number | No | Number of most similar results to return \(1-100\) |
#### Output
| Parameter | Type |
| --------- | ---- |
| `results` | json |
| `query` | string |
| `knowledgeBaseId` | string |
| `topK` | number |
| `totalResults` | number |
| `message` | string |
## Block Configuration
### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `knowledgeBaseId` | string | Yes | Knowledge Base - Select knowledge base |
### Outputs
| Output | Type | Description |
| ------ | ---- | ----------- |
| `response` | object | Output from response |
| ↳ `results` | json | results of the response |
| ↳ `query` | string | query of the response |
| ↳ `knowledgeBaseId` | string | knowledgeBaseId of the response |
| ↳ `topK` | number | topK of the response |
| ↳ `totalResults` | number | totalResults of the response |
| ↳ `message` | string | message of the response |
## Notes
- Category: `blocks`
- Type: `knowledge`

View File

@@ -21,6 +21,7 @@
"image_generator",
"jina",
"jira",
"knowledge",
"linear",
"linkup",
"mem0",

View File

@@ -64,6 +64,9 @@ import { BlockInfoCard } from "@/components/ui/block-info-card"
<stop offset='0' stopColor='#5a62c3' />
<stop offset='.5' stopColor='#4d55bd' />
<stop offset='1' stopColor='#3940ab' />
<stop offset='0' stopColor='#5a62c3' />
<stop offset='.5' stopColor='#4d55bd' />
<stop offset='1' stopColor='#3940ab' />
</linearGradient>
<path
fill='url(#a)'

View File

@@ -1,6 +1,6 @@
---
title: Slack
description: Send a message to Slack
description: Send messages to Slack
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
@@ -56,7 +56,7 @@ In Sim Studio, the Slack integration enables your agents to programmatically sen
## Usage Instructions
Send messages to any Slack channel using OAuth authentication. Integrate automated notifications and alerts into your workflow to keep your team informed.
Comprehensive Slack integration with OAuth authentication. Send formatted messages using Slack mrkdwn formatting.
@@ -64,15 +64,16 @@ Send messages to any Slack channel using OAuth authentication. Integrate automat
### `slack_message`
Send messages to Slack channels or users through the Slack API. Enables direct communication and notifications with timestamp tracking and channel confirmation.
Send messages to Slack channels or users through the Slack API. Supports Slack mrkdwn formatting.
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `apiKey` | string | Yes | Your Slack API token |
| `botToken` | string | No | Bot token for Custom Bot |
| `accessToken` | string | No | OAuth access token or bot token for Slack API |
| `channel` | string | Yes | Target Slack channel \(e.g., #general\) |
| `text` | string | Yes | Message text to send |
| `text` | string | Yes | Message text to send \(supports Slack mrkdwn formatting\) |
#### Output
@@ -89,7 +90,7 @@ Send messages to Slack channels or users through the Slack API. Enables direct c
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `apiKey` | string | Yes | OAuth Token - Enter your Slack OAuth token |
| `operation` | string | Yes | Operation |

View File

@@ -433,7 +433,7 @@ export default function ContributorsPage() {
<ResponsiveContainer width='100%' height={300} className='sm:!h-[400px]'>
<BarChart
data={filteredContributors?.slice(0, showAllContributors ? undefined : 10)}
margin={{ top: 10, right: 5, bottom: 45, left: 5 }}
margin={{ top: 10, right: 10, bottom: 60, left: 10 }}
className='sm:!mx-2.5 sm:!mb-2.5'
>
<XAxis
@@ -448,11 +448,11 @@ export default function ContributorsPage() {
return (
<g transform={`translate(${x},${y})`}>
<foreignObject
x='-12'
y='6'
width='24'
height='24'
className='sm:!x-[-16] sm:!y-[8] sm:!w-[32] sm:!h-[32]'
x='-16'
y='8'
width='32'
height='32'
style={{ overflow: 'visible' }}
>
<Avatar className='h-6 w-6 ring-1 ring-[#606060]/30 sm:h-8 sm:w-8'>
<AvatarImage src={contributor?.avatar_url} />
@@ -464,8 +464,8 @@ export default function ContributorsPage() {
</g>
)
}}
height={50}
className='sm:!h-[60px] text-neutral-400'
height={60}
className='text-neutral-400'
/>
<YAxis
stroke='currentColor'

View File

@@ -46,7 +46,7 @@ export async function POST(request: NextRequest) {
Key: uniqueKey,
ContentType: contentType,
Metadata: {
originalName: fileName,
originalName: encodeURIComponent(fileName),
uploadedAt: new Date().toISOString(),
},
})

View File

@@ -1,14 +1,14 @@
import { and, eq, isNull } from 'drizzle-orm'
import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console-logger'
import { db } from '@/db'
import { document, embedding, knowledgeBase } from '@/db/schema'
import { embedding } from '@/db/schema'
import { checkChunkAccess } from '../../../../../utils'
const logger = createLogger('ChunkByIdAPI')
// Schema for chunk updates
const UpdateChunkSchema = z.object({
content: z.string().min(1, 'Content is required').optional(),
enabled: z.boolean().optional(),
@@ -16,64 +16,6 @@ const UpdateChunkSchema = z.object({
qualityScore: z.number().min(0).max(1).optional(),
})
/**
 * Resolves whether `userId` may act on a chunk by checking, in order, the
 * knowledge base, the document inside it, and the chunk inside that document.
 *
 * Result shapes:
 * - `{ hasAccess: false, notFound: true, reason }` when the knowledge base,
 *   document, or chunk cannot be found.
 * - `{ hasAccess: false, reason }` when the knowledge base belongs to another user.
 * - `{ hasAccess: true, chunk, document, knowledgeBase }` when every check passes.
 */
async function checkChunkAccess(
  knowledgeBaseId: string,
  documentId: string,
  chunkId: string,
  userId: string
) {
  // Step 1: the knowledge base must exist and must not be soft-deleted.
  const activeKnowledgeBase = and(
    eq(knowledgeBase.id, knowledgeBaseId),
    isNull(knowledgeBase.deletedAt)
  )
  const knowledgeBaseRows = await db
    .select({ id: knowledgeBase.id, userId: knowledgeBase.userId })
    .from(knowledgeBase)
    .where(activeKnowledgeBase)
    .limit(1)

  if (!knowledgeBaseRows.length) {
    return { hasAccess: false, notFound: true, reason: 'Knowledge base not found' }
  }

  // Step 2: only the owner of the knowledge base is allowed through.
  const kbData = knowledgeBaseRows[0]
  const isOwner = kbData.userId === userId
  if (!isOwner) {
    return { hasAccess: false, reason: 'Unauthorized knowledge base access' }
  }

  // Step 3: the document must be a live member of that knowledge base.
  const activeDocument = and(
    eq(document.id, documentId),
    eq(document.knowledgeBaseId, knowledgeBaseId),
    isNull(document.deletedAt)
  )
  const documentRows = await db.select().from(document).where(activeDocument).limit(1)

  if (!documentRows.length) {
    return { hasAccess: false, notFound: true, reason: 'Document not found' }
  }

  // Step 4: the chunk must belong to that document.
  const chunkOfDocument = and(eq(embedding.id, chunkId), eq(embedding.documentId, documentId))
  const chunkRows = await db.select().from(embedding).where(chunkOfDocument).limit(1)

  if (!chunkRows.length) {
    return { hasAccess: false, notFound: true, reason: 'Chunk not found' }
  }

  return {
    hasAccess: true,
    chunk: chunkRows[0],
    document: documentRows[0],
    knowledgeBase: kbData,
  }
}
export async function GET(
req: NextRequest,
{ params }: { params: Promise<{ id: string; documentId: string; chunkId: string }> }
@@ -95,14 +37,13 @@ export async function GET(
session.user.id
)
if (accessCheck.notFound) {
logger.warn(
`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}, Chunk=${chunkId}`
)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
if (!accessCheck.hasAccess) {
if (accessCheck.notFound) {
logger.warn(
`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}, Chunk=${chunkId}`
)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
logger.warn(
`[${requestId}] User ${session.user.id} attempted unauthorized chunk access: ${accessCheck.reason}`
)
@@ -144,14 +85,13 @@ export async function PUT(
session.user.id
)
if (accessCheck.notFound) {
logger.warn(
`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}, Chunk=${chunkId}`
)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
if (!accessCheck.hasAccess) {
if (accessCheck.notFound) {
logger.warn(
`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}, Chunk=${chunkId}`
)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
logger.warn(
`[${requestId}] User ${session.user.id} attempted unauthorized chunk update: ${accessCheck.reason}`
)
@@ -235,14 +175,13 @@ export async function DELETE(
session.user.id
)
if (accessCheck.notFound) {
logger.warn(
`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}, Chunk=${chunkId}`
)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
if (!accessCheck.hasAccess) {
if (accessCheck.notFound) {
logger.warn(
`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}, Chunk=${chunkId}`
)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
logger.warn(
`[${requestId}] User ${session.user.id} attempted unauthorized chunk deletion: ${accessCheck.reason}`
)

View File

@@ -1,10 +1,11 @@
import { and, asc, eq, ilike, isNull, sql } from 'drizzle-orm'
import { and, asc, eq, ilike, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console-logger'
import { db } from '@/db'
import { document, embedding, knowledgeBase } from '@/db/schema'
import { embedding } from '@/db/schema'
import { checkDocumentAccess } from '../../../../utils'
const logger = createLogger('DocumentChunksAPI')
@@ -16,48 +17,6 @@ const GetChunksQuerySchema = z.object({
offset: z.coerce.number().min(0).optional().default(0),
})
/**
 * Resolves whether `userId` may act on a document by checking the owning
 * knowledge base first and the document second.
 *
 * Result shapes:
 * - `{ hasAccess: false, notFound: true, reason }` when the knowledge base or
 *   the document cannot be found.
 * - `{ hasAccess: false, reason }` when the knowledge base belongs to another user.
 * - `{ hasAccess: true, document, knowledgeBase }` when both checks pass.
 */
async function checkDocumentAccess(knowledgeBaseId: string, documentId: string, userId: string) {
  // The knowledge base must exist and must not be soft-deleted.
  const activeKnowledgeBase = and(
    eq(knowledgeBase.id, knowledgeBaseId),
    isNull(knowledgeBase.deletedAt)
  )
  const knowledgeBaseRows = await db
    .select({ id: knowledgeBase.id, userId: knowledgeBase.userId })
    .from(knowledgeBase)
    .where(activeKnowledgeBase)
    .limit(1)

  if (!knowledgeBaseRows.length) {
    return { hasAccess: false, notFound: true, reason: 'Knowledge base not found' }
  }

  // Only the owner of the knowledge base is allowed through.
  const kbData = knowledgeBaseRows[0]
  const isOwner = kbData.userId === userId
  if (!isOwner) {
    return { hasAccess: false, reason: 'Unauthorized knowledge base access' }
  }

  // The document must be a live member of that knowledge base.
  const activeDocument = and(
    eq(document.id, documentId),
    eq(document.knowledgeBaseId, knowledgeBaseId),
    isNull(document.deletedAt)
  )
  const documentRows = await db.select().from(document).where(activeDocument).limit(1)

  if (!documentRows.length) {
    return { hasAccess: false, notFound: true, reason: 'Document not found' }
  }

  return { hasAccess: true, document: documentRows[0], knowledgeBase: kbData }
}
export async function GET(
req: NextRequest,
{ params }: { params: Promise<{ id: string; documentId: string }> }
@@ -74,18 +33,42 @@ export async function GET(
const accessCheck = await checkDocumentAccess(knowledgeBaseId, documentId, session.user.id)
if (accessCheck.notFound) {
logger.warn(`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
if (!accessCheck.hasAccess) {
if (accessCheck.notFound) {
logger.warn(
`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`
)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
logger.warn(
`[${requestId}] User ${session.user.id} attempted unauthorized chunks access: ${accessCheck.reason}`
)
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
// Check if document processing is completed
const doc = accessCheck.document
if (!doc) {
logger.warn(
`[${requestId}] Document data not available: KB=${knowledgeBaseId}, Doc=${documentId}`
)
return NextResponse.json({ error: 'Document not found' }, { status: 404 })
}
if (doc.processingStatus !== 'completed') {
logger.warn(
`[${requestId}] Document ${documentId} is not ready for chunk access (status: ${doc.processingStatus})`
)
return NextResponse.json(
{
error: 'Document is not ready for access',
details: `Document status: ${doc.processingStatus}`,
retryAfter: doc.processingStatus === 'processing' ? 5 : null,
},
{ status: 400 }
)
}
// Parse query parameters
const { searchParams } = new URL(req.url)
const queryParams = GetChunksQuerySchema.parse({

View File

@@ -0,0 +1,101 @@
import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { getSession } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console-logger'
import { db } from '@/db'
import { document, embedding } from '@/db/schema'
import { checkDocumentAccess, processDocumentAsync } from '../../../../utils'
const logger = createLogger('DocumentRetryAPI')
/**
 * Re-queues a failed knowledge base document for processing.
 *
 * Steps:
 * 1. Require an authenticated session (401 otherwise).
 * 2. Run `checkDocumentAccess`; respond 404 when it reports `notFound`, 401 for
 *    any other denial.
 * 3. Only documents whose `processingStatus` is `'failed'` may be retried (400 otherwise).
 * 4. Inside one transaction, delete the document's embeddings and reset its
 *    processing fields and counters back to a pending state.
 * 5. Start `processDocumentAsync` without awaiting it and respond immediately
 *    with `status: 'pending'`.
 *
 * @param req - Incoming request (not read by this handler).
 * @param params - Route params: `id` is the knowledge base id, `documentId` the document id.
 * @returns JSON response; any unexpected error yields a 500.
 */
export async function POST(
  req: NextRequest,
  { params }: { params: Promise<{ id: string; documentId: string }> }
) {
  // Short id used to correlate all log lines of this request.
  const requestId = crypto.randomUUID().slice(0, 8)
  const { id: knowledgeBaseId, documentId } = await params

  try {
    const session = await getSession()
    if (!session?.user?.id) {
      logger.warn(`[${requestId}] Unauthorized document retry attempt`)
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    const accessCheck = await checkDocumentAccess(knowledgeBaseId, documentId, session.user.id)

    if (!accessCheck.hasAccess) {
      if (accessCheck.notFound) {
        logger.warn(
          `[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`
        )
        return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
      }
      logger.warn(
        `[${requestId}] User ${session.user.id} attempted unauthorized document retry: ${accessCheck.reason}`
      )
      return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
    }

    // Guard against a successful access check that carries no document row,
    // instead of dereferencing `undefined` below.
    const doc = accessCheck.document
    if (!doc) {
      logger.warn(
        `[${requestId}] Document data not available: KB=${knowledgeBaseId}, Doc=${documentId}`
      )
      return NextResponse.json({ error: 'Document not found' }, { status: 404 })
    }

    if (doc.processingStatus !== 'failed') {
      logger.warn(
        `[${requestId}] Document ${documentId} is not in failed state (current: ${doc.processingStatus})`
      )
      return NextResponse.json({ error: 'Document is not in failed state' }, { status: 400 })
    }

    // Remove embeddings from the failed attempt and reset the document in a
    // single transaction so the two changes are applied together.
    await db.transaction(async (tx) => {
      await tx.delete(embedding).where(eq(embedding.documentId, documentId))

      await tx
        .update(document)
        .set({
          processingStatus: 'pending',
          processingStartedAt: null,
          processingCompletedAt: null,
          processingError: null,
          chunkCount: 0,
          tokenCount: 0,
          characterCount: 0,
        })
        .where(eq(document.id, documentId))
    })

    // NOTE(review): retries always use these hard-coded options rather than the
    // options of the original processing request — confirm this is intended.
    const processingOptions = {
      chunkSize: 1024,
      minCharactersPerChunk: 24,
      recipe: 'default',
      lang: 'en',
    }

    const docData = {
      filename: doc.filename,
      fileUrl: doc.fileUrl,
      fileSize: doc.fileSize,
      mimeType: doc.mimeType,
      fileHash: doc.fileHash,
    }

    // Fire-and-forget: the response is sent before processing finishes, so a
    // rejection is only logged here.
    processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions).catch(
      (error: unknown) => {
        logger.error(`[${requestId}] Background retry processing error:`, error)
      }
    )

    logger.info(`[${requestId}] Document retry initiated: ${documentId}`)

    return NextResponse.json({
      success: true,
      data: {
        documentId,
        status: 'pending',
        message: 'Document retry processing started',
      },
    })
  } catch (error) {
    logger.error(`[${requestId}] Error retrying document processing`, error)
    return NextResponse.json({ error: 'Failed to retry document processing' }, { status: 500 })
  }
}

View File

@@ -1,14 +1,14 @@
import { and, eq, isNull } from 'drizzle-orm'
import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console-logger'
import { db } from '@/db'
import { document, knowledgeBase } from '@/db/schema'
import { document } from '@/db/schema'
import { checkDocumentAccess } from '../../../utils'
const logger = createLogger('DocumentByIdAPI')
// Schema for document updates
const UpdateDocumentSchema = z.object({
filename: z.string().min(1, 'Filename is required').optional(),
enabled: z.boolean().optional(),
@@ -17,48 +17,6 @@ const UpdateDocumentSchema = z.object({
characterCount: z.number().min(0).optional(),
})
/**
 * Resolves whether `userId` may act on a document by checking the owning
 * knowledge base first and the document second.
 *
 * Result shapes:
 * - `{ hasAccess: false, notFound: true, reason }` when the knowledge base or
 *   the document cannot be found.
 * - `{ hasAccess: false, reason }` when the knowledge base belongs to another user.
 * - `{ hasAccess: true, document, knowledgeBase }` when both checks pass.
 */
async function checkDocumentAccess(knowledgeBaseId: string, documentId: string, userId: string) {
  // The knowledge base must exist and must not be soft-deleted.
  const activeKnowledgeBase = and(
    eq(knowledgeBase.id, knowledgeBaseId),
    isNull(knowledgeBase.deletedAt)
  )
  const knowledgeBaseRows = await db
    .select({ id: knowledgeBase.id, userId: knowledgeBase.userId })
    .from(knowledgeBase)
    .where(activeKnowledgeBase)
    .limit(1)

  if (!knowledgeBaseRows.length) {
    return { hasAccess: false, notFound: true, reason: 'Knowledge base not found' }
  }

  // Only the owner of the knowledge base is allowed through.
  const kbData = knowledgeBaseRows[0]
  const isOwner = kbData.userId === userId
  if (!isOwner) {
    return { hasAccess: false, reason: 'Unauthorized knowledge base access' }
  }

  // The document must be a live member of that knowledge base.
  const activeDocument = and(
    eq(document.id, documentId),
    eq(document.knowledgeBaseId, knowledgeBaseId),
    isNull(document.deletedAt)
  )
  const documentRows = await db.select().from(document).where(activeDocument).limit(1)

  if (!documentRows.length) {
    return { hasAccess: false, notFound: true, reason: 'Document not found' }
  }

  return { hasAccess: true, document: documentRows[0], knowledgeBase: kbData }
}
export async function GET(
req: NextRequest,
{ params }: { params: Promise<{ id: string; documentId: string }> }
@@ -75,12 +33,13 @@ export async function GET(
const accessCheck = await checkDocumentAccess(knowledgeBaseId, documentId, session.user.id)
if (accessCheck.notFound) {
logger.warn(`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
if (!accessCheck.hasAccess) {
if (accessCheck.notFound) {
logger.warn(
`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`
)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
logger.warn(
`[${requestId}] User ${session.user.id} attempted unauthorized document access: ${accessCheck.reason}`
)
@@ -117,12 +76,13 @@ export async function PUT(
const accessCheck = await checkDocumentAccess(knowledgeBaseId, documentId, session.user.id)
if (accessCheck.notFound) {
logger.warn(`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
if (!accessCheck.hasAccess) {
if (accessCheck.notFound) {
logger.warn(
`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`
)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
logger.warn(
`[${requestId}] User ${session.user.id} attempted unauthorized document update: ${accessCheck.reason}`
)
@@ -164,6 +124,7 @@ export async function PUT(
if (validationError instanceof z.ZodError) {
logger.warn(`[${requestId}] Invalid document update data`, {
errors: validationError.errors,
documentId,
})
return NextResponse.json(
{ error: 'Invalid request data', details: validationError.errors },
@@ -173,7 +134,7 @@ export async function PUT(
throw validationError
}
} catch (error) {
logger.error(`[${requestId}] Error updating document`, error)
logger.error(`[${requestId}] Error updating document ${documentId}`, error)
return NextResponse.json({ error: 'Failed to update document' }, { status: 500 })
}
}
@@ -194,12 +155,13 @@ export async function DELETE(
const accessCheck = await checkDocumentAccess(knowledgeBaseId, documentId, session.user.id)
if (accessCheck.notFound) {
logger.warn(`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
if (!accessCheck.hasAccess) {
if (accessCheck.notFound) {
logger.warn(
`[${requestId}] ${accessCheck.reason}: KB=${knowledgeBaseId}, Doc=${documentId}`
)
return NextResponse.json({ error: accessCheck.reason }, { status: 404 })
}
logger.warn(
`[${requestId}] User ${session.user.id} attempted unauthorized document deletion: ${accessCheck.reason}`
)

View File

@@ -4,11 +4,11 @@ import { z } from 'zod'
import { getSession } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console-logger'
import { db } from '@/db'
import { document, knowledgeBase } from '@/db/schema'
import { document } from '@/db/schema'
import { checkKnowledgeBaseAccess } from '../../utils'
const logger = createLogger('DocumentsAPI')
// Schema for document creation
const CreateDocumentSchema = z.object({
filename: z.string().min(1, 'Filename is required'),
fileUrl: z.string().url('File URL must be valid'),
@@ -17,30 +17,6 @@ const CreateDocumentSchema = z.object({
fileHash: z.string().optional(),
})
/**
 * Looks up a non-deleted knowledge base and reports whether `userId` owns it.
 *
 * Result shapes:
 * - `{ hasAccess: false, notFound: true }` when no matching knowledge base exists.
 * - `{ hasAccess: true, knowledgeBase }` when the user is the owner.
 * - `{ hasAccess: false, knowledgeBase }` when someone else owns it.
 */
async function checkKnowledgeBaseAccess(knowledgeBaseId: string, userId: string) {
  // Soft-deleted knowledge bases are treated as missing.
  const activeKnowledgeBase = and(
    eq(knowledgeBase.id, knowledgeBaseId),
    isNull(knowledgeBase.deletedAt)
  )
  const rows = await db
    .select({ id: knowledgeBase.id, userId: knowledgeBase.userId })
    .from(knowledgeBase)
    .where(activeKnowledgeBase)
    .limit(1)

  if (!rows.length) {
    return { hasAccess: false, notFound: true }
  }

  const kbData = rows[0]

  // Access is granted to the owner only.
  if (kbData.userId !== userId) {
    return { hasAccess: false, knowledgeBase: kbData }
  }

  return { hasAccess: true, knowledgeBase: kbData }
}
export async function GET(req: NextRequest, { params }: { params: Promise<{ id: string }> }) {
const requestId = crypto.randomUUID().slice(0, 8)
const { id: knowledgeBaseId } = await params
@@ -54,12 +30,11 @@ export async function GET(req: NextRequest, { params }: { params: Promise<{ id:
const accessCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, session.user.id)
if (accessCheck.notFound) {
logger.warn(`[${requestId}] Knowledge base not found: ${knowledgeBaseId}`)
return NextResponse.json({ error: 'Knowledge base not found' }, { status: 404 })
}
if (!accessCheck.hasAccess) {
if ('notFound' in accessCheck && accessCheck.notFound) {
logger.warn(`[${requestId}] Knowledge base not found: ${knowledgeBaseId}`)
return NextResponse.json({ error: 'Knowledge base not found' }, { status: 404 })
}
logger.warn(
`[${requestId}] User ${session.user.id} attempted to access unauthorized knowledge base documents ${knowledgeBaseId}`
)
@@ -92,6 +67,10 @@ export async function GET(req: NextRequest, { params }: { params: Promise<{ id:
chunkCount: document.chunkCount,
tokenCount: document.tokenCount,
characterCount: document.characterCount,
processingStatus: document.processingStatus,
processingStartedAt: document.processingStartedAt,
processingCompletedAt: document.processingCompletedAt,
processingError: document.processingError,
enabled: document.enabled,
uploadedAt: document.uploadedAt,
})
@@ -126,12 +105,11 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
const accessCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, session.user.id)
if (accessCheck.notFound) {
logger.warn(`[${requestId}] Knowledge base not found: ${knowledgeBaseId}`)
return NextResponse.json({ error: 'Knowledge base not found' }, { status: 404 })
}
if (!accessCheck.hasAccess) {
if ('notFound' in accessCheck && accessCheck.notFound) {
logger.warn(`[${requestId}] Knowledge base not found: ${knowledgeBaseId}`)
return NextResponse.json({ error: 'Knowledge base not found' }, { status: 404 })
}
logger.warn(
`[${requestId}] User ${session.user.id} attempted to create document in unauthorized knowledge base ${knowledgeBaseId}`
)

View File

@@ -1,291 +1,173 @@
import { and, eq, isNull } from 'drizzle-orm'
import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
import { type ProcessedDocument, processDocuments } from '@/lib/document-processor'
import { env } from '@/lib/env'
import { createLogger } from '@/lib/logs/console-logger'
import { db } from '@/db'
import { document, embedding, knowledgeBase } from '@/db/schema'
import { document } from '@/db/schema'
import { checkKnowledgeBaseAccess, processDocumentAsync } from '../../utils'
const logger = createLogger('ProcessDocumentsAPI')
// Schema for document processing request
const ProcessDocumentsSchema = z.object({
documents: z
.array(
z.object({
filename: z.string().min(1, 'Filename is required'),
fileUrl: z.string().url('File URL must be valid'),
fileSize: z.number().min(1, 'File size must be greater than 0'),
mimeType: z.string().min(1, 'MIME type is required'),
fileHash: z.string().optional(),
})
)
.min(1, 'At least one document is required'),
processingOptions: z
.object({
chunkSize: z.number().min(100).max(2048).default(512),
minCharactersPerChunk: z.number().min(10).max(1000).default(24),
recipe: z.string().default('default'),
lang: z.string().default('en'),
documents: z.array(
z.object({
filename: z.string().min(1, 'Filename is required'),
fileUrl: z.string().url('File URL must be valid'),
fileSize: z.number().min(1, 'File size must be greater than 0'),
mimeType: z.string().min(1, 'MIME type is required'),
fileHash: z.string().optional(),
})
.optional(),
),
processingOptions: z.object({
chunkSize: z.number(),
minCharactersPerChunk: z.number(),
recipe: z.string(),
lang: z.string(),
}),
})
async function checkKnowledgeBaseAccess(knowledgeBaseId: string, userId: string) {
const kb = await db
.select({
id: knowledgeBase.id,
userId: knowledgeBase.userId,
chunkingConfig: knowledgeBase.chunkingConfig,
})
.from(knowledgeBase)
.where(and(eq(knowledgeBase.id, knowledgeBaseId), isNull(knowledgeBase.deletedAt)))
.limit(1)
if (kb.length === 0) {
return { hasAccess: false, notFound: true }
}
const kbData = kb[0]
// Check if user owns the knowledge base
if (kbData.userId === userId) {
return { hasAccess: true, knowledgeBase: kbData }
}
return { hasAccess: false, knowledgeBase: kbData }
const PROCESSING_CONFIG = {
maxConcurrentDocuments: 3, // Limit concurrent processing to prevent resource exhaustion
batchSize: 5, // Process documents in batches
delayBetweenBatches: 1000, // 1 second delay between batches
delayBetweenDocuments: 500, // 500ms delay between individual documents in a batch
}
/**
 * Generates one embedding vector per input text via the OpenAI embeddings endpoint.
 *
 * Inputs are sent in sequential batches and the vectors are appended in request
 * order, so the result stays index-aligned with `texts`.
 *
 * @param texts - Texts to embed.
 * @param embeddingModel - Model name sent to the API.
 * @returns One `number[]` per entry of `texts`, in the same order.
 * @throws Error `'OPENAI_API_KEY not configured'` when the key is missing; any
 * later failure (HTTP error, malformed response, count mismatch) is logged and
 * rethrown as `'Embedding generation failed: …'`.
 */
async function generateEmbeddings(
  texts: string[],
  embeddingModel = 'text-embedding-3-small'
): Promise<number[][]> {
  const openaiApiKey = env.OPENAI_API_KEY
  if (!openaiApiKey) {
    throw new Error('OPENAI_API_KEY not configured')
  }

  try {
    // Batch process embeddings for efficiency
    const batchSize = 100 // OpenAI allows up to 2048 inputs per request
    const totalBatches = Math.ceil(texts.length / batchSize)
    const allEmbeddings: number[][] = []

    for (let i = 0; i < texts.length; i += batchSize) {
      const batch = texts.slice(i, i + batchSize)

      logger.info(
        `Generating embeddings for batch ${Math.floor(i / batchSize) + 1}/${totalBatches} (${batch.length} texts)`
      )

      // Make direct API call to OpenAI embeddings
      const response = await fetch('https://api.openai.com/v1/embeddings', {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${openaiApiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          input: batch,
          model: embeddingModel,
          encoding_format: 'float',
        }),
      })

      if (!response.ok) {
        const errorText = await response.text()
        throw new Error(
          `OpenAI API error: ${response.status} ${response.statusText} - ${errorText}`
        )
      }

      // Treat the body as untrusted: only the `data` array is relied upon.
      const payload = (await response.json()) as { data?: unknown } | null

      if (!payload || !Array.isArray(payload.data)) {
        throw new Error('Invalid response format from OpenAI embeddings API')
      }

      // Extract embeddings from response
      const batchEmbeddings: number[][] = payload.data.map(
        (item: { embedding: number[] }) => item.embedding
      )

      // A short or long response would silently shift every later vector onto
      // the wrong text, so fail loudly instead.
      if (batchEmbeddings.length !== batch.length) {
        throw new Error(
          `OpenAI embeddings API returned ${batchEmbeddings.length} embeddings for ${batch.length} inputs`
        )
      }

      allEmbeddings.push(...batchEmbeddings)
    }

    logger.info(`Successfully generated ${allEmbeddings.length} embeddings`)
    return allEmbeddings
  } catch (error) {
    logger.error('Failed to generate embeddings:', error)
    throw new Error(
      `Embedding generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`
    )
  }
}
async function saveProcessedDocuments(
knowledgeBaseId: string,
processedDocuments: ProcessedDocument[],
requestedDocuments: Array<{
/**
* Process documents with concurrency control and batching
*/
async function processDocumentsWithConcurrencyControl(
createdDocuments: Array<{
documentId: string
filename: string
fileUrl: string
fileSize: number
mimeType: string
fileHash?: string
}>
) {
const now = new Date()
const results: Array<{
}>,
knowledgeBaseId: string,
processingOptions: any,
requestId: string
): Promise<void> {
const totalDocuments = createdDocuments.length
const batches = []
// Create batches
for (let i = 0; i < totalDocuments; i += PROCESSING_CONFIG.batchSize) {
batches.push(createdDocuments.slice(i, i + PROCESSING_CONFIG.batchSize))
}
logger.info(`[${requestId}] Processing ${totalDocuments} documents in ${batches.length} batches`)
for (const [batchIndex, batch] of batches.entries()) {
logger.info(
`[${requestId}] Starting batch ${batchIndex + 1}/${batches.length} with ${batch.length} documents`
)
// Process batch with limited concurrency
await processBatchWithConcurrency(batch, knowledgeBaseId, processingOptions, requestId)
// Add delay between batches (except for the last batch)
if (batchIndex < batches.length - 1) {
await new Promise((resolve) => setTimeout(resolve, PROCESSING_CONFIG.delayBetweenBatches))
}
}
logger.info(`[${requestId}] Completed processing initiation for all ${totalDocuments} documents`)
}
/**
* Process a batch of documents with controlled concurrency
*/
async function processBatchWithConcurrency(
batch: Array<{
documentId: string
chunkCount: number
success: boolean
error?: string
}> = []
filename: string
fileUrl: string
fileSize: number
mimeType: string
fileHash?: string
}>,
knowledgeBaseId: string,
processingOptions: any,
requestId: string
): Promise<void> {
const semaphore = new Array(PROCESSING_CONFIG.maxConcurrentDocuments).fill(0)
const processingPromises = batch.map(async (doc, index) => {
// Add staggered delay to prevent overwhelming the system
if (index > 0) {
await new Promise((resolve) =>
setTimeout(resolve, index * PROCESSING_CONFIG.delayBetweenDocuments)
)
}
// Collect all chunk texts for batch embedding generation
const allChunkTexts: string[] = []
const chunkMapping: Array<{ docIndex: number; chunkIndex: number }> = []
processedDocuments.forEach((processed, docIndex) => {
processed.chunks.forEach((chunk, chunkIndex) => {
allChunkTexts.push(chunk.text)
chunkMapping.push({ docIndex, chunkIndex })
// Wait for available slot
await new Promise<void>((resolve) => {
const checkSlot = () => {
const availableIndex = semaphore.findIndex((slot) => slot === 0)
if (availableIndex !== -1) {
semaphore[availableIndex] = 1
resolve()
} else {
setTimeout(checkSlot, 100)
}
}
checkSlot()
})
try {
logger.info(`[${requestId}] Starting processing for document: ${doc.filename}`)
await processDocumentAsync(
knowledgeBaseId,
doc.documentId,
{
filename: doc.filename,
fileUrl: doc.fileUrl,
fileSize: doc.fileSize,
mimeType: doc.mimeType,
fileHash: doc.fileHash,
},
processingOptions
)
logger.info(`[${requestId}] Successfully initiated processing for document: ${doc.filename}`)
} catch (error: unknown) {
logger.error(`[${requestId}] Failed to process document: ${doc.filename}`, {
documentId: doc.documentId,
filename: doc.filename,
fileSize: doc.fileSize,
mimeType: doc.mimeType,
error: error instanceof Error ? error.message : 'Unknown error',
stack: error instanceof Error ? error.stack : undefined,
})
try {
await db
.update(document)
.set({
processingStatus: 'failed',
processingError:
error instanceof Error ? error.message : 'Failed to initiate processing',
processingCompletedAt: new Date(),
})
.where(eq(document.id, doc.documentId))
} catch (dbError: unknown) {
logger.error(
`[${requestId}] Failed to update document status for failed document: ${doc.documentId}`,
dbError
)
}
} finally {
const slotIndex = semaphore.findIndex((slot) => slot === 1)
if (slotIndex !== -1) {
semaphore[slotIndex] = 0
}
}
})
// Generate embeddings for all chunks at once
let allEmbeddings: number[][] = []
if (allChunkTexts.length > 0) {
try {
logger.info(
`Generating embeddings for ${allChunkTexts.length} chunks across ${processedDocuments.length} documents`
)
allEmbeddings = await generateEmbeddings(allChunkTexts, 'text-embedding-3-small')
logger.info(`Successfully generated ${allEmbeddings.length} embeddings`)
} catch (error) {
logger.error('Failed to generate embeddings for chunks:', error)
// Continue without embeddings rather than failing completely
allEmbeddings = []
}
}
for (let i = 0; i < processedDocuments.length; i++) {
const processed = processedDocuments[i]
const original = requestedDocuments.find((doc) => doc.filename === processed.metadata.filename)
if (!original) {
results.push({
documentId: '',
chunkCount: 0,
success: false,
error: `Original document data not found for ${processed.metadata.filename}`,
})
continue
}
try {
// Check for duplicate file hash if provided
if (original.fileHash) {
const existingDocument = await db
.select({ id: document.id })
.from(document)
.where(
and(
eq(document.knowledgeBaseId, knowledgeBaseId),
eq(document.fileHash, original.fileHash),
isNull(document.deletedAt)
)
)
.limit(1)
if (existingDocument.length > 0) {
results.push({
documentId: existingDocument[0].id,
chunkCount: 0,
success: false,
error: 'Document with this file hash already exists',
})
continue
}
}
// Insert document record
const documentId = crypto.randomUUID()
const newDocument = {
id: documentId,
knowledgeBaseId,
filename: original.filename,
fileUrl: processed.metadata.s3Url || original.fileUrl,
fileSize: original.fileSize,
mimeType: original.mimeType,
fileHash: original.fileHash || null,
chunkCount: processed.metadata.chunkCount,
tokenCount: processed.metadata.tokenCount,
characterCount: processed.metadata.characterCount,
enabled: true,
uploadedAt: now,
}
await db.insert(document).values(newDocument)
// Insert embedding records for chunks with generated embeddings
const embeddingRecords = processed.chunks.map((chunk, chunkIndex) => {
// Find the corresponding embedding for this chunk
const globalChunkIndex = chunkMapping.findIndex(
(mapping) => mapping.docIndex === i && mapping.chunkIndex === chunkIndex
)
const embedding =
globalChunkIndex >= 0 && globalChunkIndex < allEmbeddings.length
? allEmbeddings[globalChunkIndex]
: null
return {
id: crypto.randomUUID(),
knowledgeBaseId,
documentId,
chunkIndex: chunkIndex,
chunkHash: crypto.randomUUID(), // Generate a hash for the chunk
content: chunk.text,
contentLength: chunk.text.length,
tokenCount: Math.ceil(chunk.text.length / 4), // Rough token estimation
embedding: embedding, // Store the generated OpenAI embedding
embeddingModel: 'text-embedding-3-small',
startOffset: chunk.startIndex || 0,
endOffset: chunk.endIndex || chunk.text.length,
overlapTokens: 0,
metadata: {},
searchRank: '1.0',
accessCount: 0,
lastAccessedAt: null,
qualityScore: null,
createdAt: now,
updatedAt: now,
}
})
if (embeddingRecords.length > 0) {
await db.insert(embedding).values(embeddingRecords)
}
results.push({
documentId,
chunkCount: processed.metadata.chunkCount,
success: true,
})
logger.info(
`Document processed and saved: ${documentId} with ${processed.metadata.chunkCount} chunks and ${embeddingRecords.filter((r) => r.embedding).length} embeddings`
)
} catch (error) {
logger.error(`Failed to save processed document ${processed.metadata.filename}:`, error)
results.push({
documentId: '',
chunkCount: 0,
success: false,
error: error instanceof Error ? error.message : 'Unknown error during save',
})
}
}
return results
await Promise.allSettled(processingPromises)
}
export async function POST(req: NextRequest, { params }: { params: Promise<{ id: string }> }) {
@@ -301,12 +183,11 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
const accessCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, session.user.id)
if (accessCheck.notFound) {
logger.warn(`[${requestId}] Knowledge base not found: ${knowledgeBaseId}`)
return NextResponse.json({ error: 'Knowledge base not found' }, { status: 404 })
}
if (!accessCheck.hasAccess) {
if ('notFound' in accessCheck && accessCheck.notFound) {
logger.warn(`[${requestId}] Knowledge base not found: ${knowledgeBaseId}`)
return NextResponse.json({ error: 'Knowledge base not found' }, { status: 404 })
}
logger.warn(
`[${requestId}] User ${session.user.id} attempted to process documents in unauthorized knowledge base ${knowledgeBaseId}`
)
@@ -318,58 +199,67 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
try {
const validatedData = ProcessDocumentsSchema.parse(body)
logger.info(
`[${requestId}] Starting processing of ${validatedData.documents.length} documents`
)
const createdDocuments = await db.transaction(async (tx) => {
const documentPromises = validatedData.documents.map(async (docData) => {
const documentId = crypto.randomUUID()
const now = new Date()
// Get chunking config from knowledge base or use defaults
const kbChunkingConfig = accessCheck.knowledgeBase?.chunkingConfig as any
const processingOptions = {
knowledgeBaseId,
chunkSize: validatedData.processingOptions?.chunkSize || kbChunkingConfig?.maxSize || 512,
minCharactersPerChunk:
validatedData.processingOptions?.minCharactersPerChunk || kbChunkingConfig?.minSize || 24,
recipe: validatedData.processingOptions?.recipe || 'default',
lang: validatedData.processingOptions?.lang || 'en',
}
const newDocument = {
id: documentId,
knowledgeBaseId,
filename: docData.filename,
fileUrl: docData.fileUrl,
fileSize: docData.fileSize,
mimeType: docData.mimeType,
fileHash: docData.fileHash || null,
chunkCount: 0,
tokenCount: 0,
characterCount: 0,
processingStatus: 'pending' as const,
enabled: true,
uploadedAt: now,
}
// Process documents (parsing + chunking)
const processedDocuments = await processDocuments(
validatedData.documents.map((doc) => ({
fileUrl: doc.fileUrl,
filename: doc.filename,
mimeType: doc.mimeType,
fileSize: doc.fileSize,
})),
processingOptions
)
await tx.insert(document).values(newDocument)
return { documentId, ...docData }
})
// Save processed documents and chunks to database
const saveResults = await saveProcessedDocuments(
knowledgeBaseId,
processedDocuments,
validatedData.documents
)
const successfulCount = saveResults.filter((r) => r.success).length
const totalChunks = saveResults.reduce((sum, r) => sum + r.chunkCount, 0)
return await Promise.all(documentPromises)
})
logger.info(
`[${requestId}] Document processing completed: ${successfulCount}/${validatedData.documents.length} documents, ${totalChunks} total chunks`
`[${requestId}] Starting controlled async processing of ${createdDocuments.length} documents`
)
processDocumentsWithConcurrencyControl(
createdDocuments,
knowledgeBaseId,
validatedData.processingOptions,
requestId
).catch((error: unknown) => {
logger.error(`[${requestId}] Critical error in document processing pipeline:`, error)
})
return NextResponse.json({
success: true,
data: {
processed: successfulCount,
total: validatedData.documents.length,
totalChunks,
results: saveResults,
total: createdDocuments.length,
documentsCreated: createdDocuments.map((doc) => ({
documentId: doc.documentId,
filename: doc.filename,
status: 'pending',
})),
processingMethod: 'background',
processingConfig: {
maxConcurrentDocuments: PROCESSING_CONFIG.maxConcurrentDocuments,
batchSize: PROCESSING_CONFIG.batchSize,
totalBatches: Math.ceil(createdDocuments.length / PROCESSING_CONFIG.batchSize),
},
},
})
} catch (validationError) {
if (validationError instanceof z.ZodError) {
logger.warn(`[${requestId}] Invalid document processing data`, {
logger.warn(`[${requestId}] Invalid processing request data`, {
errors: validationError.errors,
})
return NextResponse.json(
@@ -381,12 +271,6 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id:
}
} catch (error) {
logger.error(`[${requestId}] Error processing documents`, error)
return NextResponse.json(
{
error: 'Failed to process documents',
details: error instanceof Error ? error.message : 'Unknown error',
},
{ status: 500 }
)
return NextResponse.json({ error: 'Failed to process documents' }, { status: 500 })
}
}

View File

@@ -64,6 +64,15 @@ export async function GET(req: NextRequest) {
.groupBy(knowledgeBase.id)
.orderBy(knowledgeBase.createdAt)
// Debug logging
logger.info(`[${requestId}] Knowledge bases with counts:`, {
data: knowledgeBasesWithCounts.map((kb) => ({
id: kb.id,
name: kb.name,
docCount: kb.docCount,
})),
})
logger.info(
`[${requestId}] Retrieved ${knowledgeBasesWithCounts.length} knowledge bases for user ${session.user.id}`
)
@@ -106,6 +115,7 @@ export async function POST(req: NextRequest) {
embeddingModel: validatedData.embeddingModel,
embeddingDimension: validatedData.embeddingDimension,
chunkingConfig: validatedData.chunkingConfig,
docCount: 0,
createdAt: now,
updatedAt: now,
}

View File

@@ -2,6 +2,7 @@ import { and, eq, isNull, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
import { retryWithExponentialBackoff } from '@/lib/documents/utils'
import { env } from '@/lib/env'
import { createLogger } from '@/lib/logs/console-logger'
import { db } from '@/db'
@@ -9,6 +10,16 @@ import { embedding, knowledgeBase } from '@/db/schema'
const logger = createLogger('VectorSearchAPI')
/**
 * Error that carries a numeric status code in addition to its message.
 */
class APIError extends Error {
  constructor(
    message: string,
    public status: number
  ) {
    super(message)
    this.name = 'APIError'
  }
}
// Schema for vector search request
const VectorSearchSchema = z.object({
knowledgeBaseId: z.string().min(1, 'Knowledge base ID is required'),
@@ -23,31 +34,45 @@ async function generateSearchEmbedding(query: string): Promise<number[]> {
}
try {
const response = await fetch('https://api.openai.com/v1/embeddings', {
method: 'POST',
headers: {
Authorization: `Bearer ${openaiApiKey}`,
'Content-Type': 'application/json',
return await retryWithExponentialBackoff(
async () => {
const response = await fetch('https://api.openai.com/v1/embeddings', {
method: 'POST',
headers: {
Authorization: `Bearer ${openaiApiKey}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
input: query,
model: 'text-embedding-3-small',
encoding_format: 'float',
}),
})
if (!response.ok) {
const errorText = await response.text()
const error = new APIError(
`OpenAI API error: ${response.status} ${response.statusText} - ${errorText}`,
response.status
)
throw error
}
const data = await response.json()
if (!data.data || !Array.isArray(data.data) || data.data.length === 0) {
throw new Error('Invalid response format from OpenAI embeddings API')
}
return data.data[0].embedding
},
body: JSON.stringify({
input: query,
model: 'text-embedding-3-small',
encoding_format: 'float',
}),
})
if (!response.ok) {
const errorText = await response.text()
throw new Error(`OpenAI API error: ${response.status} ${response.statusText} - ${errorText}`)
}
const data = await response.json()
if (!data.data || !Array.isArray(data.data) || data.data.length === 0) {
throw new Error('Invalid response format from OpenAI embeddings API')
}
return data.data[0].embedding
{
maxRetries: 5,
initialDelayMs: 1000,
maxDelayMs: 30000, // Max 30 seconds delay for search queries
backoffMultiplier: 2,
}
)
} catch (error) {
logger.error('Failed to generate search embedding:', error)
throw new Error(

View File

@@ -0,0 +1,487 @@
import crypto from 'crypto'
import { and, eq, isNull, sql } from 'drizzle-orm'
import { processDocuments } from '@/lib/documents/document-processor'
import { retryWithExponentialBackoff } from '@/lib/documents/utils'
import { env } from '@/lib/env'
import { createLogger } from '@/lib/logs/console-logger'
import { db } from '@/db'
import { document, embedding, knowledgeBase } from '@/db/schema'
const logger = createLogger('KnowledgeUtils')
/**
 * Error subclass that records a numeric status next to the message, so callers
 * can inspect the status without parsing the message text.
 */
class APIError extends Error {
  constructor(
    message: string,
    public status: number
  ) {
    super(message)
    this.name = 'APIError'
  }
}
/**
 * Shape of a knowledge base record as used by the knowledge utilities.
 * NOTE(review): presumably mirrors the columns of the `knowledgeBase` table — confirm against the schema.
 */
export interface KnowledgeBaseData {
id: string
// Owner of the knowledge base; the access checks in this file compare it with the requesting user id.
userId: string
workspaceId?: string | null
name: string
description?: string | null
// Running total; processDocumentAsync adds each processed document's token count to it.
tokenCount: number
embeddingModel: string
embeddingDimension: number
chunkingConfig: unknown
// Soft-delete marker: the access checks only match rows where this is null.
deletedAt?: Date | null
createdAt: Date
updatedAt: Date
}
/**
 * Shape of a row selected from the `document` table (the access checks cast
 * selected rows to this type).
 */
export interface DocumentData {
id: string
knowledgeBaseId: string
filename: string
fileUrl: string
fileSize: number
mimeType: string
fileHash?: string | null
// chunkCount / tokenCount / characterCount are copied from the document processor's metadata once processing succeeds.
chunkCount: number
tokenCount: number
characterCount: number
// processDocumentAsync writes 'processing', then 'completed' or 'failed'.
processingStatus: string
// Set when processing begins.
processingStartedAt?: Date | null
// Set when processing finishes, whether it succeeded or failed.
processingCompletedAt?: Date | null
// Error message of the last failure; reset to null when processing starts or succeeds.
processingError?: string | null
enabled: boolean
// Soft-delete marker: the access checks only match rows where this is null.
deletedAt?: Date | null
uploadedAt: Date
}
/**
 * Shape of a row selected from the `embedding` table: one chunk of a document
 * together with its (optional) embedding vector.
 */
export interface EmbeddingData {
id: string
knowledgeBaseId: string
documentId: string
// Position of the chunk in the processed document's chunk list.
chunkIndex: number
// processDocumentAsync stores the hex SHA-256 digest of the chunk text here.
chunkHash: string
content: string
// Length of `content` (string length, not bytes) as written by processDocumentAsync.
contentLength: number
// processDocumentAsync estimates this as ceil(content length / 4).
tokenCount: number
// Null when no embedding was produced for the chunk.
embedding?: number[] | null
embeddingModel: string
// Taken from the chunk's startIndex / endIndex by processDocumentAsync.
startOffset: number
endOffset: number
overlapTokens: number
metadata: unknown
searchRank?: string | null
accessCount: number
lastAccessedAt?: Date | null
qualityScore?: string | null
enabled: boolean
createdAt: Date
updatedAt: Date
}
/**
 * Typed view of the JSON body returned by the OpenAI embeddings endpoint.
 * Only `data[].embedding` is read by generateEmbeddings in this file.
 */
interface OpenAIEmbeddingResponse {
// One entry per input text.
data: Array<{
embedding: number[]
index: number
}>
model: string
usage: {
prompt_tokens: number
total_tokens: number
}
}
/** Successful outcome of checkKnowledgeBaseAccess: the user owns the knowledge base. */
export interface KnowledgeBaseAccessResult {
hasAccess: true
// Only the id and owner id are selected by the access check.
knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId'>
}
/** Failed outcome of checkKnowledgeBaseAccess. */
export interface KnowledgeBaseAccessDenied {
hasAccess: false
// True when no non-deleted knowledge base with the given id exists; absent when the user is not the owner.
notFound?: boolean
reason?: string
}
/** Result of checkKnowledgeBaseAccess; discriminate on `hasAccess`. */
export type KnowledgeBaseAccessCheck = KnowledgeBaseAccessResult | KnowledgeBaseAccessDenied
/** Successful outcome of checkDocumentAccess: the document row plus its knowledge base's id and owner. */
export interface DocumentAccessResult {
hasAccess: true
document: DocumentData
knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId'>
}
/** Failed outcome of checkDocumentAccess. */
export interface DocumentAccessDenied {
hasAccess: false
// True when the knowledge base or the document could not be found.
notFound?: boolean
// Human-readable explanation, e.g. 'Document not found'.
reason: string
}
/** Result of checkDocumentAccess; discriminate on `hasAccess`. */
export type DocumentAccessCheck = DocumentAccessResult | DocumentAccessDenied
/** Successful outcome of checkChunkAccess: the chunk row, its document row, and the knowledge base's id and owner. */
export interface ChunkAccessResult {
hasAccess: true
chunk: EmbeddingData
document: DocumentData
knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId'>
}
/** Failed outcome of checkChunkAccess. */
export interface ChunkAccessDenied {
hasAccess: false
// True when the knowledge base, document, or chunk could not be found.
notFound?: boolean
// Human-readable explanation, e.g. 'Chunk not found'.
reason: string
}
/** Result of checkChunkAccess; discriminate on `hasAccess`. */
export type ChunkAccessCheck = ChunkAccessResult | ChunkAccessDenied
/**
 * Decide whether `userId` may use the knowledge base `knowledgeBaseId`.
 *
 * Only knowledge bases whose `deletedAt` is null are considered.
 *
 * @param knowledgeBaseId - Id of the knowledge base to look up.
 * @param userId - Id of the requesting user.
 * @returns `{ hasAccess: true, knowledgeBase }` when the user owns the knowledge base,
 *   `{ hasAccess: false, notFound: true }` when no matching row exists, and
 *   `{ hasAccess: false }` when it belongs to another user.
 */
export async function checkKnowledgeBaseAccess(
  knowledgeBaseId: string,
  userId: string
): Promise<KnowledgeBaseAccessCheck> {
  const liveKnowledgeBase = and(
    eq(knowledgeBase.id, knowledgeBaseId),
    isNull(knowledgeBase.deletedAt)
  )

  const [ownerRow] = await db
    .select({ id: knowledgeBase.id, userId: knowledgeBase.userId })
    .from(knowledgeBase)
    .where(liveKnowledgeBase)
    .limit(1)

  if (!ownerRow) {
    return { hasAccess: false, notFound: true }
  }

  if (ownerRow.userId !== userId) {
    return { hasAccess: false }
  }

  return { hasAccess: true, knowledgeBase: ownerRow }
}
/**
 * Decide whether `userId` may access document `documentId` of knowledge base
 * `knowledgeBaseId`.
 *
 * The knowledge base must exist (not soft-deleted) and be owned by the user,
 * and the document must exist in that knowledge base (not soft-deleted).
 *
 * @param knowledgeBaseId - Id of the knowledge base the document should belong to.
 * @param documentId - Id of the document to look up.
 * @param userId - Id of the requesting user.
 * @returns On success, the document row and the knowledge base's id/owner; otherwise
 *   a denial with a `reason` and, when something was missing, `notFound: true`.
 */
export async function checkDocumentAccess(
  knowledgeBaseId: string,
  documentId: string,
  userId: string
): Promise<DocumentAccessCheck> {
  const [owningKb] = await db
    .select({ id: knowledgeBase.id, userId: knowledgeBase.userId })
    .from(knowledgeBase)
    .where(and(eq(knowledgeBase.id, knowledgeBaseId), isNull(knowledgeBase.deletedAt)))
    .limit(1)

  if (!owningKb) {
    return { hasAccess: false, notFound: true, reason: 'Knowledge base not found' }
  }
  if (owningKb.userId !== userId) {
    return { hasAccess: false, reason: 'Unauthorized knowledge base access' }
  }

  // The document must live in the requested knowledge base and not be soft-deleted.
  const liveDocument = and(
    eq(document.id, documentId),
    eq(document.knowledgeBaseId, knowledgeBaseId),
    isNull(document.deletedAt)
  )
  const [documentRow] = await db.select().from(document).where(liveDocument).limit(1)

  if (!documentRow) {
    return { hasAccess: false, notFound: true, reason: 'Document not found' }
  }

  return { hasAccess: true, document: documentRow as DocumentData, knowledgeBase: owningKb }
}
/**
 * Decide whether `userId` may access chunk `chunkId` of document `documentId`
 * in knowledge base `knowledgeBaseId`.
 *
 * The knowledge-base and document checks are delegated to `checkDocumentAccess`
 * so both functions always apply the same ownership and soft-delete rules and
 * return the same denial reasons. On top of that, the document must have
 * finished processing and the chunk must belong to the document.
 *
 * @param knowledgeBaseId - Id of the knowledge base the document should belong to.
 * @param documentId - Id of the document the chunk should belong to.
 * @param chunkId - Id of the chunk (embedding row) to look up.
 * @param userId - Id of the requesting user.
 * @returns On success, the chunk row, the document row and the knowledge base's
 *   id/owner; otherwise a denial with a `reason` and, when something was missing,
 *   `notFound: true`.
 */
export async function checkChunkAccess(
  knowledgeBaseId: string,
  documentId: string,
  chunkId: string,
  userId: string
): Promise<ChunkAccessCheck> {
  const documentAccess = await checkDocumentAccess(knowledgeBaseId, documentId, userId)
  if (!documentAccess.hasAccess) {
    // DocumentAccessDenied has the same shape as ChunkAccessDenied.
    return documentAccess
  }

  const { document: docData, knowledgeBase: kbData } = documentAccess

  // Chunks are only exposed once the document has been fully processed.
  if (docData.processingStatus !== 'completed') {
    return {
      hasAccess: false,
      reason: `Document is not ready for access (status: ${docData.processingStatus})`,
    }
  }

  const [chunkRow] = await db
    .select()
    .from(embedding)
    .where(and(eq(embedding.id, chunkId), eq(embedding.documentId, documentId)))
    .limit(1)

  if (!chunkRow) {
    return { hasAccess: false, notFound: true, reason: 'Chunk not found' }
  }

  return {
    hasAccess: true,
    chunk: chunkRow as EmbeddingData,
    document: docData,
    knowledgeBase: kbData,
  }
}
/**
 * Generate embeddings for `texts` with the OpenAI embeddings API.
 *
 * Texts are sent in batches of 100. Each batch request is wrapped in
 * `retryWithExponentialBackoff` (up to 5 retries, 1s initial delay, 60s cap).
 * The response of every batch is validated: it must contain a `data` array with
 * exactly one entry per input text, otherwise an error is thrown instead of
 * returning embeddings that no longer line up with `texts`.
 *
 * @param texts - Texts to embed; an empty array yields an empty result without any request.
 * @param embeddingModel - OpenAI embedding model name.
 * @returns One embedding per input text, in input order.
 * @throws Error when `OPENAI_API_KEY` is not configured, when the API responds with a
 *   non-OK status (as `APIError` carrying the HTTP status), or when the response body
 *   is malformed or has the wrong number of embeddings.
 */
export async function generateEmbeddings(
  texts: string[],
  embeddingModel = 'text-embedding-3-small'
): Promise<number[][]> {
  const openaiApiKey = env.OPENAI_API_KEY
  if (!openaiApiKey) {
    throw new Error('OPENAI_API_KEY not configured')
  }

  try {
    const batchSize = 100
    const allEmbeddings: number[][] = []

    for (let i = 0; i < texts.length; i += batchSize) {
      const batch = texts.slice(i, i + batchSize)

      logger.info(
        `Generating embeddings for batch ${Math.floor(i / batchSize) + 1} (${batch.length} texts)`
      )

      const batchEmbeddings = await retryWithExponentialBackoff(
        async () => {
          const response = await fetch('https://api.openai.com/v1/embeddings', {
            method: 'POST',
            headers: {
              Authorization: `Bearer ${openaiApiKey}`,
              'Content-Type': 'application/json',
            },
            body: JSON.stringify({
              input: batch,
              model: embeddingModel,
              encoding_format: 'float',
            }),
          })

          if (!response.ok) {
            const errorText = await response.text()
            throw new APIError(
              `OpenAI API error: ${response.status} ${response.statusText} - ${errorText}`,
              response.status
            )
          }

          // The body is untrusted JSON: treat every field as possibly missing until checked.
          const data: Partial<OpenAIEmbeddingResponse> = await response.json()
          if (!Array.isArray(data.data)) {
            throw new Error('Invalid response format from OpenAI embeddings API')
          }
          // Callers pair embeddings with their inputs by position, so a short or long
          // response would silently attach vectors to the wrong texts.
          if (data.data.length !== batch.length) {
            throw new Error(
              `OpenAI embeddings API returned ${data.data.length} embeddings for ${batch.length} inputs`
            )
          }

          return data.data.map((item) => item.embedding)
        },
        {
          maxRetries: 5,
          initialDelayMs: 1000,
          maxDelayMs: 60000, // Max 1 minute delay for embeddings
          backoffMultiplier: 2,
        }
      )

      allEmbeddings.push(...batchEmbeddings)
    }

    return allEmbeddings
  } catch (error) {
    logger.error('Failed to generate embeddings:', error)
    throw error
  }
}
/**
 * Process one already-created document: parse and chunk it, generate embeddings
 * for the chunks, and persist the results.
 *
 * Status handling:
 * - The document is set to `processing` before any work starts.
 * - On success, the chunk rows, the document's counts and `completed` status, and
 *   the knowledge base's token total are written in a single transaction. The
 *   document is therefore never visible as `completed` without its chunks, and a
 *   failure part-way through leaves none of the three writes behind.
 * - On any failure, the error is logged and the document is set to `failed` with
 *   the error message; the error itself is not rethrown.
 *
 * @param knowledgeBaseId - Knowledge base the document belongs to.
 * @param documentId - Id of the existing document row to update.
 * @param docData - File information handed to the document processor.
 * @param processingOptions - Chunking options forwarded to the document processor.
 * @throws Only if writing the `failed` status in the error path itself fails.
 */
export async function processDocumentAsync(
  knowledgeBaseId: string,
  documentId: string,
  docData: {
    filename: string
    fileUrl: string
    fileSize: number
    mimeType: string
    fileHash?: string | null
  },
  processingOptions: {
    chunkSize?: number
    minCharactersPerChunk?: number
    recipe?: string
    lang?: string
  }
): Promise<void> {
  const startTime = Date.now()

  try {
    logger.info(`[${documentId}] Starting document processing: ${docData.filename}`)

    // Set status to processing
    await db
      .update(document)
      .set({
        processingStatus: 'processing',
        processingStartedAt: new Date(),
        processingError: null, // Clear any previous error
      })
      .where(eq(document.id, documentId))

    logger.info(`[${documentId}] Status updated to 'processing', starting document processor`)

    const processedDocuments = await processDocuments(
      [
        {
          fileUrl: docData.fileUrl,
          filename: docData.filename,
          mimeType: docData.mimeType,
          fileSize: docData.fileSize,
        },
      ],
      {
        knowledgeBaseId,
        ...processingOptions,
      }
    )

    if (processedDocuments.length === 0) {
      throw new Error('No document was processed')
    }

    const processed = processedDocuments[0]
    const now = new Date()

    logger.info(
      `[${documentId}] Document parsed successfully, generating embeddings for ${processed.chunks.length} chunks`
    )

    const chunkTexts = processed.chunks.map((chunk) => chunk.text)
    const embeddings = chunkTexts.length > 0 ? await generateEmbeddings(chunkTexts) : []

    logger.info(`[${documentId}] Embeddings generated, updating document record`)

    const embeddingRecords = processed.chunks.map((chunk, chunkIndex) => ({
      id: crypto.randomUUID(),
      knowledgeBaseId,
      documentId,
      chunkIndex,
      chunkHash: crypto.createHash('sha256').update(chunk.text).digest('hex'),
      content: chunk.text,
      contentLength: chunk.text.length,
      tokenCount: Math.ceil(chunk.text.length / 4), // Rough estimate: ~4 characters per token
      embedding: embeddings[chunkIndex] || null,
      embeddingModel: 'text-embedding-3-small',
      startOffset: chunk.startIndex || 0,
      endOffset: chunk.endIndex || chunk.text.length,
      overlapTokens: 0,
      metadata: {},
      searchRank: '1.0',
      accessCount: 0,
      lastAccessedAt: null,
      qualityScore: null,
      createdAt: now,
      updatedAt: now,
    }))

    // Persist everything atomically, chunks first: the document must not be marked
    // 'completed' (which makes its chunks accessible) before the chunks exist, and a
    // failure in any step must not leave orphaned chunks or an inflated token total.
    await db.transaction(async (tx) => {
      if (embeddingRecords.length > 0) {
        await tx.insert(embedding).values(embeddingRecords)
      }

      await tx
        .update(document)
        .set({
          chunkCount: processed.metadata.chunkCount,
          tokenCount: processed.metadata.tokenCount,
          characterCount: processed.metadata.characterCount,
          processingStatus: 'completed',
          processingCompletedAt: now,
          processingError: null,
        })
        .where(eq(document.id, documentId))

      await tx
        .update(knowledgeBase)
        .set({
          tokenCount: sql`${knowledgeBase.tokenCount} + ${processed.metadata.tokenCount}`,
          updatedAt: now,
        })
        .where(eq(knowledgeBase.id, knowledgeBaseId))
    })

    const processingTime = Date.now() - startTime
    logger.info(
      `[${documentId}] Successfully processed document with ${processed.metadata.chunkCount} chunks in ${processingTime}ms`
    )
  } catch (error) {
    const processingTime = Date.now() - startTime
    logger.error(`[${documentId}] Failed to process document after ${processingTime}ms:`, {
      error: error instanceof Error ? error.message : 'Unknown error',
      stack: error instanceof Error ? error.stack : undefined,
      filename: docData.filename,
      fileUrl: docData.fileUrl,
      mimeType: docData.mimeType,
    })

    // Record the failure on the document so it can be surfaced and retried.
    await db
      .update(document)
      .set({
        processingStatus: 'failed',
        processingError: error instanceof Error ? error.message : 'Unknown error',
        processingCompletedAt: new Date(),
      })
      .where(eq(document.id, documentId))
  }
}

View File

@@ -44,7 +44,6 @@ export function SlackChannelSelector({
const fetchChannels = useCallback(async () => {
if (!credential) return
const controller = new AbortController()
setLoading(true)
setError(null)
@@ -53,7 +52,6 @@ export function SlackChannelSelector({
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ credential }),
signal: controller.signal,
})
if (!res.ok) throw new Error(`HTTP error! status: ${res.status}`)

View File

@@ -0,0 +1,204 @@
import { useCallback, useEffect, useState } from 'react'
import { Check, ChevronDown, RefreshCw } from 'lucide-react'
import { PackageSearchIcon } from '@/components/icons'
import { Button } from '@/components/ui/button'
import {
Command,
CommandEmpty,
CommandGroup,
CommandInput,
CommandItem,
CommandList,
} from '@/components/ui/command'
import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover'
import { type KnowledgeBaseData, useKnowledgeStore } from '@/stores/knowledge/knowledge'
/** Props of KnowledgeBaseSelector. */
interface KnowledgeBaseSelectorProps {
// Id of the currently selected knowledge base (ignored in preview mode, where previewValue is used).
value: string
// Called with the chosen knowledge base's id and data when the user picks an entry.
onChange: (knowledgeBaseId: string, knowledgeBaseInfo?: KnowledgeBaseData) => void
// Text shown on the trigger button while nothing is selected.
label?: string
// Disables the trigger button.
disabled?: boolean
// Preview mode: the selector shows previewValue, the button is disabled, and opening/selecting is ignored.
isPreview?: boolean
// Selected id to display while isPreview is true.
previewValue?: string | null
}
/**
 * Popover combobox for picking a knowledge base.
 *
 * The list comes from the knowledge store: cached entries are used when present,
 * otherwise the list is fetched when the dropdown opens, or immediately when a
 * value is set but its knowledge base is not known yet. A failed fetch is shown
 * as an error in the dropdown and is NOT retried automatically; it is retried
 * the next time the dropdown is opened.
 *
 * In preview mode the selector displays `previewValue`, the trigger is disabled,
 * and no fetching, opening or selecting takes place.
 */
export function KnowledgeBaseSelector({
  value: propValue,
  onChange,
  label = 'Select knowledge base',
  disabled = false,
  isPreview = false,
  previewValue,
}: KnowledgeBaseSelectorProps) {
  const { getKnowledgeBasesList, knowledgeBasesList, loadingKnowledgeBasesList } =
    useKnowledgeStore()

  const [knowledgeBases, setKnowledgeBases] = useState<KnowledgeBaseData[]>([])
  const [loading, setLoading] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [open, setOpen] = useState(false)
  const [selectedKnowledgeBase, setSelectedKnowledgeBase] = useState<KnowledgeBaseData | null>(null)
  const [initialFetchDone, setInitialFetchDone] = useState(false)

  // Use preview value when in preview mode, otherwise use prop value
  const value = isPreview ? previewValue : propValue

  // Fetch knowledge bases
  const fetchKnowledgeBases = useCallback(async () => {
    setLoading(true)
    setError(null)

    try {
      const data = await getKnowledgeBasesList()
      setKnowledgeBases(data)
      setInitialFetchDone(true)
    } catch (err) {
      if ((err as Error).name === 'AbortError') return
      setError((err as Error).message)
      setKnowledgeBases([])
    } finally {
      setLoading(false)
    }
  }, [getKnowledgeBasesList])

  // Handle dropdown open/close - fetch knowledge bases when opening
  const handleOpenChange = (isOpen: boolean) => {
    if (isPreview) return

    setOpen(isOpen)

    // Only fetch knowledge bases when opening the dropdown if we haven't fetched yet
    if (isOpen && (!initialFetchDone || knowledgeBasesList.length === 0)) {
      fetchKnowledgeBases()
    }
  }

  // Sync selected knowledge base with value prop
  useEffect(() => {
    if (value && knowledgeBases.length > 0) {
      const kbInfo = knowledgeBases.find((kb) => kb.id === value)
      if (kbInfo) {
        setSelectedKnowledgeBase(kbInfo)
      } else {
        setSelectedKnowledgeBase(null)
      }
    } else if (!value) {
      setSelectedKnowledgeBase(null)
    }
  }, [value, knowledgeBases])

  // Use cached data if available
  useEffect(() => {
    if (knowledgeBasesList.length > 0 && !initialFetchDone) {
      setKnowledgeBases(knowledgeBasesList)
      setInitialFetchDone(true)
    }
  }, [knowledgeBasesList, initialFetchDone])

  // If we have a value but no knowledge base info and haven't fetched yet, fetch.
  // The `!error` guard is essential: a failed fetch leaves `initialFetchDone` false and
  // flips `loading` back to false, which would otherwise re-run this effect and refetch
  // in an endless loop for as long as the request keeps failing.
  useEffect(() => {
    if (value && !selectedKnowledgeBase && !loading && !error && !initialFetchDone && !isPreview) {
      fetchKnowledgeBases()
    }
  }, [
    value,
    selectedKnowledgeBase,
    loading,
    error,
    initialFetchDone,
    fetchKnowledgeBases,
    isPreview,
  ])

  const handleSelectKnowledgeBase = (knowledgeBase: KnowledgeBaseData) => {
    if (isPreview) return

    setSelectedKnowledgeBase(knowledgeBase)
    onChange(knowledgeBase.id, knowledgeBase)
    setOpen(false)
  }

  const formatKnowledgeBaseName = (knowledgeBase: KnowledgeBaseData) => {
    return knowledgeBase.name
  }

  // Secondary line of a list entry: the document count when the entry carries one,
  // otherwise its description.
  const getKnowledgeBaseDescription = (knowledgeBase: KnowledgeBaseData) => {
    const docCount = (knowledgeBase as any).docCount
    if (docCount !== undefined) {
      return `${docCount} document${docCount !== 1 ? 's' : ''}`
    }
    return knowledgeBase.description || 'No description'
  }

  return (
    <Popover open={open} onOpenChange={handleOpenChange}>
      <PopoverTrigger asChild>
        <Button
          variant='outline'
          role='combobox'
          aria-expanded={open}
          className='relative w-full justify-between'
          disabled={disabled || isPreview}
        >
          <div className='flex max-w-[calc(100%-20px)] items-center gap-2 overflow-hidden'>
            <PackageSearchIcon className='h-4 w-4 text-[#00B0B0]' />
            {selectedKnowledgeBase ? (
              <span className='truncate font-normal'>
                {formatKnowledgeBaseName(selectedKnowledgeBase)}
              </span>
            ) : (
              <span className='truncate text-muted-foreground'>{label}</span>
            )}
          </div>
          <ChevronDown className='absolute right-3 h-4 w-4 shrink-0 opacity-50' />
        </Button>
      </PopoverTrigger>
      <PopoverContent className='w-[300px] p-0' align='start'>
        <Command>
          <CommandInput placeholder='Search knowledge bases...' />
          <CommandList>
            <CommandEmpty>
              {loading || loadingKnowledgeBasesList ? (
                <div className='flex items-center justify-center p-4'>
                  <RefreshCw className='h-4 w-4 animate-spin' />
                  <span className='ml-2'>Loading knowledge bases...</span>
                </div>
              ) : error ? (
                <div className='p-4 text-center'>
                  <p className='text-destructive text-sm'>{error}</p>
                </div>
              ) : (
                <div className='p-4 text-center'>
                  <p className='font-medium text-sm'>No knowledge bases found</p>
                  <p className='text-muted-foreground text-xs'>
                    Create a knowledge base to get started.
                  </p>
                </div>
              )}
            </CommandEmpty>

            {knowledgeBases.length > 0 && (
              <CommandGroup>
                <div className='px-2 py-1.5 font-medium text-muted-foreground text-xs'>
                  Knowledge Bases
                </div>
                {knowledgeBases.map((knowledgeBase) => (
                  <CommandItem
                    key={knowledgeBase.id}
                    value={`kb-${knowledgeBase.id}-${knowledgeBase.name}`}
                    onSelect={() => handleSelectKnowledgeBase(knowledgeBase)}
                    className='cursor-pointer'
                  >
                    <div className='flex items-center gap-2 overflow-hidden'>
                      <PackageSearchIcon className='h-4 w-4 text-[#00B0B0]' />
                      <div className='min-w-0 flex-1 overflow-hidden'>
                        <div className='truncate font-normal'>
                          {formatKnowledgeBaseName(knowledgeBase)}
                        </div>
                        <div className='truncate text-muted-foreground text-xs'>
                          {getKnowledgeBaseDescription(knowledgeBase)}
                        </div>
                      </div>
                    </div>
                    {knowledgeBase.id === value && <Check className='ml-auto h-4 w-4' />}
                  </CommandItem>
                ))}
              </CommandGroup>
            )}
          </CommandList>
        </Command>
      </PopoverContent>
    </Popover>
  )
}

View File

@@ -0,0 +1,65 @@
'use client'
import { useState } from 'react'
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip'
import type { SubBlockConfig } from '@/blocks/types'
import type { KnowledgeBaseData } from '@/stores/knowledge/knowledge'
import { useSubBlockStore } from '@/stores/workflows/subblock/store'
import { KnowledgeBaseSelector } from './components/knowledge-base-selector'
/** Props accepted by the `KnowledgeBaseSelectorInput` component. */
interface KnowledgeBaseSelectorInputProps {
/** ID of the owning block; combined with `subBlock.id` to address the value in the sub-block store. */
blockId: string
/** Sub-block configuration; its `id` keys the stored value and its `placeholder`, when set, is used as the selector label. */
subBlock: SubBlockConfig
/** Forwarded to the selector's `disabled` prop. Defaults to `false`. */
disabled?: boolean
/** Optional callback invoked with the chosen knowledge base ID after each selection. */
onKnowledgeBaseSelect?: (knowledgeBaseId: string) => void
/** When `true`, a selection is not written to the sub-block store. Defaults to `false`. */
isPreview?: boolean
/** Forwarded unchanged to the selector's `previewValue` prop. */
previewValue?: string | null
}
/**
 * Sub-block input that renders a `KnowledgeBaseSelector` inside a tooltip and
 * connects it to the sub-block store.
 *
 * The selector shows the value currently stored for (`blockId`, `subBlock.id`).
 * On selection the chosen ID is written back to the store (skipped when
 * `isPreview` is true) and then reported through `onKnowledgeBaseSelect`.
 */
export function KnowledgeBaseSelectorInput({
  blockId,
  subBlock,
  disabled = false,
  onKnowledgeBaseSelect,
  isPreview = false,
  previewValue,
}: KnowledgeBaseSelectorInputProps) {
  const { getValue, setValue } = useSubBlockStore()

  // Details of the latest selection. The value is recorded on every change but
  // is not read anywhere inside this component, so only the setter is bound.
  const [, setKnowledgeBaseInfo] = useState<KnowledgeBaseData | null>(null)

  // Value currently held in the store for this sub-block
  const selectedKnowledgeBaseId = getValue(blockId, subBlock.id)
  const selectorLabel = subBlock.placeholder || 'Select knowledge base'

  // Record the selection, persist it (outside preview mode), then notify the caller
  const handleKnowledgeBaseChange = (knowledgeBaseId: string, info?: KnowledgeBaseData) => {
    setKnowledgeBaseInfo(info || null)

    const shouldPersist = !isPreview
    if (shouldPersist) {
      setValue(blockId, subBlock.id, knowledgeBaseId)
    }

    onKnowledgeBaseSelect?.(knowledgeBaseId)
  }

  return (
    <TooltipProvider>
      <Tooltip>
        <TooltipTrigger asChild>
          <div className='w-full'>
            <KnowledgeBaseSelector
              value={selectedKnowledgeBaseId}
              onChange={handleKnowledgeBaseChange}
              label={selectorLabel}
              disabled={disabled}
              isPreview={isPreview}
              previewValue={previewValue}
            />
          </div>
        </TooltipTrigger>
        <TooltipContent side='top'>
          <p>Select a knowledge base to search</p>
        </TooltipContent>
      </Tooltip>
    </TooltipProvider>
  )
}

View File

@@ -4,6 +4,7 @@ import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip
import { getBlock } from '@/blocks/index'
import type { SubBlockConfig } from '@/blocks/types'
import { useWorkflowStore } from '@/stores/workflows/workflow/store'
import { ChannelSelectorInput } from './components/channel-selector/channel-selector-input'
import { CheckboxList } from './components/checkbox-list'
import { Code } from './components/code'
import { ConditionInput } from './components/condition-input'
@@ -14,6 +15,7 @@ import { EvalInput } from './components/eval-input'
import { FileSelectorInput } from './components/file-selector/file-selector-input'
import { FileUpload } from './components/file-upload'
import { FolderSelectorInput } from './components/folder-selector/components/folder-selector-input'
import { KnowledgeBaseSelectorInput } from './components/knowledge-base-selector/knowledge-base-selector-input'
import { LongInput } from './components/long-input'
import { ProjectSelectorInput } from './components/project-selector/project-selector-input'
import { ScheduleConfig } from './components/schedule/schedule-config'
@@ -309,6 +311,16 @@ export function SubBlock({
previewValue={previewValue}
/>
)
case 'knowledge-base-selector':
return (
<KnowledgeBaseSelectorInput
blockId={blockId}
subBlock={config}
disabled={isConnecting || isPreview}
isPreview={isPreview}
previewValue={previewValue}
/>
)
case 'input-format':
return (
<InputFormat
@@ -318,6 +330,16 @@ export function SubBlock({
previewValue={previewValue}
/>
)
case 'channel-selector':
return (
<ChannelSelectorInput
blockId={blockId}
subBlock={config}
disabled={isConnecting || isPreview}
isPreview={isPreview}
previewValue={previewValue}
/>
)
default:
return <div>Unknown input type: {config.type}</div>
}

View File

@@ -1,8 +1,8 @@
'use client'
import { LibraryBig, Search } from 'lucide-react'
import Link from 'next/link'
import { Search } from 'lucide-react'
import { useSidebarStore } from '@/stores/sidebar/store'
import { KnowledgeHeader } from '../../../components/knowledge-header/knowledge-header'
import { ChunkTableSkeleton } from '../../../components/skeletons/table-skeleton'
interface DocumentLoadingProps {
@@ -20,30 +20,29 @@ export function DocumentLoading({
const isSidebarCollapsed =
mode === 'expanded' ? !isExpanded : mode === 'collapsed' || mode === 'hover'
const breadcrumbs = [
{
id: 'knowledge-root',
label: 'Knowledge',
href: '/w/knowledge',
},
{
id: `knowledge-base-${knowledgeBaseId}`,
label: knowledgeBaseName,
href: `/w/knowledge/${knowledgeBaseId}`,
},
{
id: `document-${knowledgeBaseId}-${documentName}`,
label: documentName,
},
]
return (
<div
className={`flex h-[100vh] flex-col transition-padding duration-200 ${isSidebarCollapsed ? 'pl-14' : 'pl-60'}`}
>
{/* Fixed Header with Breadcrumbs */}
<div className='flex items-center gap-2 px-6 pt-[14px] pb-6'>
<Link
href='/w/knowledge'
prefetch={true}
className='group flex items-center gap-2 font-medium text-sm transition-colors hover:text-muted-foreground'
>
<LibraryBig className='h-[18px] w-[18px] text-muted-foreground transition-colors group-hover:text-muted-foreground/70' />
<span>Knowledge</span>
</Link>
<span className='text-muted-foreground'>/</span>
<Link
href={`/w/knowledge/${knowledgeBaseId}`}
className='font-medium text-sm transition-colors hover:text-muted-foreground'
>
{knowledgeBaseName}
</Link>
<span className='text-muted-foreground'>/</span>
<span className='font-medium text-sm'>{documentName}</span>
</div>
{/* Header with Breadcrumbs */}
<KnowledgeHeader breadcrumbs={breadcrumbs} />
<div className='flex flex-1 overflow-hidden'>
<div className='flex flex-1 flex-col overflow-hidden'>

View File

@@ -2,10 +2,24 @@
import { useEffect, useState } from 'react'
import { X } from 'lucide-react'
import {
AlertDialog,
AlertDialogAction,
AlertDialogCancel,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
} from '@/components/ui/alert-dialog'
import { Button } from '@/components/ui/button'
import { Dialog, DialogContent, DialogHeader, DialogTitle } from '@/components/ui/dialog'
import { Label } from '@/components/ui/label'
import { Textarea } from '@/components/ui/textarea'
import { createLogger } from '@/lib/logs/console-logger'
import type { DocumentData } from '@/stores/knowledge/knowledge'
const logger = createLogger('EditChunkModal')
interface ChunkData {
id: string
@@ -24,21 +38,6 @@ interface ChunkData {
updatedAt: string
}
interface DocumentData {
id: string
knowledgeBaseId: string
filename: string
fileUrl: string
fileSize: number
mimeType: string
fileHash?: string
chunkCount: number
tokenCount: number
characterCount: number
enabled: boolean
uploadedAt: string
}
interface EditChunkModalProps {
chunk: ChunkData | null
document: DocumentData | null
@@ -56,8 +55,12 @@ export function EditChunkModal({
onClose,
onChunkUpdate,
}: EditChunkModalProps) {
const [editedContent, setEditedContent] = useState('')
const [editedContent, setEditedContent] = useState(chunk?.content || '')
const [isSaving, setIsSaving] = useState(false)
const [showUnsavedChangesAlert, setShowUnsavedChangesAlert] = useState(false)
// Check if there are unsaved changes
const hasUnsavedChanges = editedContent !== (chunk?.content || '')
// Update edited content when chunk changes
useEffect(() => {
@@ -93,75 +96,160 @@ export function EditChunkModal({
if (result.success && onChunkUpdate) {
onChunkUpdate(result.data)
onClose()
handleCloseModal()
}
} catch (error) {
console.error('Error updating chunk:', error)
logger.error('Error updating chunk:', error)
} finally {
setIsSaving(false)
}
}
const handleCancel = () => {
setEditedContent(chunk?.content || '')
const handleCloseModal = () => {
onClose()
setEditedContent('')
}
// Close request coming from the dialog's `onOpenChange` or the header close button.
// With unsaved edits and no save in flight, ask for confirmation first;
// in every other case close the modal immediately.
const handleCloseAttempt = () => {
if (hasUnsavedChanges && !isSaving) {
setShowUnsavedChangesAlert(true)
} else {
handleCloseModal()
}
}
// Handler for the footer Cancel button: confirm before discarding unsaved edits,
// otherwise close the modal. Unlike handleCloseAttempt it does not check `isSaving`;
// the Cancel button itself is rendered disabled while a save is in progress.
const handleCancel = () => {
if (hasUnsavedChanges) {
setShowUnsavedChangesAlert(true)
} else {
handleCloseModal()
}
}
const handleConfirmDiscard = () => {
setShowUnsavedChangesAlert(false)
handleCloseModal()
}
const handleKeepEditing = () => {
setShowUnsavedChangesAlert(false)
}
if (!chunk || !document) return null
return (
<Dialog open={isOpen} onOpenChange={onClose}>
<DialogContent
className='flex h-[74vh] flex-col gap-0 overflow-hidden p-0 sm:max-w-[600px]'
hideCloseButton
>
<DialogHeader className='flex-shrink-0 border-b px-6 py-4'>
<div className='flex items-center justify-between'>
<DialogTitle className='font-medium text-lg'>Edit Chunk Content</DialogTitle>
<Button variant='ghost' size='icon' className='h-8 w-8 p-0' onClick={onClose}>
<X className='h-4 w-4' />
<span className='sr-only'>Close</span>
</Button>
</div>
</DialogHeader>
<>
<Dialog open={isOpen} onOpenChange={handleCloseAttempt}>
<DialogContent
className='flex h-[80vh] max-h-[900px] w-[95vw] max-w-4xl flex-col gap-0 overflow-hidden p-0'
hideCloseButton
>
<DialogHeader className='flex-shrink-0 border-b bg-background/95 px-8 py-6 backdrop-blur supports-[backdrop-filter]:bg-background/80'>
<div className='flex items-center justify-between'>
<div className='space-y-1'>
<DialogTitle className='font-semibold text-xl tracking-tight'>
Edit Chunk Content
</DialogTitle>
<p className='text-muted-foreground text-sm'>
Modify the content of this knowledge chunk
</p>
</div>
<Button
variant='ghost'
size='icon'
className='h-9 w-9 rounded-full transition-colors hover:bg-muted/50'
onClick={handleCloseAttempt}
>
<X className='h-4 w-4' />
<span className='sr-only'>Close</span>
</Button>
</div>
</DialogHeader>
<div className='flex flex-1 flex-col overflow-hidden'>
<form className='flex h-full flex-col'>
{/* Scrollable Content */}
<div className='scrollbar-thin scrollbar-thumb-muted-foreground/20 hover:scrollbar-thumb-muted-foreground/25 scrollbar-track-transparent min-h-0 flex-1 overflow-y-auto px-6'>
<div className='py-4'>
<div className='space-y-2'>
<Label htmlFor='content'>Content</Label>
<Textarea
id='content'
value={editedContent}
onChange={(e) => setEditedContent(e.target.value)}
placeholder='Enter chunk content...'
className='min-h-[400px] resize-none'
/>
<div className='flex flex-1 flex-col overflow-hidden'>
<form className='flex h-full flex-col'>
{/* Scrollable Content */}
<div className='scrollbar-thin scrollbar-thumb-muted-foreground/20 hover:scrollbar-thumb-muted-foreground/30 scrollbar-track-transparent min-h-0 flex-1 overflow-y-auto px-8'>
<div className='py-6'>
<div className='space-y-3'>
<div className='flex items-center justify-between'>
<Label htmlFor='content' className='font-medium text-sm'>
Content
</Label>
<div className='flex items-center gap-4 text-muted-foreground text-xs'>
<span>Characters: {editedContent.length}</span>
<span></span>
<span>Tokens: ~{Math.ceil(editedContent.length / 4)}</span>
</div>
</div>
<Textarea
id='content'
value={editedContent}
onChange={(e) => setEditedContent(e.target.value)}
placeholder='Enter chunk content...'
className='min-h-[500px] resize-none border-input/50 text-sm leading-relaxed focus:border-primary/50 focus:ring-2 focus:ring-primary/10'
/>
</div>
</div>
</div>
</div>
{/* Fixed Footer */}
<div className='mt-auto border-t px-6 pt-4 pb-6'>
<div className='flex justify-between'>
<Button variant='outline' onClick={handleCancel} type='button' disabled={isSaving}>
Cancel
</Button>
<Button
type='button'
onClick={handleSaveContent}
disabled={isSaving || editedContent === chunk.content}
className='bg-[#701FFC] font-[480] text-primary-foreground shadow-[0_0_0_0_#701FFC] transition-all duration-200 hover:bg-[#6518E6] hover:shadow-[0_0_0_4px_rgba(127,47,255,0.15)]'
>
{isSaving ? 'Saving...' : 'Save'}
</Button>
{/* Footer */}
<div className='flex-shrink-0 border-t bg-background/95 px-8 py-6 backdrop-blur supports-[backdrop-filter]:bg-background/80'>
<div className='flex items-center justify-between'>
<div className='text-muted-foreground text-xs'>
{hasUnsavedChanges && (
<span className='flex items-center gap-1 text-amber-600'>
<div className='h-1.5 w-1.5 rounded-full bg-amber-500' />
Unsaved changes
</span>
)}
</div>
<div className='flex items-center gap-3'>
<Button
variant='outline'
onClick={handleCancel}
type='button'
disabled={isSaving}
className='px-6'
>
Cancel
</Button>
<Button
type='button'
onClick={handleSaveContent}
disabled={isSaving || !hasUnsavedChanges}
className='bg-[#701FFC] px-8 font-medium text-white shadow-lg transition-all duration-200 hover:bg-[#6518E6] hover:shadow-[#701FFC]/25 hover:shadow-xl disabled:opacity-50 disabled:shadow-none'
>
{isSaving ? 'Saving...' : 'Save Changes'}
</Button>
</div>
</div>
</div>
</div>
</form>
</div>
</DialogContent>
</Dialog>
</form>
</div>
</DialogContent>
</Dialog>
<AlertDialog open={showUnsavedChangesAlert} onOpenChange={setShowUnsavedChangesAlert}>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>Unsaved Changes</AlertDialogTitle>
<AlertDialogDescription>
You have unsaved changes to this chunk content. Are you sure you want to discard your
changes and close the editor?
</AlertDialogDescription>
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel onClick={handleKeepEditing}>Keep Editing</AlertDialogCancel>
<AlertDialogAction
onClick={handleConfirmDiscard}
className='bg-destructive text-destructive-foreground hover:bg-destructive/90'
>
Discard Changes
</AlertDialogAction>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
</>
)
}

View File

@@ -1,74 +1,33 @@
'use client'
import { useCallback, useEffect, useState } from 'react'
import { Circle, CircleOff, FileText, LibraryBig, Search, Trash2, X } from 'lucide-react'
import Link from 'next/link'
import { useEffect, useState } from 'react'
import { Circle, CircleOff, FileText, Search, Trash2, X } from 'lucide-react'
import { Button } from '@/components/ui/button'
import { Checkbox } from '@/components/ui/checkbox'
import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'
import { createLogger } from '@/lib/logs/console-logger'
import { useDocumentChunks } from '@/hooks/use-knowledge'
import { type ChunkData, type DocumentData, useKnowledgeStore } from '@/stores/knowledge/knowledge'
import { useSidebarStore } from '@/stores/sidebar/store'
import { KnowledgeHeader } from '../../components/knowledge-header/knowledge-header'
import { DocumentLoading } from './components/document-loading'
import { EditChunkModal } from './components/edit-chunk-modal'
const logger = createLogger('Document')
interface DocumentProps {
knowledgeBaseId: string
documentId: string
knowledgeBaseName: string
documentName: string
knowledgeBaseName?: string
documentName?: string
}
interface DocumentData {
id: string
knowledgeBaseId: string
filename: string
fileUrl: string
fileSize: number
mimeType: string
fileHash?: string
chunkCount: number
tokenCount: number
characterCount: number
enabled: boolean
uploadedAt: string
}
interface ChunkData {
id: string
chunkIndex: number
content: string
contentLength: number
tokenCount: number
enabled: boolean
startOffset: number
endOffset: number
overlapTokens: number
metadata: any
searchRank: string
qualityScore: string | null
createdAt: string
updatedAt: string
}
interface ChunksResponse {
success: boolean
data: ChunkData[]
error?: string
pagination: {
total: number
limit: number
offset: number
hasMore: boolean
}
}
// Helper function to get status badge styles
function getStatusBadgeStyles(enabled: boolean) {
return enabled
? 'bg-green-100 dark:bg-green-950/40 text-green-700 dark:text-green-400'
: 'bg-gray-100 dark:bg-gray-800 text-gray-700 dark:text-gray-400'
? 'inline-flex items-center rounded-md bg-emerald-100 px-2 py-1 text-xs font-medium text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-400'
: 'inline-flex items-center rounded-md bg-orange-100 px-2 py-1 text-xs font-medium text-orange-700 dark:bg-orange-900/30 dark:text-orange-400'
}
// Helper function to truncate content for display
function truncateContent(content: string, maxLength = 150): string {
if (content.length <= maxLength) return content
return `${content.substring(0, maxLength)}...`
@@ -81,6 +40,8 @@ export function Document({
documentName,
}: DocumentProps) {
const { mode, isExpanded } = useSidebarStore()
const { getCachedKnowledgeBase, getCachedDocuments } = useKnowledgeStore()
const isSidebarCollapsed =
mode === 'expanded' ? !isExpanded : mode === 'collapsed' || mode === 'hover'
@@ -90,24 +51,39 @@ export function Document({
const [isModalOpen, setIsModalOpen] = useState(false)
const [document, setDocument] = useState<DocumentData | null>(null)
const [chunks, setChunks] = useState<ChunkData[]>([])
const [isLoadingDocument, setIsLoadingDocument] = useState(true)
const [isLoadingChunks, setIsLoadingChunks] = useState(true)
const [error, setError] = useState<string | null>(null)
const [pagination, setPagination] = useState({
total: 0,
limit: 50,
offset: 0,
hasMore: false,
})
// Fetch document data
// Use the new chunks hook
const {
chunks,
isLoading: isLoadingChunks,
error: chunksError,
refreshChunks,
updateChunk,
} = useDocumentChunks(knowledgeBaseId, documentId)
// Combine errors
const combinedError = error || chunksError
// Try to get document from store cache first, then fetch if needed
useEffect(() => {
const fetchDocument = async () => {
try {
setIsLoadingDocument(true)
setError(null)
// First try to get from cached documents in the store
const cachedDocuments = getCachedDocuments(knowledgeBaseId)
const cachedDoc = cachedDocuments?.find((d) => d.id === documentId)
if (cachedDoc) {
setDocument(cachedDoc)
setIsLoadingDocument(false)
return
}
// If not in cache, fetch from API
const response = await fetch(`/api/knowledge/${knowledgeBaseId}/documents/${documentId}`)
if (!response.ok) {
@@ -125,7 +101,7 @@ export function Document({
throw new Error(result.error || 'Failed to fetch document')
}
} catch (err) {
console.error('Error fetching document:', err)
logger.error('Error fetching document:', err)
setError(err instanceof Error ? err.message : 'An error occurred')
} finally {
setIsLoadingDocument(false)
@@ -135,57 +111,17 @@ export function Document({
if (knowledgeBaseId && documentId) {
fetchDocument()
}
}, [knowledgeBaseId, documentId])
}, [knowledgeBaseId, documentId, getCachedDocuments])
// Fetch chunks data
const fetchChunks = useCallback(
async (search?: string, offset = 0) => {
try {
setIsLoadingChunks(true)
const knowledgeBase = getCachedKnowledgeBase(knowledgeBaseId)
const effectiveKnowledgeBaseName = knowledgeBase?.name || knowledgeBaseName || 'Knowledge Base'
const effectiveDocumentName = document?.filename || documentName || 'Document'
const params = new URLSearchParams({
limit: pagination.limit.toString(),
offset: offset.toString(),
})
if (search) params.append('search', search)
const response = await fetch(
`/api/knowledge/${knowledgeBaseId}/documents/${documentId}/chunks?${params}`
)
if (!response.ok) {
throw new Error(`Failed to fetch chunks: ${response.statusText}`)
}
const result: ChunksResponse = await response.json()
if (result.success) {
if (offset === 0) {
setChunks(result.data)
} else {
setChunks((prev) => [...prev, ...result.data])
}
setPagination(result.pagination)
} else {
throw new Error(result.error || 'Failed to fetch chunks')
}
} catch (err) {
console.error('Error fetching chunks:', err)
setError(err instanceof Error ? err.message : 'An error occurred')
} finally {
setIsLoadingChunks(false)
}
},
[knowledgeBaseId, documentId, pagination.limit]
)
// Initial fetch and refetch on filter changes
useEffect(() => {
if (document) {
fetchChunks(searchQuery, 0)
}
}, [document, searchQuery, fetchChunks])
const breadcrumbs = [
{ label: 'Knowledge', href: '/w/knowledge' },
{ label: effectiveKnowledgeBaseName, href: `/w/knowledge/${knowledgeBaseId}` },
{ label: effectiveDocumentName },
]
const handleChunkClick = (chunk: ChunkData) => {
setSelectedChunk(chunk)
@@ -222,10 +158,10 @@ export function Document({
const result = await response.json()
if (result.success) {
setChunks((prev) => prev.map((c) => (c.id === chunkId ? { ...c, enabled: !c.enabled } : c)))
updateChunk(chunkId, { enabled: !chunk.enabled })
}
} catch (err) {
console.error('Error updating chunk:', err)
logger.error('Error updating chunk:', err)
}
}
@@ -245,7 +181,7 @@ export function Document({
const result = await response.json()
if (result.success) {
setChunks((prev) => prev.filter((c) => c.id !== chunkId))
await refreshChunks()
setSelectedChunks((prev) => {
const newSet = new Set(prev)
newSet.delete(chunkId)
@@ -253,7 +189,7 @@ export function Document({
})
}
} catch (err) {
console.error('Error deleting chunk:', err)
logger.error('Error deleting chunk:', err)
}
}
@@ -278,47 +214,32 @@ export function Document({
}
const isAllSelected = chunks.length > 0 && selectedChunks.size === chunks.length
const isIndeterminate = selectedChunks.size > 0 && selectedChunks.size < chunks.length
// Show loading component while data is being fetched
if (isLoadingDocument || isLoadingChunks) {
return (
<DocumentLoading
knowledgeBaseId={knowledgeBaseId}
knowledgeBaseName={knowledgeBaseName}
documentName={documentName}
knowledgeBaseName={effectiveKnowledgeBaseName}
documentName={effectiveDocumentName}
/>
)
}
// Show error state for document fetch
if (error && isLoadingDocument) {
if (combinedError && !isLoadingChunks) {
const errorBreadcrumbs = [
{ label: 'Knowledge', href: '/w/knowledge' },
{ label: effectiveKnowledgeBaseName, href: `/w/knowledge/${knowledgeBaseId}` },
{ label: 'Error' },
]
return (
<div
className={`flex h-[100vh] flex-col transition-padding duration-200 ${isSidebarCollapsed ? 'pl-14' : 'pl-60'}`}
>
<div className='flex items-center gap-2 px-6 pt-[14px] pb-6'>
<Link
href='/w/knowledge'
prefetch={true}
className='group flex items-center gap-2 font-medium text-sm transition-colors hover:text-muted-foreground'
>
<LibraryBig className='h-[18px] w-[18px] text-muted-foreground transition-colors group-hover:text-muted-foreground/70' />
<span>Knowledge</span>
</Link>
<span className='text-muted-foreground'>/</span>
<Link
href={`/w/knowledge/${knowledgeBaseId}`}
className='font-medium text-sm transition-colors hover:text-muted-foreground'
>
{knowledgeBaseName}
</Link>
<span className='text-muted-foreground'>/</span>
<span className='font-medium text-sm'>Error</span>
</div>
<KnowledgeHeader breadcrumbs={errorBreadcrumbs} />
<div className='flex flex-1 items-center justify-center'>
<div className='text-center'>
<p className='mb-2 text-red-600 text-sm'>Error: {error}</p>
<p className='mb-2 text-red-600 text-sm'>Error: {combinedError}</p>
<button
onClick={() => window.location.reload()}
className='text-blue-600 text-sm underline hover:text-blue-800'
@@ -336,33 +257,15 @@ export function Document({
className={`flex h-[100vh] flex-col transition-padding duration-200 ${isSidebarCollapsed ? 'pl-14' : 'pl-60'}`}
>
{/* Fixed Header with Breadcrumbs */}
<div className='flex items-center gap-2 px-6 pt-[14px] pb-6'>
<Link
href='/w/knowledge'
prefetch={true}
className='group flex items-center gap-2 font-medium text-sm transition-colors hover:text-muted-foreground'
>
<LibraryBig className='h-[18px] w-[18px] text-muted-foreground transition-colors group-hover:text-muted-foreground/70' />
<span>Knowledge</span>
</Link>
<span className='text-muted-foreground'>/</span>
<Link
href={`/w/knowledge/${knowledgeBaseId}`}
className='font-medium text-sm transition-colors hover:text-muted-foreground'
>
{knowledgeBaseName}
</Link>
<span className='text-muted-foreground'>/</span>
<span className='font-medium text-sm'>{documentName}</span>
</div>
<KnowledgeHeader breadcrumbs={breadcrumbs} />
<div className='flex flex-1 overflow-hidden'>
<div className='flex flex-1 flex-col overflow-hidden'>
{/* Main Content */}
<div className='flex-1 overflow-auto pt-[4px]'>
<div className='flex-1 overflow-auto'>
<div className='px-6 pb-6'>
{/* Search Section */}
<div className='mb-4'>
<div className='mb-4 flex items-center justify-between'>
<div className='relative max-w-md'>
<div className='relative flex items-center'>
<Search className='-translate-y-1/2 pointer-events-none absolute top-1/2 left-3 h-[18px] w-[18px] transform text-muted-foreground' />
@@ -370,10 +273,15 @@ export function Document({
type='text'
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
placeholder='Search chunks...'
placeholder={
document?.processingStatus === 'completed'
? 'Search chunks...'
: 'Document processing...'
}
disabled={document?.processingStatus !== 'completed'}
className='h-10 w-full rounded-md border bg-background px-9 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:font-medium file:text-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50'
/>
{searchQuery && (
{searchQuery && document?.processingStatus === 'completed' && (
<button
onClick={() => setSearchQuery('')}
className='-translate-y-1/2 absolute top-1/2 right-3 transform text-muted-foreground hover:text-foreground'
@@ -386,9 +294,9 @@ export function Document({
</div>
{/* Error State for chunks */}
{error && !isLoadingDocument && (
{combinedError && !isLoadingChunks && (
<div className='mb-4 rounded-md border border-red-200 bg-red-50 p-4'>
<p className='text-red-800 text-sm'>Error loading chunks: {error}</p>
<p className='text-red-800 text-sm'>Error loading chunks: {combinedError}</p>
</div>
)}
@@ -411,6 +319,7 @@ export function Document({
<Checkbox
checked={isAllSelected}
onCheckedChange={handleSelectAll}
disabled={document?.processingStatus !== 'completed'}
aria-label='Select all chunks'
className='h-3.5 w-3.5 border-gray-300 focus-visible:ring-[#701FFC]/20 data-[state=checked]:border-[#701FFC] data-[state=checked]:bg-[#701FFC] [&>*]:h-3 [&>*]:w-3'
/>
@@ -451,8 +360,8 @@ export function Document({
<col className='w-[12%]' />
</colgroup>
<tbody>
{chunks.length === 0 ? (
<tr className='border-b transition-colors hover:bg-accent/30'>
{document?.processingStatus !== 'completed' ? (
<tr className='border-b transition-colors'>
<td className='px-4 py-3'>
<div className='h-3.5 w-3.5' />
</td>
@@ -463,7 +372,13 @@ export function Document({
<div className='flex items-center gap-2'>
<FileText className='h-5 w-5 text-muted-foreground' />
<span className='text-muted-foreground text-sm italic'>
No chunks found
{document?.processingStatus === 'pending' &&
'Document processing pending...'}
{document?.processingStatus === 'processing' &&
'Document processing in progress...'}
{document?.processingStatus === 'failed' &&
'Document processing failed'}
{!document?.processingStatus && 'Document not ready'}
</span>
</div>
</td>
@@ -477,6 +392,58 @@ export function Document({
<div className='text-muted-foreground text-xs'></div>
</td>
</tr>
) : chunks.length === 0 && !isLoadingChunks ? (
<tr className='border-b transition-colors hover:bg-accent/30'>
<td className='px-4 py-3'>
<div className='h-3.5 w-3.5' />
</td>
<td className='px-4 py-3'>
<div className='text-muted-foreground text-xs'></div>
</td>
<td className='px-4 py-3'>
<div className='flex items-center gap-2'>
<FileText className='h-5 w-5 text-muted-foreground' />
<span className='text-muted-foreground text-sm italic'>
{document?.processingStatus === 'completed'
? 'No chunks found'
: 'Document is still processing...'}
</span>
</div>
</td>
<td className='px-4 py-3'>
<div className='text-muted-foreground text-xs'></div>
</td>
<td className='px-4 py-3'>
<div className='text-muted-foreground text-xs'></div>
</td>
<td className='px-4 py-3'>
<div className='text-muted-foreground text-xs'></div>
</td>
</tr>
) : isLoadingChunks ? (
// Show loading skeleton rows when chunks are loading
Array.from({ length: 5 }).map((_, index) => (
<tr key={`loading-${index}`} className='border-b transition-colors'>
<td className='px-4 py-3'>
<div className='h-3.5 w-3.5 animate-pulse rounded bg-muted' />
</td>
<td className='px-4 py-3'>
<div className='h-4 w-8 animate-pulse rounded bg-muted' />
</td>
<td className='px-4 py-3'>
<div className='h-4 w-full animate-pulse rounded bg-muted' />
</td>
<td className='px-4 py-3'>
<div className='h-4 w-12 animate-pulse rounded bg-muted' />
</td>
<td className='px-4 py-3'>
<div className='h-4 w-12 animate-pulse rounded bg-muted' />
</td>
<td className='px-4 py-3'>
<div className='h-4 w-16 animate-pulse rounded bg-muted' />
</td>
</tr>
))
) : (
chunks.map((chunk) => (
<tr
@@ -520,9 +487,7 @@ export function Document({
{/* Status column */}
<td className='px-4 py-3'>
<div
className={`inline-flex items-center justify-center rounded-md px-2 py-1 text-xs ${getStatusBadgeStyles(chunk.enabled)}`}
>
<div className={getStatusBadgeStyles(chunk.enabled)}>
<span className='font-medium'>
{chunk.enabled ? 'Enabled' : 'Disabled'}
</span>
@@ -593,7 +558,7 @@ export function Document({
isOpen={isModalOpen}
onClose={handleCloseModal}
onChunkUpdate={(updatedChunk: ChunkData) => {
setChunks((prev) => prev.map((c) => (c.id === updatedChunk.id ? updatedChunk : c)))
updateChunk(updatedChunk.id, updatedChunk)
setSelectedChunk(updatedChunk)
}}
/>

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
'use client'
import { useRef, useState } from 'react'
import { useEffect, useRef, useState } from 'react'
import { zodResolver } from '@hookform/resolvers/zod'
import { AlertCircle, CheckCircle2, X } from 'lucide-react'
import { useForm } from 'react-hook-form'
@@ -10,17 +10,13 @@ import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Textarea } from '@/components/ui/textarea'
import { createLogger } from '@/lib/logs/console-logger'
import { getDocumentIcon } from '@/app/w/knowledge/components/icons/document-icons'
import type { DocumentData, KnowledgeBaseData } from '@/stores/knowledge/knowledge'
import { useKnowledgeStore } from '@/stores/knowledge/knowledge'
// Define form schema
const formSchema = z.object({
name: z.string().min(1, 'Name is required').max(100, 'Name must be less than 100 characters'),
description: z.string().max(500, 'Description must be less than 500 characters').optional(),
})
const logger = createLogger('CreateForm')
type FormValues = z.infer<typeof formSchema>
// File upload constraints
const MAX_FILE_SIZE = 50 * 1024 * 1024 // 50MB
const ACCEPTED_FILE_TYPES = [
'application/pdf',
@@ -32,46 +28,66 @@ const ACCEPTED_FILE_TYPES = [
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
]
interface ProcessedDocumentResponse {
documentId: string
filename: string
status: string
}
interface FileWithPreview extends File {
preview: string
}
interface KnowledgeBase {
id: string
name: string
description?: string
tokenCount: number
embeddingModel: string
embeddingDimension: number
chunkingConfig: any
createdAt: string
updatedAt: string
workspaceId?: string
}
interface CreateFormProps {
onClose: () => void
onKnowledgeBaseCreated?: (knowledgeBase: KnowledgeBase) => void
onKnowledgeBaseCreated?: (knowledgeBase: KnowledgeBaseData) => void
}
// Validation schema for the fields of the create-knowledge-base form.
const FormSchema = z.object({
// Required name of at most 100 characters. The refine step additionally rejects
// whitespace-only input, which `min(1)` alone would accept.
// NOTE(review): `max(100)` accepts a name of exactly 100 characters although the
// message says "less than 100" — confirm which is intended.
name: z
.string()
.min(1, 'Name is required')
.max(100, 'Name must be less than 100 characters')
.refine((value) => value.trim().length > 0, 'Name cannot be empty'),
// Optional free-text description of at most 500 characters.
description: z.string().max(500, 'Description must be less than 500 characters').optional(),
})
type FormValues = z.infer<typeof FormSchema>
/** Outcome of a form submission, as held in the `submitStatus` state of `CreateForm`. */
interface SubmitStatus {
/** Whether the submission succeeded or failed. */
type: 'success' | 'error'
/** Text accompanying the outcome. */
message: string
}
export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps) {
const fileInputRef = useRef<HTMLInputElement>(null)
const [isSubmitting, setIsSubmitting] = useState(false)
const [submitStatus, setSubmitStatus] = useState<'success' | 'error' | null>(null)
const [errorMessage, setErrorMessage] = useState('')
const [submitStatus, setSubmitStatus] = useState<SubmitStatus | null>(null)
const [files, setFiles] = useState<FileWithPreview[]>([])
const [fileError, setFileError] = useState<string | null>(null)
const [isDragging, setIsDragging] = useState(false)
const [dragCounter, setDragCounter] = useState(0) // Track drag events to handle nested elements
const scrollContainerRef = useRef<HTMLDivElement>(null)
const dropZoneRef = useRef<HTMLDivElement>(null)
// Cleanup file preview URLs when component unmounts to prevent memory leaks
useEffect(() => {
return () => {
files.forEach((file) => {
if (file.preview) {
URL.revokeObjectURL(file.preview)
}
})
}
}, [files])
const {
register,
handleSubmit,
reset,
formState: { errors },
} = useForm<FormValues>({
resolver: zodResolver(formSchema),
resolver: zodResolver(FormSchema),
defaultValues: {
name: '',
description: '',
@@ -117,7 +133,7 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
setFiles((prev) => [...prev, ...newFiles])
}
} catch (error) {
console.error('Error processing files:', error)
logger.error('Error processing files:', error)
setFileError('An error occurred while processing files. Please try again.')
} finally {
// Reset the input
@@ -137,24 +153,39 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
const handleDragEnter = (e: React.DragEvent) => {
e.preventDefault()
e.stopPropagation()
setIsDragging(true)
setDragCounter((prev) => {
const newCount = prev + 1
if (newCount === 1) {
setIsDragging(true)
}
return newCount
})
}
const handleDragLeave = (e: React.DragEvent) => {
e.preventDefault()
e.stopPropagation()
setIsDragging(false)
setDragCounter((prev) => {
const newCount = prev - 1
if (newCount === 0) {
setIsDragging(false)
}
return newCount
})
}
const handleDragOver = (e: React.DragEvent) => {
e.preventDefault()
e.stopPropagation()
// Add visual feedback for valid drop zone
e.dataTransfer.dropEffect = 'copy'
}
const handleDrop = async (e: React.DragEvent) => {
e.preventDefault()
e.stopPropagation()
setIsDragging(false)
setDragCounter(0)
if (e.dataTransfer.files && e.dataTransfer.files.length > 0) {
await processFiles(e.dataTransfer.files)
@@ -214,10 +245,18 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
const newKnowledgeBase = result.data
// If files are uploaded, process them
// If files are uploaded, upload them and start processing
if (files.length > 0) {
// First, upload all files to get their URLs
const uploadedFiles = []
interface UploadedFile {
filename: string
fileUrl: string
fileSize: number
mimeType: string
fileHash: string | undefined
}
const uploadedFiles: UploadedFile[] = []
for (const file of files) {
const formData = new FormData()
@@ -245,7 +284,7 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
})
}
// Now process the uploaded files
// Start async document processing
const processResponse = await fetch(
`/api/knowledge/${newKnowledgeBase.id}/process-documents`,
{
@@ -266,22 +305,48 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
)
if (!processResponse.ok) {
const errorData = await processResponse.json()
throw new Error(errorData.error || 'Failed to process documents')
throw new Error('Failed to start document processing')
}
const processResult = await processResponse.json()
if (!processResult.success) {
throw new Error(processResult.error || 'Failed to process documents')
// Create pending document objects and add them to the store immediately
if (processResult.success && processResult.data.documentsCreated) {
const pendingDocuments: DocumentData[] = processResult.data.documentsCreated.map(
(doc: ProcessedDocumentResponse, index: number) => ({
id: doc.documentId,
knowledgeBaseId: newKnowledgeBase.id,
filename: doc.filename,
fileUrl: uploadedFiles[index].fileUrl,
fileSize: uploadedFiles[index].fileSize,
mimeType: uploadedFiles[index].mimeType,
fileHash: uploadedFiles[index].fileHash || null,
chunkCount: 0,
tokenCount: 0,
characterCount: 0,
processingStatus: 'pending' as const,
processingStartedAt: null,
processingCompletedAt: null,
processingError: null,
enabled: true,
uploadedAt: new Date().toISOString(),
})
)
// Add pending documents to store for immediate UI update
useKnowledgeStore.getState().addPendingDocuments(newKnowledgeBase.id, pendingDocuments)
}
console.log(
`Processed ${processResult.data.processed}/${processResult.data.total} documents with ${processResult.data.totalChunks} total chunks`
)
// Update the knowledge base object with the correct document count
newKnowledgeBase.docCount = uploadedFiles.length
logger.info(`Started processing ${uploadedFiles.length} documents in the background`)
}
setSubmitStatus('success')
setSubmitStatus({
type: 'success',
message: 'Your knowledge base has been created successfully!',
})
reset()
// Clean up file previews
@@ -296,11 +361,13 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
// Close modal after a short delay to show success message
setTimeout(() => {
onClose()
}, 2000)
}, 1500)
} catch (error) {
console.error('Error creating knowledge base:', error)
setSubmitStatus('error')
setErrorMessage(error instanceof Error ? error.message : 'An unknown error occurred')
logger.error('Error creating knowledge base:', error)
setSubmitStatus({
type: 'error',
message: error instanceof Error ? error.message : 'An unknown error occurred',
})
} finally {
setIsSubmitting(false)
}
@@ -314,7 +381,7 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
className='scrollbar-thin scrollbar-thumb-muted-foreground/20 hover:scrollbar-thumb-muted-foreground/25 scrollbar-track-transparent min-h-0 flex-1 overflow-y-auto px-6'
>
<div className='py-4'>
{submitStatus === 'success' ? (
{submitStatus && submitStatus.type === 'success' ? (
<Alert className='mb-6 border-border border-green-200 bg-green-50 dark:border-green-900 dark:bg-green-950/30'>
<div className='flex items-start gap-4 py-1'>
<div className='mt-[-1.5px] flex-shrink-0'>
@@ -325,19 +392,16 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
<span className='font-medium text-green-600 dark:text-green-400'>Success</span>
</AlertTitle>
<AlertDescription className='text-green-600 dark:text-green-400'>
Your knowledge base has been created successfully!
{submitStatus.message}
</AlertDescription>
</div>
</div>
</Alert>
) : submitStatus === 'error' ? (
) : submitStatus && submitStatus.type === 'error' ? (
<Alert variant='destructive' className='mb-6'>
<AlertCircle className='h-4 w-4' />
<AlertTitle>Error</AlertTitle>
<AlertDescription>
{errorMessage ||
'There was an error creating your knowledge base. Please try again.'}
</AlertDescription>
<AlertDescription>{submitStatus.message}</AlertDescription>
</Alert>
) : null}
@@ -378,10 +442,10 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
onDragLeave={handleDragLeave}
onDrop={handleDrop}
onClick={() => fileInputRef.current?.click()}
className={`relative cursor-pointer rounded-lg border-[1px] border-dashed p-16 text-center transition-colors ${
className={`relative cursor-pointer rounded-lg border-2 border-dashed p-16 text-center transition-all duration-200 ${
isDragging
? 'border-primary bg-primary/5'
: 'border-muted-foreground/25 hover:border-muted-foreground/50'
? 'border-purple-300 bg-purple-50 shadow-sm'
: 'border-muted-foreground/25 hover:border-muted-foreground/40 hover:bg-muted/10'
}`}
>
<input
@@ -392,9 +456,22 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
className='hidden'
multiple
/>
<div className='flex flex-col items-center gap-2'>
<div className='flex flex-col items-center gap-3'>
<div
className={`text-4xl transition-all duration-200 ${
isDragging ? 'text-purple-500' : 'text-muted-foreground'
}`}
>
📁
</div>
<div className='space-y-1'>
<p className='font-medium text-sm'>Drop files here or click to browse</p>
<p
className={`font-medium text-sm transition-colors duration-200 ${
isDragging ? 'text-purple-700' : ''
}`}
>
{isDragging ? 'Drop files here!' : 'Drop files here or click to browse'}
</p>
<p className='text-muted-foreground text-xs'>
Supports PDF, DOC, DOCX, TXT, CSV, XLS, XLSX (max 50MB each)
</p>
@@ -412,10 +489,10 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
onDragLeave={handleDragLeave}
onDrop={handleDrop}
onClick={() => fileInputRef.current?.click()}
className={`flex cursor-pointer items-center justify-center rounded-md border border-dashed p-3 transition-colors ${
className={`cursor-pointer rounded-md border border-dashed p-3 text-center transition-all duration-200 ${
isDragging
? 'border-primary bg-primary/5'
: 'border-muted-foreground/25 hover:border-muted-foreground/50'
? 'border-purple-300 bg-purple-50'
: 'border-muted-foreground/25 hover:border-muted-foreground/40 hover:bg-muted/10'
}`}
>
<input
@@ -426,11 +503,28 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
className='hidden'
multiple
/>
<div className='text-center'>
<p className='font-medium text-sm'>Drop more files or click to browse</p>
<p className='text-muted-foreground text-xs'>
PDF, DOC, DOCX, TXT, CSV, XLS, XLSX (max 50MB each)
</p>
<div className='flex items-center justify-center gap-2'>
<div
className={`text-base transition-colors duration-200 ${
isDragging ? 'text-purple-500' : 'text-muted-foreground'
}`}
>
📁
</div>
<div>
<p
className={`font-medium text-sm transition-colors duration-200 ${
isDragging ? 'text-purple-700' : ''
}`}
>
{isDragging
? 'Drop more files here!'
: 'Drop more files or click to browse'}
</p>
<p className='text-muted-foreground text-xs'>
PDF, DOC, DOCX, TXT, CSV, XLS, XLSX (max 50MB each)
</p>
</div>
</div>
</div>
@@ -458,13 +552,19 @@ export function CreateForm({ onClose, onKnowledgeBaseCreated }: CreateFormProps)
</div>
</div>
)}
{fileError && <p className='mt-1 text-red-500 text-sm'>{fileError}</p>}
{fileError && (
<Alert variant='destructive' className='mt-1'>
<AlertCircle className='h-4 w-4' />
<AlertTitle>Error</AlertTitle>
<AlertDescription>{fileError}</AlertDescription>
</Alert>
)}
</div>
</div>
</div>
</div>
{/* Fixed Footer */}
{/* Footer */}
<div className='mt-auto border-t px-6 pt-4 pb-6'>
<div className='flex justify-between'>
<Button variant='outline' onClick={onClose} type='button'>

View File

@@ -3,25 +3,13 @@
import { X } from 'lucide-react'
import { Button } from '@/components/ui/button'
import { Dialog, DialogContent, DialogHeader, DialogTitle } from '@/components/ui/dialog'
import type { KnowledgeBaseData } from '@/stores/knowledge/knowledge'
import { CreateForm } from './components/create-form/create-form'
interface KnowledgeBase {
id: string
name: string
description?: string
tokenCount: number
embeddingModel: string
embeddingDimension: number
chunkingConfig: any
createdAt: string
updatedAt: string
workspaceId?: string
}
interface CreateModalProps {
open: boolean
onOpenChange: (open: boolean) => void
onKnowledgeBaseCreated?: (knowledgeBase: KnowledgeBase) => void
onKnowledgeBaseCreated?: (knowledgeBase: KnowledgeBaseData) => void
}
export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: CreateModalProps) {

View File

@@ -0,0 +1,89 @@
'use client'
import { LibraryBig, MoreHorizontal, Trash2 } from 'lucide-react'
import Link from 'next/link'
import { Button } from '@/components/ui/button'
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
} from '@/components/ui/dropdown-menu'
/** A single entry in the knowledge header's breadcrumb trail. */
interface BreadcrumbItem {
// Text shown for the breadcrumb.
label: string
// When set, the breadcrumb is rendered as a link to this URL; otherwise as plain text.
href?: string
// Preferred React key for the breadcrumb; a label/href/index-based key is used when absent.
id?: string
}
// Class-name strings for the parts of the knowledge header, keyed by the element they style.
const HEADER_STYLES = {
container: 'flex items-center justify-between px-6 pt-[14px] pb-6',
breadcrumbs: 'flex items-center gap-2',
// The `group-hover:` variant responds to the `group` class that `link` sets on an ancestor.
icon: 'h-[18px] w-[18px] text-muted-foreground transition-colors group-hover:text-muted-foreground/70',
link: 'group flex items-center gap-2 font-medium text-sm transition-colors hover:text-muted-foreground',
label: 'font-medium text-sm',
separator: 'text-muted-foreground',
} as const
/** Optional actions exposed by the knowledge header. */
interface KnowledgeHeaderOptions {
// When provided, the header shows an actions menu whose "Delete Knowledge Base" item invokes this callback.
onDeleteKnowledgeBase?: () => void
}
/** Props accepted by `KnowledgeHeader`. */
interface KnowledgeHeaderProps {
// Breadcrumbs rendered left to right, separated by "/".
breadcrumbs: BreadcrumbItem[]
// Optional header actions; the actions menu is hidden when no delete callback is supplied.
options?: KnowledgeHeaderOptions
}
/**
 * Header bar for the knowledge pages: a breadcrumb trail on the left and, when a
 * delete callback is supplied, an actions dropdown on the right.
 *
 * @param breadcrumbs - Breadcrumbs to render in order; entries with an `href` become links.
 * @param options - Optional actions; `onDeleteKnowledgeBase` enables the actions menu.
 */
export function KnowledgeHeader({ breadcrumbs, options }: KnowledgeHeaderProps) {
  const lastPosition = breadcrumbs.length - 1
  const handleDelete = options?.onDeleteKnowledgeBase

  // Renders one breadcrumb: leading icon on the first entry, trailing "/" on all but the last.
  const renderCrumb = (crumb: BreadcrumbItem, position: number) => {
    // Prefer the explicit id as the React key; otherwise derive one from label and href/position.
    const crumbKey = crumb.id || `${crumb.label}-${crumb.href || position}`

    const crumbBody = crumb.href ? (
      <Link href={crumb.href} prefetch={true} className={HEADER_STYLES.link}>
        <span>{crumb.label}</span>
      </Link>
    ) : (
      <span className={HEADER_STYLES.label}>{crumb.label}</span>
    )

    return (
      <div key={crumbKey} className='flex items-center gap-2'>
        {position === 0 && <LibraryBig className={HEADER_STYLES.icon} />}
        {crumbBody}
        {position < lastPosition && <span className={HEADER_STYLES.separator}>/</span>}
      </div>
    )
  }

  return (
    <div className={HEADER_STYLES.container}>
      <div className={HEADER_STYLES.breadcrumbs}>
        {breadcrumbs.map((crumb, position) => renderCrumb(crumb, position))}
      </div>

      {/* The actions menu is rendered only when a delete callback was provided */}
      {handleDelete && (
        <DropdownMenu>
          <DropdownMenuTrigger asChild>
            <Button
              variant='ghost'
              size='sm'
              className='h-8 w-8 p-0'
              aria-label='Knowledge base actions menu'
            >
              <MoreHorizontal className='h-4 w-4' />
            </Button>
          </DropdownMenuTrigger>
          <DropdownMenuContent align='end'>
            <DropdownMenuItem onClick={handleDelete} className='text-red-600 focus:text-red-600'>
              <Trash2 className='mr-2 h-4 w-4' />
              Delete Knowledge Base
            </DropdownMenuItem>
          </DropdownMenuContent>
        </DropdownMenu>
      )}
    </div>
  )
}

View File

@@ -1,183 +1,156 @@
'use client'
import { useEffect, useState } from 'react'
import { useMemo, useState } from 'react'
import { LibraryBig, Plus, Search, X } from 'lucide-react'
import { useKnowledgeBasesList } from '@/hooks/use-knowledge'
import type { KnowledgeBaseData } from '@/stores/knowledge/knowledge'
import { useSidebarStore } from '@/stores/sidebar/store'
import { BaseOverview } from './components/base-overview/base-overview'
import { CreateModal } from './components/create-modal/create-modal'
import { EmptyStateCard } from './components/empty-state-card/empty-state-card'
import { KnowledgeHeader } from './components/knowledge-header/knowledge-header'
import { KnowledgeBaseCardSkeletonGrid } from './components/skeletons/knowledge-base-card-skeleton'
interface KnowledgeBase {
id: string
name: string
description?: string
tokenCount: number
embeddingModel: string
embeddingDimension: number
chunkingConfig: any
createdAt: string
updatedAt: string
workspaceId?: string
interface KnowledgeBaseWithDocCount extends KnowledgeBaseData {
docCount?: number
}
export function Knowledge() {
const { mode, isExpanded } = useSidebarStore()
const { knowledgeBases, isLoading, error, addKnowledgeBase, refreshList } =
useKnowledgeBasesList()
const isSidebarCollapsed =
mode === 'expanded' ? !isExpanded : mode === 'collapsed' || mode === 'hover'
const [searchQuery, setSearchQuery] = useState('')
const [isCreateModalOpen, setIsCreateModalOpen] = useState(false)
const [knowledgeBases, setKnowledgeBases] = useState<KnowledgeBase[]>([])
const [isLoading, setIsLoading] = useState(true)
const [error, setError] = useState<string | null>(null)
// Fetch knowledge bases on component mount
useEffect(() => {
const fetchKnowledgeBases = async () => {
try {
setIsLoading(true)
setError(null)
const response = await fetch('/api/knowledge')
if (!response.ok) {
throw new Error(`Failed to fetch knowledge bases: ${response.statusText}`)
}
const result = await response.json()
if (result.success) {
setKnowledgeBases(result.data)
} else {
throw new Error(result.error || 'Failed to fetch knowledge bases')
}
} catch (err) {
console.error('Error fetching knowledge bases:', err)
setError(err instanceof Error ? err.message : 'An error occurred')
} finally {
setIsLoading(false)
}
}
fetchKnowledgeBases()
}, [])
// Handle knowledge base creation success
const handleKnowledgeBaseCreated = (newKnowledgeBase: KnowledgeBase) => {
setKnowledgeBases((prev) => [newKnowledgeBase, ...prev])
const handleKnowledgeBaseCreated = (newKnowledgeBase: KnowledgeBaseData) => {
addKnowledgeBase(newKnowledgeBase)
}
// Filter knowledge bases based on search query
const filteredKnowledgeBases = knowledgeBases.filter(
(kb) =>
kb.name.toLowerCase().includes(searchQuery.toLowerCase()) ||
kb.description?.toLowerCase().includes(searchQuery.toLowerCase())
)
const handleRetry = () => {
refreshList()
}
// Format document count for display
const formatKnowledgeBaseForDisplay = (kb: KnowledgeBase) => ({
const filteredKnowledgeBases = useMemo(() => {
if (!searchQuery.trim()) return knowledgeBases
const query = searchQuery.toLowerCase()
return knowledgeBases.filter(
(kb) => kb.name.toLowerCase().includes(query) || kb.description?.toLowerCase().includes(query)
)
}, [knowledgeBases, searchQuery])
const formatKnowledgeBaseForDisplay = (kb: KnowledgeBaseWithDocCount) => ({
id: kb.id,
title: kb.name,
docCount: kb.docCount || 0,
description: kb.description || 'No description provided',
})
const breadcrumbs = [{ id: 'knowledge', label: 'Knowledge' }]
return (
<>
<div
className={`fixed inset-0 flex flex-col transition-all duration-200 ${isSidebarCollapsed ? 'left-14' : 'left-60'}`}
className={`flex h-screen flex-col transition-padding duration-200 ${isSidebarCollapsed ? 'pl-14' : 'pl-60'}`}
>
{/* Fixed Header */}
<div className='flex items-center gap-2 px-6 pt-4 pb-6'>
<LibraryBig className='h-[18px] w-[18px] text-muted-foreground' />
<h1 className='font-medium text-sm'>Knowledge</h1>
</div>
{/* Header */}
<KnowledgeHeader breadcrumbs={breadcrumbs} />
{/* Main Content */}
<div className='flex-1 overflow-auto pt-[6px]'>
<div className='px-6 pb-6'>
{/* Search and Create Section */}
<div className='mb-6 flex items-center justify-between'>
<div className='relative max-w-md flex-1'>
<div className='relative flex items-center'>
<Search className='-translate-y-1/2 pointer-events-none absolute top-1/2 left-3 h-[18px] w-[18px] transform text-muted-foreground' />
<input
type='text'
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
placeholder='Search knowledge bases...'
className='h-10 w-full rounded-md border bg-background px-9 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:font-medium file:text-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50'
/>
{searchQuery && (
<button
onClick={() => setSearchQuery('')}
className='-translate-y-1/2 absolute top-1/2 right-3 transform text-muted-foreground hover:text-foreground'
>
<X className='h-[18px] w-[18px]' />
</button>
)}
</div>
</div>
<button
onClick={() => setIsCreateModalOpen(true)}
className='flex items-center gap-1 rounded-md bg-[#701FFC] px-3 py-[7px] font-[480] text-primary-foreground text-sm shadow-[0_0_0_0_#701FFC] transition-all duration-200 hover:bg-[#6518E6] hover:shadow-[0_0_0_4px_rgba(127,47,255,0.15)]'
>
<Plus className='h-4 w-4 font-[480]' />
<span>Create</span>
</button>
</div>
{/* Error State */}
{error && (
<div className='mb-6 rounded-md border border-red-200 bg-red-50 p-4'>
<p className='text-red-800 text-sm'>Error loading knowledge bases: {error}</p>
<button
onClick={() => window.location.reload()}
className='mt-2 text-red-600 text-sm underline hover:text-red-800'
>
Try again
</button>
</div>
)}
{/* Content Area */}
{isLoading ? (
<KnowledgeBaseCardSkeletonGrid count={8} />
) : (
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4'>
{filteredKnowledgeBases.length === 0 ? (
knowledgeBases.length === 0 ? (
<EmptyStateCard
title='Create your first knowledge base'
description='Upload your documents to create a knowledge base for your agents.'
buttonText='Create Knowledge Base'
onClick={() => setIsCreateModalOpen(true)}
icon={<LibraryBig className='h-4 w-4 text-muted-foreground' />}
/>
) : (
<div className='col-span-full py-12 text-center'>
<p className='text-muted-foreground'>No knowledge bases match your search.</p>
</div>
)
) : (
filteredKnowledgeBases.map((kb) => {
const displayData = formatKnowledgeBaseForDisplay(kb)
return (
<BaseOverview
key={kb.id}
id={displayData.id}
title={displayData.title}
docCount={displayData.docCount}
description={displayData.description}
<div className='flex flex-1 overflow-hidden'>
<div className='flex flex-1 flex-col overflow-hidden'>
{/* Main Content */}
<div className='flex-1 overflow-auto'>
<div className='px-6 pb-6'>
{/* Search and Create Section */}
<div className='mb-4 flex items-center justify-between'>
<div className='relative max-w-md flex-1'>
<div className='relative flex items-center'>
<Search className='-translate-y-1/2 pointer-events-none absolute top-1/2 left-3 h-[18px] w-[18px] transform text-muted-foreground' />
<input
type='text'
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
placeholder='Search knowledge bases...'
className='h-10 w-full rounded-md border bg-background px-9 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:font-medium file:text-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50'
/>
)
})
{searchQuery && (
<button
onClick={() => setSearchQuery('')}
className='-translate-y-1/2 absolute top-1/2 right-3 transform text-muted-foreground hover:text-foreground'
>
<X className='h-[18px] w-[18px]' />
</button>
)}
</div>
</div>
<button
onClick={() => setIsCreateModalOpen(true)}
className='flex items-center gap-1 rounded-md bg-[#701FFC] px-3 py-[7px] font-[480] text-primary-foreground text-sm shadow-[0_0_0_0_#701FFC] transition-all duration-200 hover:bg-[#6518E6] hover:shadow-[0_0_0_4px_rgba(127,47,255,0.15)]'
>
<Plus className='h-4 w-4 font-[480]' />
<span>Create</span>
</button>
</div>
{/* Error State */}
{error && (
<div className='mb-4 rounded-md border border-red-200 bg-red-50 p-4'>
<p className='text-red-800 text-sm'>Error loading knowledge bases: {error}</p>
<button
onClick={handleRetry}
className='mt-2 text-red-600 text-sm underline hover:text-red-800'
>
Try again
</button>
</div>
)}
{/* Content Area */}
{isLoading ? (
<KnowledgeBaseCardSkeletonGrid count={8} />
) : (
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4'>
{filteredKnowledgeBases.length === 0 ? (
knowledgeBases.length === 0 ? (
<EmptyStateCard
title='Create your first knowledge base'
description='Upload your documents to create a knowledge base for your agents.'
buttonText='Create Knowledge Base'
onClick={() => setIsCreateModalOpen(true)}
icon={<LibraryBig className='h-4 w-4 text-muted-foreground' />}
/>
) : (
<div className='col-span-full py-12 text-center'>
<p className='text-muted-foreground'>
No knowledge bases match your search.
</p>
</div>
)
) : (
filteredKnowledgeBases.map((kb) => {
const displayData = formatKnowledgeBaseForDisplay(
kb as KnowledgeBaseWithDocCount
)
return (
<BaseOverview
key={kb.id}
id={displayData.id}
title={displayData.title}
docCount={displayData.docCount}
description={displayData.description}
/>
)
})
)}
</div>
)}
</div>
)}
</div>
</div>
</div>
</div>

View File

@@ -34,10 +34,10 @@ export const KnowledgeBlock: BlockConfig = {
subBlocks: [
{
id: 'knowledgeBaseId',
title: 'Knowledge Base ID',
type: 'short-input',
title: 'Knowledge Base',
type: 'knowledge-base-selector',
layout: 'full',
placeholder: 'Enter knowledge base ID',
placeholder: 'Select knowledge base',
},
{
id: 'query',

View File

@@ -31,6 +31,7 @@ export type SubBlockType =
| 'project-selector' // Project selector for Jira, Discord, etc.
| 'channel-selector' // Channel selector for Slack, Discord, etc.
| 'folder-selector' // Folder selector for Gmail, etc.
| 'knowledge-base-selector' // Knowledge base selector
| 'input-format' // Input structure format
| 'file-upload' // File uploader

View File

@@ -0,0 +1,5 @@
ALTER TABLE "document" ADD COLUMN "processing_status" text DEFAULT 'pending' NOT NULL;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "processing_started_at" timestamp;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "processing_completed_at" timestamp;--> statement-breakpoint
ALTER TABLE "document" ADD COLUMN "processing_error" text;--> statement-breakpoint
CREATE INDEX "doc_processing_status_idx" ON "document" USING btree ("knowledge_base_id","processing_status");

File diff suppressed because it is too large Load Diff

View File

@@ -486,6 +486,12 @@ export const document = pgTable(
tokenCount: integer('token_count').notNull().default(0),
characterCount: integer('character_count').notNull().default(0),
// Processing status
processingStatus: text('processing_status').notNull().default('pending'), // 'pending', 'processing', 'completed', 'failed'
processingStartedAt: timestamp('processing_started_at'),
processingCompletedAt: timestamp('processing_completed_at'),
processingError: text('processing_error'),
// Document state
enabled: boolean('enabled').notNull().default(true), // Enable/disable from knowledge base
deletedAt: timestamp('deleted_at'), // Soft delete
@@ -502,6 +508,11 @@ export const document = pgTable(
filenameIdx: index('doc_filename_idx').on(table.filename),
// Order by upload date (for listing documents)
kbUploadedAtIdx: index('doc_kb_uploaded_at_idx').on(table.knowledgeBaseId, table.uploadedAt),
// Processing status filtering
processingStatusIdx: index('doc_processing_status_idx').on(
table.knowledgeBaseId,
table.processingStatus
),
})
)

View File

@@ -0,0 +1,286 @@
import { useEffect, useRef, useState } from 'react'
import { type ChunkData, type DocumentData, useKnowledgeStore } from '@/stores/knowledge/knowledge'
/**
 * Loads a single knowledge base through the knowledge store and exposes the cached value.
 *
 * An automatic load is started when `id` is set, nothing is cached for it, and the store
 * is not already loading it. The load is attempted at most once per `id` while the cache
 * stays empty: the loading flag is an effect dependency, so without this guard a failed
 * load (cache still empty, loading back to false) would re-run the effect and retry
 * indefinitely.
 *
 * @param id - Identifier of the knowledge base; a falsy id skips loading.
 * @returns `knowledgeBase` (the store's cached value for `id`), `isLoading` (whether the
 *   store lists `id` as loading), and `error` (message of the last failed automatic load,
 *   or `null`).
 */
export function useKnowledgeBase(id: string) {
  const { getKnowledgeBase, getCachedKnowledgeBase, loadingKnowledgeBases } = useKnowledgeStore()
  const [error, setError] = useState<string | null>(null)
  // Id for which this hook instance has already started an automatic load.
  const attemptedIdRef = useRef<string | null>(null)

  const knowledgeBase = getCachedKnowledgeBase(id)
  const isLoading = loadingKnowledgeBases.has(id)

  useEffect(() => {
    if (knowledgeBase) {
      // Data is present; re-arm so a later cache eviction triggers one fresh load.
      attemptedIdRef.current = null
      return
    }
    if (!id || isLoading) return
    // Already tried this id and the cache is still empty: do not loop on failures.
    if (attemptedIdRef.current === id) return
    attemptedIdRef.current = id

    const loadData = async () => {
      try {
        setError(null)
        await getKnowledgeBase(id)
      } catch (err) {
        setError(err instanceof Error ? err.message : 'Failed to load knowledge base')
      }
    }

    loadData()
  }, [id, knowledgeBase, isLoading, getKnowledgeBase])

  return {
    knowledgeBase,
    isLoading,
    error,
  }
}
/**
 * Loads the documents of a knowledge base through the knowledge store and exposes
 * helpers to refresh them and to patch a single document locally.
 *
 * An automatic load is started when no documents are cached and the store is not already
 * loading them. It is attempted at most once per `knowledgeBaseId` while the document
 * list stays empty: the loading flag is an effect dependency, so without this guard a
 * failed load — or a knowledge base that simply has no documents — would re-run the
 * effect after every completed request and fetch again indefinitely.
 *
 * @param knowledgeBaseId - Identifier of the knowledge base; a falsy id skips loading.
 * @returns `documents` (cached documents, or an empty array), `isLoading`, `error`
 *   (message of the last failed load/refresh, or `null`), `refreshDocuments` (explicit
 *   refresh through the store), and `updateDocument` (applies a partial update to one
 *   document in the store).
 */
export function useKnowledgeBaseDocuments(knowledgeBaseId: string) {
  const { getDocuments, getCachedDocuments, loadingDocuments, updateDocument, refreshDocuments } =
    useKnowledgeStore()
  const [error, setError] = useState<string | null>(null)
  // Knowledge base id for which this hook instance has already started an automatic load.
  const attemptedIdRef = useRef<string | null>(null)

  const documents = getCachedDocuments(knowledgeBaseId) || []
  const isLoading = loadingDocuments.has(knowledgeBaseId)

  useEffect(() => {
    if (documents.length > 0) {
      // Data is present; re-arm so a later emptied cache triggers one fresh load.
      attemptedIdRef.current = null
      return
    }
    if (!knowledgeBaseId || isLoading) return
    // Already tried this id and the list is still empty: do not loop.
    if (attemptedIdRef.current === knowledgeBaseId) return
    attemptedIdRef.current = knowledgeBaseId

    const loadData = async () => {
      try {
        setError(null)
        await getDocuments(knowledgeBaseId)
      } catch (err) {
        setError(err instanceof Error ? err.message : 'Failed to load documents')
      }
    }

    loadData()
  }, [knowledgeBaseId, documents.length, isLoading, getDocuments])

  const refreshDocumentsData = async () => {
    // An explicit refresh counts as the load attempt, so an empty result does not
    // trigger an extra automatic fetch afterwards.
    attemptedIdRef.current = knowledgeBaseId
    try {
      setError(null)
      await refreshDocuments(knowledgeBaseId)
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to refresh documents')
    }
  }

  const updateDocumentLocal = (documentId: string, updates: Partial<DocumentData>) => {
    updateDocument(knowledgeBaseId, documentId, updates)
  }

  return {
    documents,
    isLoading,
    error,
    refreshDocuments: refreshDocumentsData,
    updateDocument: updateDocumentLocal,
  }
}
/**
 * Loads the list of knowledge bases through the knowledge store and exposes helpers to
 * refresh it and to add/remove entries.
 *
 * An automatic load is started when the list is empty and the store is not already
 * loading it. It is attempted at most once while the list stays empty: the loading flag
 * is an effect dependency, so without this guard a failed load — or a user with no
 * knowledge bases — would re-run the effect after every completed request and fetch
 * again indefinitely. Use `refreshList` to retry explicitly.
 *
 * @returns `knowledgeBases` (the store's list), `isLoading`, `error` (message of the last
 *   failed load/refresh, or `null`), `refreshList` (clears the list and fetches it
 *   again), and the store's `addKnowledgeBase` / `removeKnowledgeBase` actions.
 */
export function useKnowledgeBasesList() {
  const {
    getKnowledgeBasesList,
    knowledgeBasesList,
    loadingKnowledgeBasesList,
    addKnowledgeBase,
    removeKnowledgeBase,
    clearKnowledgeBasesList,
  } = useKnowledgeStore()
  const [error, setError] = useState<string | null>(null)
  // Whether this hook instance has already started a load for the currently empty list.
  const loadAttemptedRef = useRef(false)

  useEffect(() => {
    if (knowledgeBasesList.length > 0) {
      // Data is present; re-arm so a later cleared list triggers one fresh load.
      loadAttemptedRef.current = false
      return
    }
    if (loadingKnowledgeBasesList || loadAttemptedRef.current) return
    loadAttemptedRef.current = true

    const loadData = async () => {
      try {
        setError(null)
        await getKnowledgeBasesList()
      } catch (err) {
        setError(err instanceof Error ? err.message : 'Failed to load knowledge bases')
      }
    }

    loadData()
  }, [knowledgeBasesList.length, loadingKnowledgeBasesList, getKnowledgeBasesList])

  const refreshList = async () => {
    // An explicit refresh counts as the load attempt, so clearing the list below does
    // not additionally trigger the automatic load.
    loadAttemptedRef.current = true
    try {
      setError(null)
      clearKnowledgeBasesList()
      await getKnowledgeBasesList()
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to refresh knowledge bases')
    }
  }

  return {
    knowledgeBases: knowledgeBasesList,
    isLoading: loadingKnowledgeBasesList,
    error,
    refreshList,
    addKnowledgeBase,
    removeKnowledgeBase,
  }
}
/**
 * Hook to manage chunks for a specific document.
 *
 * Keeps a local copy of the chunk list and pagination, seeded from the store's cache
 * when an entry exists and otherwise fetched through the store's `getChunks`.
 *
 * @param knowledgeBaseId - Knowledge base that owns the document; loading is skipped while falsy.
 * @param documentId - Document whose chunks are managed; loading is skipped while falsy.
 * @returns `chunks`, `isLoading` (local loading flag OR the store's loading flag for the
 *   document), `error` (message of the last failed operation, or `null`), `pagination`,
 *   `refreshChunks` and `searchChunks` (both rethrow after recording the error),
 *   `updateChunk` (patches the store and the local copy), and `clearChunks`.
 */
export function useDocumentChunks(knowledgeBaseId: string, documentId: string) {
const { getChunks, refreshChunks, updateChunk, getCachedChunks, clearChunks, isChunksLoading } =
useKnowledgeStore()
const [chunks, setChunks] = useState<ChunkData[]>([])
// Starts as true so consumers see a loading state before the first effect runs.
const [isLoading, setIsLoading] = useState(true)
const [error, setError] = useState<string | null>(null)
const [pagination, setPagination] = useState({
total: 0,
limit: 50,
offset: 0,
hasMore: false,
})
const isStoreLoading = isChunksLoading(documentId)
const combinedIsLoading = isLoading || isStoreLoading
// Seed local state from the cache as soon as both ids are available.
useEffect(() => {
if (!knowledgeBaseId || !documentId) return
const cached = getCachedChunks(documentId)
if (cached) {
setChunks(cached.chunks)
setPagination(cached.pagination)
setIsLoading(false)
}
}, [knowledgeBaseId, documentId, getCachedChunks])
// Initial load
// NOTE: `pagination.limit` / `pagination.offset` are read below but are not effect
// dependencies, so this effect uses the values captured when it last ran.
useEffect(() => {
if (!knowledgeBaseId || !documentId) return
const loadChunks = async () => {
try {
setIsLoading(true)
setError(null)
// Try to get cached chunks first
const cached = getCachedChunks(documentId)
if (cached) {
setChunks(cached.chunks)
setPagination(cached.pagination)
setIsLoading(false)
return
}
// If not cached, fetch from API
const fetchedChunks = await getChunks(knowledgeBaseId, documentId, {
limit: pagination.limit,
offset: pagination.offset,
})
setChunks(fetchedChunks)
// Update pagination from cache after fetch
const updatedCache = getCachedChunks(documentId)
if (updatedCache) {
setPagination(updatedCache.pagination)
}
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to load chunks')
} finally {
setIsLoading(false)
}
}
loadChunks()
}, [knowledgeBaseId, documentId, getChunks, getCachedChunks])
// Sync with store state changes
// NOTE(review): this only re-runs when `documentId` or the `getCachedChunks` reference
// changes — confirm that the store changes that reference when cached chunks update.
useEffect(() => {
const cached = getCachedChunks(documentId)
if (cached) {
setChunks(cached.chunks)
setPagination(cached.pagination)
}
}, [documentId, getCachedChunks])
// Clear the local loading flag once the store has finished loading and a cache entry exists.
useEffect(() => {
if (!isStoreLoading && isLoading) {
const cached = getCachedChunks(documentId)
if (cached) {
setIsLoading(false)
}
}
}, [isStoreLoading, isLoading, documentId, getCachedChunks])
// Re-fetches chunks through the store's `refreshChunks`, then mirrors the cache entry
// for the given search into local state. Rethrows on failure after recording the error.
const refreshChunksData = async (options?: {
search?: string
limit?: number
offset?: number
}) => {
try {
setIsLoading(true)
setError(null)
const fetchedChunks = await refreshChunks(knowledgeBaseId, documentId, options)
// Update local state from cache
const cached = getCachedChunks(documentId, { search: options?.search })
if (cached) {
setChunks(cached.chunks)
setPagination(cached.pagination)
}
return fetchedChunks
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to refresh chunks')
throw err
} finally {
setIsLoading(false)
}
}
// Fetches chunks matching `searchQuery` from the first page, then mirrors the cache
// entry for that search into local state. Rethrows on failure after recording the error.
const searchChunks = async (searchQuery: string) => {
try {
setIsLoading(true)
setError(null)
const searchResults = await getChunks(knowledgeBaseId, documentId, {
search: searchQuery,
limit: pagination.limit,
offset: 0, // Reset to first page for new search
})
// Update local state from cache
const cached = getCachedChunks(documentId, { search: searchQuery })
if (cached) {
setChunks(cached.chunks)
setPagination(cached.pagination)
}
return searchResults
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to search chunks')
throw err
} finally {
setIsLoading(false)
}
}
return {
chunks,
isLoading: combinedIsLoading,
error,
pagination,
refreshChunks: refreshChunksData,
searchChunks,
// Patch the chunk in the store and in the local copy so the UI updates immediately.
updateChunk: (chunkId: string, updates: Partial<ChunkData>) => {
updateChunk(documentId, chunkId, updates)
setChunks((prevChunks) =>
prevChunks.map((chunk) => (chunk.id === chunkId ? { ...chunk, ...updates } : chunk))
)
},
clearChunks: () => clearChunks(documentId),
}
}

View File

@@ -5,6 +5,7 @@ import { isSupportedFileType, parseBuffer, parseFile } from '@/lib/file-parsers'
import { createLogger } from '@/lib/logs/console-logger'
import { type CustomS3Config, getPresignedUrlWithConfig, uploadToS3 } from '@/lib/uploads/s3-client'
import { mistralParserTool } from '@/tools/mistral/parser'
import { retryWithExponentialBackoff } from './utils'
const logger = createLogger('DocumentProcessor')
@@ -13,6 +14,16 @@ const S3_KB_CONFIG: CustomS3Config = {
region: env.AWS_REGION || '',
}
/**
 * Error that carries an HTTP status code next to its message.
 *
 * The numeric `status` lets status-based retry predicates inspect the failure
 * without parsing the message text.
 */
class APIError extends Error {
  constructor(
    message: string,
    public status: number
  ) {
    super(message)
    this.name = 'APIError'
  }
}
export interface ProcessedDocument {
content: string
chunks: RecursiveChunk[]
@@ -127,16 +138,39 @@ async function parseDocument(
resultType: 'text',
})
// Make the actual API call to Mistral
const response = await fetch('https://api.mistral.ai/v1/ocr', {
method: mistralParserTool.request.method,
headers: mistralParserTool.request.headers({
filePath: httpsUrl,
apiKey: mistralApiKey,
resultType: 'text',
}),
body: JSON.stringify(requestBody),
})
// Make the actual API call to Mistral with retry logic
const response = await retryWithExponentialBackoff(
async () => {
logger.info(`Calling Mistral OCR API for "${filename}"`)
const response = await fetch('https://api.mistral.ai/v1/ocr', {
method: mistralParserTool.request.method,
headers: mistralParserTool.request.headers({
filePath: httpsUrl,
apiKey: mistralApiKey,
resultType: 'text',
}),
body: JSON.stringify(requestBody),
})
if (!response.ok) {
const errorText = await response.text()
const error = new APIError(
`Mistral API error: ${response.status} ${response.statusText} - ${errorText}`,
response.status
)
throw error
}
return response
},
{
maxRetries: 5,
initialDelayMs: 2000, // Start with 2 seconds for Mistral OCR
maxDelayMs: 120000, // Max 2 minutes delay for OCR processing
backoffMultiplier: 2,
}
)
if (!mistralParserTool.transformResponse) {
throw new Error('Mistral parser transform function not available')
@@ -158,6 +192,7 @@ async function parseDocument(
s3Url,
}
}
// Use file parser for other supported types
let content: string

View File

@@ -0,0 +1,149 @@
import { createLogger } from '@/lib/logs/console-logger'
const logger = createLogger('RetryUtils')
/**
 * An `Error` that may additionally carry the HTTP status code and status
 * text of the response that caused it. Both fields are optional.
 */
interface HTTPError extends Error {
status?: number
statusText?: string
}
/**
 * Every value shape the retry predicates accept: an `Error` (with or without
 * HTTP status fields) or a plain object exposing `status` and/or `message`.
 */
type RetryableError = HTTPError | Error | { status?: number; message?: string }
/**
 * Tuning knobs for `retryWithExponentialBackoff`. Every field is optional;
 * the defaults noted below are the ones that function applies.
 */
export interface RetryOptions {
// Retries allowed after the first attempt, so the operation runs at most maxRetries + 1 times (default 5)
maxRetries?: number
// Delay before the first retry, in milliseconds (default 1000)
initialDelayMs?: number
// Upper bound applied to every delay, in milliseconds (default 30000)
maxDelayMs?: number
// Factor the delay is multiplied by after each retry (default 2)
backoffMultiplier?: number
// Decides whether a failure should be retried (default `isRetryableError`)
retryCondition?: (error: RetryableError) => boolean
}
/**
 * Outcome record for a retried operation: a success flag, the produced data
 * or the error, and how many attempts were made.
 *
 * NOTE(review): `retryWithExponentialBackoff` returns/throws directly rather
 * than producing this shape; confirm whether any other module consumes it.
 */
export interface RetryResult<T> {
success: boolean
data?: T
error?: Error
attemptCount: number
}
/**
 * Type guard: true when `error` is a non-null object that has a `status`
 * property (the property's value may still be `undefined`).
 */
function hasStatus(
  error: RetryableError
): error is HTTPError | { status?: number; message?: string } {
  return typeof error === 'object' && error !== null && 'status' in error
}

/** HTTP status codes treated as transient failures worth retrying. */
const RETRYABLE_STATUS_CODES: ReadonlySet<number> = new Set([429, 502, 503, 504])

/** Lower-case phrases that mark an error message as describing a transient failure. */
const RETRYABLE_MESSAGE_KEYWORDS: readonly string[] = [
  'rate limit',
  'rate_limit',
  'too many requests',
  'quota exceeded',
  'throttled',
  'retry after',
  'temporarily unavailable',
  'service unavailable',
]

/**
 * Default retry condition: recognises rate-limiting and temporary
 * unavailability failures.
 *
 * An error is retryable when its `status` is 429, 502, 503 or 504, or when
 * its message (case-insensitively) contains one of the known rate-limit /
 * unavailability phrases.
 *
 * @param error - The thrown value or a status-only probe object.
 * @returns `true` when the failure looks transient, `false` otherwise
 *   (including for `null`/`undefined`).
 */
export function isRetryableError(error: RetryableError): boolean {
  if (!error) return false

  // Check for rate limiting / gateway status codes
  if (
    hasStatus(error) &&
    typeof error.status === 'number' &&
    RETRYABLE_STATUS_CODES.has(error.status)
  ) {
    return true
  }

  // `message` is only typed as a string: error payloads built from API
  // responses can carry numbers or nested objects there. Coerce instead of
  // calling `.toLowerCase()` on an arbitrary value, which would throw from
  // inside the retry loop and mask the original failure.
  const rawMessage: unknown = error.message
  const errorMessage =
    typeof rawMessage === 'string' && rawMessage.length > 0
      ? rawMessage
      : String(rawMessage || error)

  const haystack = errorMessage.toLowerCase()
  return RETRYABLE_MESSAGE_KEYWORDS.some((keyword) => haystack.includes(keyword))
}
/**
 * Runs `operation`, retrying it with exponentially growing, jittered delays
 * while the failure satisfies the retry condition.
 *
 * @param operation - Async function to execute; invoked afresh on every attempt.
 * @param options - Retry tuning; see `RetryOptions` for the fields and defaults.
 * @returns The value produced by the first successful attempt.
 * @throws The last failure (wrapped in an `Error` when a non-Error value was
 *   thrown) once attempts are exhausted or the failure is not retryable.
 */
export async function retryWithExponentialBackoff<T>(
  operation: () => Promise<T>,
  options: RetryOptions = {}
): Promise<T> {
  const {
    maxRetries = 5,
    initialDelayMs = 1000,
    maxDelayMs = 30000,
    backoffMultiplier = 2,
    retryCondition = isRetryableError,
  } = options

  const totalAttempts = maxRetries + 1
  let failure: Error | undefined
  let backoffMs = initialDelayMs
  let attemptNumber = 1

  while (attemptNumber <= totalAttempts) {
    try {
      logger.debug(`Executing operation attempt ${attemptNumber}/${totalAttempts}`)
      const outcome = await operation()

      // Only worth mentioning when at least one retry was needed
      if (attemptNumber > 1) {
        logger.info(`Operation succeeded after ${attemptNumber} attempts`)
      }
      return outcome
    } catch (error) {
      failure = error instanceof Error ? error : new Error(String(error))
      logger.warn(`Operation failed on attempt ${attemptNumber}`, { error })

      // Out of attempts: surface the failure
      const isFinalAttempt = attemptNumber === totalAttempts
      if (isFinalAttempt) {
        logger.error(`Operation failed after ${totalAttempts} attempts`, { error })
        throw failure
      }

      // Failures the condition rejects are thrown without waiting
      const canRetry = retryCondition(error as RetryableError)
      if (!canRetry) {
        logger.warn('Error is not retryable, throwing immediately', { error })
        throw failure
      }

      // Up to 10% random jitter spreads out simultaneous retries; the cap still applies
      const jitterMs = Math.random() * 0.1 * backoffMs
      const waitMs = Math.min(backoffMs + jitterMs, maxDelayMs)
      logger.info(`Retrying in ${Math.round(waitMs)}ms (attempt ${attemptNumber}/${totalAttempts})`)
      await new Promise((resolve) => setTimeout(resolve, waitMs))

      // Grow the base delay for the next round, never beyond the cap
      backoffMs = Math.min(backoffMs * backoffMultiplier, maxDelayMs)
    }
    attemptNumber += 1
  }

  // Reached only when the loop body never ran or never threw out of the loop
  throw failure || new Error('Retry operation failed')
}
/**
 * `fetch` wrapped in `retryWithExponentialBackoff`.
 *
 * A non-OK response whose status `isRetryableError` recognises is turned into
 * a thrown `HTTPError` so the retry loop can act on it; every other response
 * (OK or not) is returned to the caller untouched.
 *
 * Note: the decision to throw always uses `isRetryableError`; a custom
 * `retryOptions.retryCondition` is only consulted afterwards by the retry loop.
 *
 * @param url - Request URL.
 * @param options - Standard `fetch` init options.
 * @param retryOptions - Retry tuning forwarded to `retryWithExponentialBackoff`.
 * @returns The response of the first attempt that is not converted into an error.
 */
export async function fetchWithRetry(
  url: string,
  options: RequestInit = {},
  retryOptions: RetryOptions = {}
): Promise<Response> {
  const attemptFetch = async (): Promise<Response> => {
    const res = await fetch(url, options)

    const mustRetry = !res.ok && isRetryableError({ status: res.status })
    if (!mustRetry) {
      return res
    }

    // Include the body text so the final error explains what the server said
    const bodyText = await res.text()
    const httpError: HTTPError = new Error(`HTTP ${res.status}: ${res.statusText} - ${bodyText}`)
    httpError.status = res.status
    httpError.statusText = res.statusText
    throw httpError
  }

  return retryWithExponentialBackoff(attemptFetch, retryOptions)
}

View File

@@ -91,7 +91,7 @@ describe('S3 Client', () => {
Body: testFile,
ContentType: contentType,
Metadata: {
originalName: fileName,
originalName: encodeURIComponent(fileName),
uploadedAt: expect.any(String),
},
})

View File

@@ -121,7 +121,7 @@ export async function uploadToS3(
ContentType: contentType,
// Add some useful metadata
Metadata: {
originalName: fileName,
originalName: encodeURIComponent(fileName), // Encode filename to prevent invalid characters in HTTP headers
uploadedAt: new Date().toISOString(),
},
})

View File

@@ -0,0 +1,767 @@
import { create } from 'zustand'
import { createLogger } from '@/lib/logs/console-logger'
const logger = createLogger('KnowledgeStore')
/**
 * Chunking settings attached to a knowledge base (see
 * `KnowledgeBaseData.chunkingConfig`). Every named field is optional, and the
 * index signature admits additional keys of unknown type.
 */
export interface ChunkingConfig {
chunkSize?: number
minCharactersPerChunk?: number
recipe?: string
lang?: string
overlapTokens?: number
strategy?: 'recursive' | 'semantic' | 'sentence' | 'paragraph'
[key: string]: unknown
}
/**
 * A knowledge base record as held in the store. The store fills these from
 * the `data` field of the `/api/knowledge` and `/api/knowledge/{id}` responses.
 */
export interface KnowledgeBaseData {
id: string
name: string
description?: string
tokenCount: number
embeddingModel: string
embeddingDimension: number
chunkingConfig: ChunkingConfig
createdAt: string
updatedAt: string
workspaceId?: string
}
/**
 * A document belonging to a knowledge base, including its size statistics and
 * processing state. The store keeps these grouped by `knowledgeBaseId`.
 */
export interface DocumentData {
id: string
knowledgeBaseId: string
filename: string
fileUrl: string
fileSize: number
mimeType: string
fileHash?: string | null
chunkCount: number
tokenCount: number
characterCount: number
// Processing state; the store's refresh logic accepts pending -> processing/completed/failed
// and processing -> completed/failed as server-driven transitions
processingStatus: 'pending' | 'processing' | 'completed' | 'failed'
processingStartedAt?: string | null
processingCompletedAt?: string | null
processingError?: string | null
enabled: boolean
uploadedAt: string
}
/**
 * A single chunk of a document as returned by the chunks endpoint and cached
 * per document in the store.
 */
export interface ChunkData {
id: string
chunkIndex: number
content: string
contentLength: number
tokenCount: number
enabled: boolean
startOffset: number
endOffset: number
overlapTokens: number
metadata: Record<string, unknown>
// Typed as strings here, not numbers
searchRank: string
qualityScore: string | null
createdAt: string
updatedAt: string
}
/**
 * Paging information stored alongside a cached page of chunks. When the
 * server response omits a value, the store falls back to the requested
 * limit/offset (or 50 / 0), the fetched chunk count, and `hasMore: false`.
 */
export interface ChunksPagination {
total: number
limit: number
offset: number
hasMore: boolean
}
/**
 * Cache entry holding the most recently fetched chunks of one document.
 */
export interface ChunksCache {
chunks: ChunkData[]
pagination: ChunksPagination
// Search string the cached chunks were fetched with (undefined when none was given)
searchQuery?: string
// `Date.now()` timestamp taken when the entry was written
lastFetchTime: number
}
/**
 * Shape of the knowledge store: cached data, per-key loading flags, async
 * fetch actions, synchronous mutation actions, and synchronous getters.
 */
interface KnowledgeStore {
// State
knowledgeBases: Record<string, KnowledgeBaseData>
documents: Record<string, DocumentData[]> // knowledgeBaseId -> documents
chunks: Record<string, ChunksCache> // documentId -> chunks cache
knowledgeBasesList: KnowledgeBaseData[]
// Loading states
// Each set holds the ids (knowledge base or document) with a request currently in flight
loadingKnowledgeBases: Set<string>
loadingDocuments: Set<string>
loadingChunks: Set<string>
loadingKnowledgeBasesList: boolean
// Actions
// get* actions serve from cache when possible and otherwise fetch;
// refresh* actions always fetch unless a request for the same key is already in flight
getKnowledgeBase: (id: string) => Promise<KnowledgeBaseData | null>
getDocuments: (knowledgeBaseId: string) => Promise<DocumentData[]>
getChunks: (
knowledgeBaseId: string,
documentId: string,
options?: { search?: string; limit?: number; offset?: number }
) => Promise<ChunkData[]>
getKnowledgeBasesList: () => Promise<KnowledgeBaseData[]>
refreshDocuments: (knowledgeBaseId: string) => Promise<DocumentData[]>
refreshChunks: (
knowledgeBaseId: string,
documentId: string,
options?: { search?: string; limit?: number; offset?: number }
) => Promise<ChunkData[]>
// Local (cache-only) mutations; none of these issue network requests
updateDocument: (
knowledgeBaseId: string,
documentId: string,
updates: Partial<DocumentData>
) => void
updateChunk: (documentId: string, chunkId: string, updates: Partial<ChunkData>) => void
addPendingDocuments: (knowledgeBaseId: string, documents: DocumentData[]) => void
addKnowledgeBase: (knowledgeBase: KnowledgeBaseData) => void
removeKnowledgeBase: (id: string) => void
removeDocument: (knowledgeBaseId: string, documentId: string) => void
clearDocuments: (knowledgeBaseId: string) => void
clearChunks: (documentId: string) => void
clearKnowledgeBasesList: () => void
// Getters
// Synchronous cache reads; each returns null when nothing is cached for the key
getCachedKnowledgeBase: (id: string) => KnowledgeBaseData | null
getCachedDocuments: (knowledgeBaseId: string) => DocumentData[] | null
getCachedChunks: (documentId: string, options?: { search?: string }) => ChunksCache | null
// Loading state getters
isKnowledgeBaseLoading: (id: string) => boolean
isDocumentsLoading: (knowledgeBaseId: string) => boolean
isChunksLoading: (documentId: string) => boolean
}
/**
 * Zustand store that caches knowledge bases, their documents and per-document
 * chunks on the client, together with in-flight request tracking.
 */
export const useKnowledgeStore = create<KnowledgeStore>((set, get) => ({
// Initial state: every cache is empty and nothing is loading
knowledgeBases: {},
documents: {},
chunks: {},
knowledgeBasesList: [],
loadingKnowledgeBases: new Set(),
loadingDocuments: new Set(),
loadingChunks: new Set(),
loadingKnowledgeBasesList: false,
// Synchronous cache read for a single knowledge base; null on a miss
getCachedKnowledgeBase: (id: string) => get().knowledgeBases[id] || null,
// Synchronous cache read for a knowledge base's document list; null on a miss
getCachedDocuments: (knowledgeBaseId: string) => get().documents[knowledgeBaseId] || null,
// Synchronous cache read for a document's chunks.
// Returns null when nothing is cached, or when the caller names a search
// string and the cached entry was fetched for a different one. Without
// `options.search` the cached entry is returned whatever query produced it.
getCachedChunks: (documentId: string, options?: { search?: string }) => {
  const cached = get().chunks[documentId]
  if (!cached) return null

  // An empty search string is never sent to the server by getChunks/refreshChunks,
  // so '' and "no search" describe the same result set.
  const requestedSearch = options?.search
  if (requestedSearch !== undefined && (cached.searchQuery || '') !== requestedSearch) {
    return null
  }

  return cached
},
// True while a fetch for this knowledge base id is in flight
isKnowledgeBaseLoading: (id: string) => get().loadingKnowledgeBases.has(id),
// True while a documents fetch for this knowledge base is in flight
isDocumentsLoading: (knowledgeBaseId: string) => get().loadingDocuments.has(knowledgeBaseId),
// True while a chunks fetch for this document is in flight
isChunksLoading: (documentId: string) => get().loadingChunks.has(documentId),
// Resolves a knowledge base by id: served from cache when present, null when a
// request for the same id is already in flight, otherwise fetched and cached.
// Rethrows fetch/response errors after clearing the loading flag.
getKnowledgeBase: async (id: string) => {
  const snapshot = get()

  const hit = snapshot.knowledgeBases[id]
  if (hit) {
    return hit
  }

  // A request for this id is already running; do not start a second one
  if (snapshot.loadingKnowledgeBases.has(id)) {
    return null
  }

  // Copy of a loading set with this id removed
  const withoutId = (ids: Set<string>) =>
    new Set([...ids].filter((loadingId) => loadingId !== id))

  try {
    set((current) => ({
      loadingKnowledgeBases: new Set([...current.loadingKnowledgeBases, id]),
    }))

    const response = await fetch(`/api/knowledge/${id}`)
    if (!response.ok) {
      throw new Error(`Failed to fetch knowledge base: ${response.statusText}`)
    }

    const payload = await response.json()
    if (!payload.success) {
      throw new Error(payload.error || 'Failed to fetch knowledge base')
    }

    const knowledgeBase = payload.data
    set((current) => ({
      knowledgeBases: {
        ...current.knowledgeBases,
        [id]: knowledgeBase,
      },
      loadingKnowledgeBases: withoutId(current.loadingKnowledgeBases),
    }))

    logger.info(`Knowledge base loaded: ${id}`)
    return knowledgeBase
  } catch (error) {
    logger.error(`Error fetching knowledge base ${id}:`, error)
    set((current) => ({
      loadingKnowledgeBases: withoutId(current.loadingKnowledgeBases),
    }))
    throw error
  }
},
// Resolves the documents of a knowledge base: served from cache when present,
// an empty array when a request for the same knowledge base is already in
// flight, otherwise fetched and cached. Rethrows fetch/response errors after
// clearing the loading flag.
getDocuments: async (knowledgeBaseId: string) => {
  const snapshot = get()

  const hit = snapshot.documents[knowledgeBaseId]
  if (hit) {
    return hit
  }

  // A request for this knowledge base is already running; do not start a second one
  if (snapshot.loadingDocuments.has(knowledgeBaseId)) {
    return []
  }

  // Copy of a loading set with this knowledge base removed
  const withoutKb = (ids: Set<string>) =>
    new Set([...ids].filter((loadingId) => loadingId !== knowledgeBaseId))

  try {
    set((current) => ({
      loadingDocuments: new Set([...current.loadingDocuments, knowledgeBaseId]),
    }))

    const response = await fetch(`/api/knowledge/${knowledgeBaseId}/documents`)
    if (!response.ok) {
      throw new Error(`Failed to fetch documents: ${response.statusText}`)
    }

    const payload = await response.json()
    if (!payload.success) {
      throw new Error(payload.error || 'Failed to fetch documents')
    }

    const documents = payload.data
    set((current) => ({
      documents: {
        ...current.documents,
        [knowledgeBaseId]: documents,
      },
      loadingDocuments: withoutKb(current.loadingDocuments),
    }))

    logger.info(`Documents loaded for knowledge base: ${knowledgeBaseId}`)
    return documents
  } catch (error) {
    logger.error(`Error fetching documents for knowledge base ${knowledgeBaseId}:`, error)
    set((current) => ({
      loadingDocuments: withoutKb(current.loadingDocuments),
    }))
    throw error
  }
},
// Resolves the chunks of a document.
// - Cache hit: the cached entry was fetched with the same search string
//   (limit and offset are not part of the comparison).
// - Request already in flight for this document: the currently cached chunks,
//   or an empty array when nothing is cached.
// - Otherwise: fetches, replaces the cache entry and returns the fetched chunks.
// Rethrows fetch/response errors after clearing the loading flag.
getChunks: async (
  knowledgeBaseId: string,
  documentId: string,
  options?: { search?: string; limit?: number; offset?: number }
) => {
  const snapshot = get()
  const entry = snapshot.chunks[documentId]

  if (entry && entry.searchQuery === options?.search) {
    return entry.chunks
  }

  if (snapshot.loadingChunks.has(documentId)) {
    return entry?.chunks || []
  }

  // Copy of a loading set with this document removed
  const withoutDoc = (ids: Set<string>) =>
    new Set([...ids].filter((loadingId) => loadingId !== documentId))

  try {
    set((current) => ({
      loadingChunks: new Set([...current.loadingChunks, documentId]),
    }))

    // Only truthy options become query parameters
    const query = new URLSearchParams()
    if (options?.search) query.set('search', options.search)
    if (options?.limit) query.set('limit', options.limit.toString())
    if (options?.offset) query.set('offset', options.offset.toString())

    const response = await fetch(
      `/api/knowledge/${knowledgeBaseId}/documents/${documentId}/chunks?${query.toString()}`
    )
    if (!response.ok) {
      throw new Error(`Failed to fetch chunks: ${response.statusText}`)
    }

    const payload = await response.json()
    if (!payload.success) {
      throw new Error(payload.error || 'Failed to fetch chunks')
    }

    const chunks = payload.data
    const pagination = payload.pagination

    set((current) => {
      const freshEntry = {
        chunks,
        // Fall back to the request options, then to fixed defaults, when the server omits a value
        pagination: {
          total: pagination?.total || chunks.length,
          limit: pagination?.limit || options?.limit || 50,
          offset: pagination?.offset || options?.offset || 0,
          hasMore: pagination?.hasMore || false,
        },
        searchQuery: options?.search,
        lastFetchTime: Date.now(),
      }
      return {
        chunks: {
          ...current.chunks,
          [documentId]: freshEntry,
        },
        loadingChunks: withoutDoc(current.loadingChunks),
      }
    })

    logger.info(`Chunks loaded for document: ${documentId}`)
    return chunks
  } catch (error) {
    logger.error(`Error fetching chunks for document ${documentId}:`, error)
    set((current) => ({
      loadingChunks: withoutDoc(current.loadingChunks),
    }))
    throw error
  }
},
// Resolves the list of knowledge bases: the cached list when it is non-empty
// or when a request is already in flight, otherwise fetched and cached.
// An empty cached list is treated as "not loaded" and triggers a fetch.
// Rethrows fetch/response errors after clearing the loading flag.
getKnowledgeBasesList: async () => {
  const { knowledgeBasesList: cachedList, loadingKnowledgeBasesList: inFlight } = get()

  if (cachedList.length > 0 || inFlight) {
    return cachedList
  }

  try {
    set({ loadingKnowledgeBasesList: true })

    const response = await fetch('/api/knowledge')
    if (!response.ok) {
      throw new Error(`Failed to fetch knowledge bases: ${response.statusText}`)
    }

    const payload = await response.json()
    if (!payload.success) {
      throw new Error(payload.error || 'Failed to fetch knowledge bases')
    }

    const knowledgeBasesList = payload.data
    set({
      knowledgeBasesList,
      loadingKnowledgeBasesList: false,
    })

    logger.info(`Knowledge bases list loaded: ${knowledgeBasesList.length} items`)
    return knowledgeBasesList
  } catch (error) {
    logger.error('Error fetching knowledge bases list:', error)
    set({ loadingKnowledgeBasesList: false })
    throw error
  }
},
// Re-fetches the documents of a knowledge base and merges them into the cache.
//
// The cached list afterwards contains exactly the documents the server
// returned, in server order, each appearing once. For a document that was
// already cached, the server copy replaces the local one when
//   - its processing status moved forward (pending -> processing/completed/failed,
//     processing -> completed/failed),
//   - it has processing timestamps and the local copy has none, or
//   - it is completed and its chunk/token/character counts differ;
// otherwise the local copy is kept so optimistic updates survive the refresh.
//
// Returns the documents as fetched (not the merged list). If a request for the
// same knowledge base is already in flight, returns the cached list (or an
// empty array) without fetching. Rethrows fetch/response errors after clearing
// the loading flag.
refreshDocuments: async (knowledgeBaseId: string) => {
  const state = get()

  // Avoid duplicate requests while one is in flight
  if (state.loadingDocuments.has(knowledgeBaseId)) {
    return state.documents[knowledgeBaseId] || []
  }

  // Status changes that are always accepted from the server
  const allowedTransitions = [
    { from: 'pending', to: 'processing' },
    { from: 'pending', to: 'completed' },
    { from: 'pending', to: 'failed' },
    { from: 'processing', to: 'completed' },
    { from: 'processing', to: 'failed' },
  ]

  try {
    set((state) => ({
      loadingDocuments: new Set([...state.loadingDocuments, knowledgeBaseId]),
    }))

    const response = await fetch(`/api/knowledge/${knowledgeBaseId}/documents`)
    if (!response.ok) {
      throw new Error(`Failed to fetch documents: ${response.statusText}`)
    }

    const result = await response.json()
    if (!result.success) {
      throw new Error(result.error || 'Failed to fetch documents')
    }

    const documents: DocumentData[] = result.data

    set((state) => {
      const currentDocuments = state.documents[knowledgeBaseId] || []

      // One entry per fetched document. Documents the cache has not seen yet
      // are taken as-is here, so no separate "new documents" pass is needed;
      // appending them again would store every new document twice.
      const mergedDocuments = documents.map((fetchedDoc) => {
        const existingDoc = currentDocuments.find((doc) => doc.id === fetchedDoc.id)
        if (!existingDoc) {
          return fetchedDoc
        }

        if (existingDoc.processingStatus !== fetchedDoc.processingStatus) {
          const isForwardTransition = allowedTransitions.some(
            (t) => t.from === existingDoc.processingStatus && t.to === fetchedDoc.processingStatus
          )
          if (isForwardTransition) {
            return fetchedDoc
          }
        }

        const existingHasTimestamps =
          existingDoc.processingStartedAt || existingDoc.processingCompletedAt
        const fetchedHasTimestamps =
          fetchedDoc.processingStartedAt || fetchedDoc.processingCompletedAt
        if (fetchedHasTimestamps && !existingHasTimestamps) {
          return fetchedDoc
        }

        // Completed on the server with different stats: the server is authoritative
        if (
          fetchedDoc.processingStatus === 'completed' &&
          (fetchedDoc.chunkCount !== existingDoc.chunkCount ||
            fetchedDoc.tokenCount !== existingDoc.tokenCount ||
            fetchedDoc.characterCount !== existingDoc.characterCount)
        ) {
          return fetchedDoc
        }

        // Otherwise keep the local copy (preserves optimistic updates)
        return existingDoc
      })

      return {
        documents: {
          ...state.documents,
          [knowledgeBaseId]: mergedDocuments,
        },
        loadingDocuments: new Set(
          [...state.loadingDocuments].filter((loadingId) => loadingId !== knowledgeBaseId)
        ),
      }
    })

    logger.info(`Documents refreshed for knowledge base: ${knowledgeBaseId}`)
    return documents
  } catch (error) {
    logger.error(`Error refreshing documents for knowledge base ${knowledgeBaseId}:`, error)
    set((state) => ({
      loadingDocuments: new Set(
        [...state.loadingDocuments].filter((loadingId) => loadingId !== knowledgeBaseId)
      ),
    }))
    throw error
  }
},
// Re-fetches the chunks of a document and merges them into the cache.
//
// The cached entry afterwards contains exactly the chunks the server returned,
// in server order, each appearing once. For a chunk that was already cached,
// the server copy replaces the local one when its content, metadata, enabled
// flag or quality score differ; otherwise the local copy is kept so optimistic
// updates survive the refresh.
//
// Returns the chunks as fetched (not the merged list). If a request for the
// same document is already in flight, returns the cached chunks (or an empty
// array) without fetching. Rethrows fetch/response errors after clearing the
// loading flag.
refreshChunks: async (
  knowledgeBaseId: string,
  documentId: string,
  options?: { search?: string; limit?: number; offset?: number }
) => {
  const state = get()

  // Avoid duplicate requests while one is in flight
  if (state.loadingChunks.has(documentId)) {
    return state.chunks[documentId]?.chunks || []
  }

  try {
    set((state) => ({
      loadingChunks: new Set([...state.loadingChunks, documentId]),
    }))

    // Only truthy options become query parameters
    const params = new URLSearchParams()
    if (options?.search) params.set('search', options.search)
    if (options?.limit) params.set('limit', options.limit.toString())
    if (options?.offset) params.set('offset', options.offset.toString())

    const response = await fetch(
      `/api/knowledge/${knowledgeBaseId}/documents/${documentId}/chunks?${params.toString()}`
    )
    if (!response.ok) {
      throw new Error(`Failed to fetch chunks: ${response.statusText}`)
    }

    const result = await response.json()
    if (!result.success) {
      throw new Error(result.error || 'Failed to fetch chunks')
    }

    const chunks: ChunkData[] = result.data
    const pagination = result.pagination

    set((state) => {
      const currentChunks = state.chunks[documentId]?.chunks || []

      // One entry per fetched chunk. Chunks the cache has not seen yet are
      // taken as-is here, so no separate "new chunks" pass is needed;
      // appending them again would store every new chunk twice.
      const mergedChunks = chunks.map((fetchedChunk) => {
        const existingChunk = currentChunks.find((chunk) => chunk.id === fetchedChunk.id)
        if (!existingChunk) {
          return fetchedChunk
        }

        // Any difference in these fields indicates a server-side update
        const serverChanged =
          fetchedChunk.content !== existingChunk.content ||
          JSON.stringify(fetchedChunk.metadata) !== JSON.stringify(existingChunk.metadata) ||
          fetchedChunk.enabled !== existingChunk.enabled ||
          fetchedChunk.qualityScore !== existingChunk.qualityScore

        // Otherwise keep the local copy (preserves optimistic updates)
        return serverChanged ? fetchedChunk : existingChunk
      })

      return {
        chunks: {
          ...state.chunks,
          [documentId]: {
            chunks: mergedChunks,
            // Fall back to the request options, then to fixed defaults, when the server omits a value
            pagination: {
              total: pagination?.total || chunks.length,
              limit: pagination?.limit || options?.limit || 50,
              offset: pagination?.offset || options?.offset || 0,
              hasMore: pagination?.hasMore || false,
            },
            searchQuery: options?.search,
            lastFetchTime: Date.now(),
          },
        },
        loadingChunks: new Set(
          [...state.loadingChunks].filter((loadingId) => loadingId !== documentId)
        ),
      }
    })

    logger.info(`Chunks refreshed for document: ${documentId}`)
    return chunks
  } catch (error) {
    logger.error(`Error refreshing chunks for document ${documentId}:`, error)
    set((state) => ({
      loadingChunks: new Set(
        [...state.loadingChunks].filter((loadingId) => loadingId !== documentId)
      ),
    }))
    throw error
  }
},
// Applies a partial update to one cached document. Does nothing when no
// documents are cached for the knowledge base.
updateDocument: (knowledgeBaseId: string, documentId: string, updates: Partial<DocumentData>) => {
  set((state) => {
    const cachedDocs = state.documents[knowledgeBaseId]
    if (!cachedDocs) return state

    return {
      documents: {
        ...state.documents,
        [knowledgeBaseId]: cachedDocs.map((doc) =>
          doc.id !== documentId ? doc : { ...doc, ...updates }
        ),
      },
    }
  })
},
// Applies a partial update to one cached chunk, leaving the entry's
// pagination and search metadata untouched. Does nothing when the document
// has no cached chunks.
updateChunk: (documentId: string, chunkId: string, updates: Partial<ChunkData>) => {
  set((state) => {
    const entry = state.chunks[documentId]
    if (!entry?.chunks) return state

    const patched = entry.chunks.map((chunk) =>
      chunk.id !== chunkId ? chunk : { ...chunk, ...updates }
    )

    return {
      chunks: {
        ...state.chunks,
        [documentId]: { ...entry, chunks: patched },
      },
    }
  })
},
// Appends documents to the cached list of a knowledge base, creating the list
// when none is cached yet. No de-duplication is performed.
addPendingDocuments: (knowledgeBaseId: string, newDocuments: DocumentData[]) => {
  set((state) => ({
    documents: {
      ...state.documents,
      [knowledgeBaseId]: [...(state.documents[knowledgeBaseId] || []), ...newDocuments],
    },
  }))

  logger.info(
    `Added ${newDocuments.length} pending documents for knowledge base: ${knowledgeBaseId}`
  )
},
// Caches a knowledge base by id and places it at the front of the list.
addKnowledgeBase: (knowledgeBase: KnowledgeBaseData) => {
  set((state) => {
    const byId = { ...state.knowledgeBases, [knowledgeBase.id]: knowledgeBase }
    const list = [knowledgeBase, ...state.knowledgeBasesList]
    return { knowledgeBases: byId, knowledgeBasesList: list }
  })

  logger.info(`Knowledge base added: ${knowledgeBase.id}`)
},
// Removes a knowledge base from every cache: the by-id map, the list, its
// cached documents, and the chunk caches of those documents.
removeKnowledgeBase: (id: string) => {
  set((state) => {
    const newKnowledgeBases = { ...state.knowledgeBases }
    delete newKnowledgeBases[id]

    const removedDocuments = state.documents[id] || []
    const newDocuments = { ...state.documents }
    delete newDocuments[id]

    // Chunk caches are keyed by document id; drop the ones that belonged to
    // this knowledge base so they do not linger as orphans (removeDocument
    // performs the same cleanup for a single document).
    let newChunks = state.chunks
    if (removedDocuments.length > 0) {
      newChunks = { ...state.chunks }
      for (const doc of removedDocuments) {
        delete newChunks[doc.id]
      }
    }

    return {
      knowledgeBases: newKnowledgeBases,
      documents: newDocuments,
      chunks: newChunks,
      knowledgeBasesList: state.knowledgeBasesList.filter((kb) => kb.id !== id),
    }
  })

  logger.info(`Knowledge base removed: ${id}`)
},
// Removes one document from a knowledge base's cached list and drops that
// document's chunk cache. Does nothing when no documents are cached for the
// knowledge base.
removeDocument: (knowledgeBaseId: string, documentId: string) => {
  set((state) => {
    const cachedDocs = state.documents[knowledgeBaseId]
    if (!cachedDocs) return state

    // Split the removed document's chunk entry off from the rest
    const { [documentId]: _removedChunks, ...remainingChunks } = state.chunks

    return {
      documents: {
        ...state.documents,
        [knowledgeBaseId]: cachedDocs.filter((doc) => doc.id !== documentId),
      },
      chunks: remainingChunks,
    }
  })

  logger.info(`Document removed from knowledge base: ${documentId}`)
},
// Drops the cached document list of a knowledge base.
clearDocuments: (knowledgeBaseId: string) => {
  set((state) => {
    const { [knowledgeBaseId]: _dropped, ...documents } = state.documents
    return { documents }
  })

  logger.info(`Documents cleared for knowledge base: ${knowledgeBaseId}`)
},
// Drops the cached chunks entry of a document.
clearChunks: (documentId: string) => {
  set((state) => {
    const { [documentId]: _dropped, ...chunks } = state.chunks
    return { chunks }
  })

  logger.info(`Chunks cleared for document: ${documentId}`)
},
// Empties the cached knowledge base list; the by-id cache is left untouched.
clearKnowledgeBasesList: () => {
  const emptyList: KnowledgeBaseData[] = []
  set({ knowledgeBasesList: emptyList })

  logger.info('Knowledge bases list cleared')
},
}))

View File

@@ -964,7 +964,9 @@ async function generateBlockDoc(blockPath: string, icons: Record<string, string>
// Skip blocks with category 'blocks' (except memory type), and skip specific blocks
if (
(blockConfig.category === 'blocks' && blockConfig.type !== 'memory') ||
(blockConfig.category === 'blocks' &&
blockConfig.type !== 'memory' &&
blockConfig.type !== 'knowledge') ||
blockConfig.type === 'evaluator' ||
blockConfig.type === 'number'
) {