Mirror of https://github.com/simstudioai/sim.git (synced 2026-01-10 07:27:57 -05:00)
feat(docs): reindex docs on change (#863)
* Add github action to index docs

* Add new line

* Update .github/workflows/docs-embeddings.yml

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Updates

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
committed by GitHub
parent e71a736400
commit 4819b88ac1
.github/workflows/docs-embeddings.yml (vendored, new file, 38 lines added)
@@ -0,0 +1,38 @@
name: Process Docs Embeddings

on:
  push:
    branches: [main, staging]
    paths:
      - 'apps/docs/**'
  workflow_dispatch: # Allow manual triggering

jobs:
  process-docs-embeddings:
    name: Process Documentation Embeddings
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging'

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Bun
        uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: latest

      - name: Install dependencies
        run: bun install

      - name: Process docs embeddings
        working-directory: ./apps/sim
        env:
          DATABASE_URL: ${{ github.ref == 'refs/heads/main' && secrets.DATABASE_URL || secrets.STAGING_DATABASE_URL }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: bun run scripts/process-docs-embeddings.ts --clear
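For orientation: the final step hands the script everything it needs through `env` plus a single `--clear` flag. The sketch below shows one way such an entry point could pick that input up under Bun or Node; the helper names (`readCliOptions`, `assertRequiredEnv`) and the validation behaviour are assumptions for illustration, not the repository's actual code.

```typescript
// Hypothetical sketch of the CLI/entry-point wiring, not the repository's actual code.
interface ProcessingOptions {
  clearExisting?: boolean
}

function readCliOptions(argv: string[]): ProcessingOptions {
  // `--clear` is the only flag the workflow passes; map it to clearExisting.
  return { clearExisting: argv.includes('--clear') }
}

function assertRequiredEnv(): void {
  // The workflow injects these via `env:`; fail fast if either is missing.
  for (const name of ['DATABASE_URL', 'OPENAI_API_KEY']) {
    if (!process.env[name]) {
      throw new Error(`Missing required environment variable: ${name}`)
    }
  }
}

const options = readCliOptions(process.argv.slice(2))
assertRequiredEnv()
console.log('Running with options:', options)
```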
apps/sim/scripts/process-docs-embeddings.ts
@@ -53,7 +53,26 @@ async function processDocsEmbeddings(options: ProcessingOptions = {}) {
     clearExisting: config.clearExisting,
   })

-  // Clear existing embeddings if requested
+  // Initialize the docs chunker
+  const chunker = new DocsChunker({
+    chunkSize: config.chunkSize,
+    minChunkSize: config.minChunkSize,
+    overlap: config.overlap,
+    baseUrl: config.baseUrl,
+  })
+
+  // Process all .mdx files first (compute embeddings before clearing)
+  logger.info(`📚 Processing docs from: ${config.docsPath}`)
+  const chunks = await chunker.chunkAllDocs(config.docsPath)
+
+  if (chunks.length === 0) {
+    logger.warn('⚠️ No chunks generated from docs')
+    return { success: false, processedChunks: 0, failedChunks: 0 }
+  }
+
+  logger.info(`📊 Generated ${chunks.length} chunks with embeddings`)
+
+  // Clear existing embeddings if requested (after computing new ones to minimize downtime)
   if (config.clearExisting) {
     logger.info('🗑️ Clearing existing docs embeddings...')
     try {
@@ -65,25 +84,6 @@ async function processDocsEmbeddings(options: ProcessingOptions = {}) {
     }
   }

-  // Initialize the docs chunker
-  const chunker = new DocsChunker({
-    chunkSize: config.chunkSize,
-    minChunkSize: config.minChunkSize,
-    overlap: config.overlap,
-    baseUrl: config.baseUrl,
-  })
-
-  // Process all .mdx files
-  logger.info(`📚 Processing docs from: ${config.docsPath}`)
-  const chunks = await chunker.chunkAllDocs(config.docsPath)
-
-  if (chunks.length === 0) {
-    logger.warn('⚠️ No chunks generated from docs')
-    return { success: false, processedChunks: 0, failedChunks: 0 }
-  }
-
-  logger.info(`📊 Generated ${chunks.length} chunks with embeddings`)
-
   // Save chunks to database in batches for better performance
   const batchSize = 10
   logger.info(`💾 Saving chunks to database (batch size: ${batchSize})...`)
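Net effect of the two hunks: the chunker setup and embedding computation now run before the optional clear, so existing rows are only deleted once their replacements are ready, and the batched insert (batch size 10) follows immediately. A minimal sketch of that ordering is below, with the chunker and database helpers injected as placeholders since their real implementations are not part of this diff.

```typescript
// Sketch of the refactored control flow; the injected helpers are placeholders,
// not the repository's actual chunker or database API.
interface Chunk {
  text: string
  embedding: number[]
}

async function reindexDocs(
  deps: {
    chunkAllDocs: (docsPath: string) => Promise<Chunk[]>
    clearExisting: () => Promise<void>
    insertBatch: (batch: Chunk[]) => Promise<void>
  },
  config: { docsPath: string; clear: boolean; batchSize: number }
) {
  // 1. Compute all chunks (and their embeddings) up front.
  const chunks = await deps.chunkAllDocs(config.docsPath)
  if (chunks.length === 0) {
    return { success: false, processedChunks: 0 }
  }

  // 2. Only now clear the old rows, so the table is never left without
  //    usable data while embeddings are still being generated.
  if (config.clear) {
    await deps.clearExisting()
  }

  // 3. Insert the precomputed chunks in small batches (the diff uses batchSize = 10).
  for (let i = 0; i < chunks.length; i += config.batchSize) {
    await deps.insertBatch(chunks.slice(i, i + config.batchSize))
  }
  return { success: true, processedChunks: chunks.length }
}
```

Passing the helpers in as parameters keeps the sketch self-contained; the actual script constructs DocsChunker and talks to its own database layer directly, as the diff above shows.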