feat(docs): reindex docs on change (#863)

* Add github action to index docs

* Add new line

* Update .github/workflows/docs-embeddings.yml

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Updates

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
This commit is contained in:
Siddharth Ganesan
2025-08-04 13:26:44 -07:00
committed by GitHub
parent e71a736400
commit 4819b88ac1
2 changed files with 58 additions and 20 deletions

38
.github/workflows/docs-embeddings.yml vendored Normal file
View File

@@ -0,0 +1,38 @@
# Re-indexes the documentation embeddings whenever files under apps/docs
# change on main or staging, or when the workflow is triggered manually.
name: Process Docs Embeddings

on:
  push:
    branches: [main, staging]
    paths:
      - 'apps/docs/**'
  workflow_dispatch: # Allow manual triggering

# The job only checks out the repository, so a read-only token is sufficient.
permissions:
  contents: read

# The processing script is invoked with --clear, so two overlapping runs on the
# same branch could clear rows while the other run is inserting them.
# Serialize runs per ref; do not cancel a run that is already writing.
concurrency:
  group: docs-embeddings-${{ github.ref }}
  cancel-in-progress: false

jobs:
  process-docs-embeddings:
    name: Process Documentation Embeddings
    runs-on: ubuntu-latest
    # Push events are already limited to main/staging by the trigger above;
    # this guard also skips manual (workflow_dispatch) runs from other refs.
    if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Bun
        uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: latest

      - name: Install dependencies
        run: bun install

      - name: Process docs embeddings
        working-directory: ./apps/sim
        env:
          # main uses DATABASE_URL, staging uses STAGING_DATABASE_URL.
          # NOTE(review): with the `a && b || c` form, an empty/unset
          # secrets.DATABASE_URL on main falls through to the staging secret
          # instead of failing — confirm both secrets are always configured.
          DATABASE_URL: ${{ github.ref == 'refs/heads/main' && secrets.DATABASE_URL || secrets.STAGING_DATABASE_URL }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: bun run scripts/process-docs-embeddings.ts --clear

View File

@@ -53,7 +53,26 @@ async function processDocsEmbeddings(options: ProcessingOptions = {}) {
clearExisting: config.clearExisting,
})
// Clear existing embeddings if requested
// Initialize the docs chunker
const chunker = new DocsChunker({
chunkSize: config.chunkSize,
minChunkSize: config.minChunkSize,
overlap: config.overlap,
baseUrl: config.baseUrl,
})
// Process all .mdx files first (compute embeddings before clearing)
logger.info(`📚 Processing docs from: ${config.docsPath}`)
const chunks = await chunker.chunkAllDocs(config.docsPath)
if (chunks.length === 0) {
logger.warn('⚠️ No chunks generated from docs')
return { success: false, processedChunks: 0, failedChunks: 0 }
}
logger.info(`📊 Generated ${chunks.length} chunks with embeddings`)
// Clear existing embeddings if requested (after computing new ones to minimize downtime)
if (config.clearExisting) {
logger.info('🗑️ Clearing existing docs embeddings...')
try {
@@ -65,25 +84,6 @@ async function processDocsEmbeddings(options: ProcessingOptions = {}) {
}
}
// Initialize the docs chunker
const chunker = new DocsChunker({
chunkSize: config.chunkSize,
minChunkSize: config.minChunkSize,
overlap: config.overlap,
baseUrl: config.baseUrl,
})
// Process all .mdx files
logger.info(`📚 Processing docs from: ${config.docsPath}`)
const chunks = await chunker.chunkAllDocs(config.docsPath)
if (chunks.length === 0) {
logger.warn('⚠️ No chunks generated from docs')
return { success: false, processedChunks: 0, failedChunks: 0 }
}
logger.info(`📊 Generated ${chunks.length} chunks with embeddings`)
// Save chunks to database in batches for better performance
const batchSize = 10
logger.info(`💾 Saving chunks to database (batch size: ${batchSize})...`)