diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3dc2b0025..3db18c5d4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -198,10 +198,30 @@ jobs: "${IMAGE_BASE}:${{ github.sha }}-arm64" docker manifest push "${IMAGE_BASE}:${{ github.sha }}" - # Process docs embeddings (after ECR images are pushed) + # Check if docs changed + check-docs-changes: + name: Check Docs Changes + runs-on: blacksmith-4vcpu-ubuntu-2404 + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + outputs: + docs_changed: ${{ steps.filter.outputs.docs }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 2 # Need at least 2 commits to detect changes + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + docs: + - 'apps/docs/content/docs/en/**' + - 'apps/sim/scripts/process-docs.ts' + - 'apps/sim/lib/chunkers/**' + + # Process docs embeddings (only when docs change, after ECR images are pushed) process-docs: name: Process Docs - needs: build-amd64 - if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging') + needs: [build-amd64, check-docs-changes] + if: needs.check-docs-changes.outputs.docs_changed == 'true' uses: ./.github/workflows/docs-embeddings.yml secrets: inherit diff --git a/.github/workflows/docs-embeddings.yml b/.github/workflows/docs-embeddings.yml index 1399b8502..35cd540cd 100644 --- a/.github/workflows/docs-embeddings.yml +++ b/.github/workflows/docs-embeddings.yml @@ -8,7 +8,7 @@ jobs: process-docs-embeddings: name: Process Documentation Embeddings runs-on: blacksmith-8vcpu-ubuntu-2404 - if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging' + if: github.ref == 'refs/heads/main' steps: - name: Checkout code @@ -41,6 +41,6 @@ jobs: - name: Process docs embeddings working-directory: ./apps/sim env: - DATABASE_URL: ${{ github.ref == 'refs/heads/main' && secrets.DATABASE_URL || secrets.STAGING_DATABASE_URL }} + DATABASE_URL: ${{ secrets.DATABASE_URL }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: bun run scripts/process-docs.ts --clear diff --git a/apps/sim/scripts/process-docs.ts b/apps/sim/scripts/process-docs.ts index 86e06ffd6..029594df9 100644 --- a/apps/sim/scripts/process-docs.ts +++ b/apps/sim/scripts/process-docs.ts @@ -34,7 +34,7 @@ interface ProcessingOptions { */ async function processDocs(options: ProcessingOptions = {}) { const config = { - docsPath: options.docsPath || path.join(process.cwd(), '../../apps/docs/content/docs'), + docsPath: options.docsPath || path.join(process.cwd(), '../../apps/docs/content/docs/en'), baseUrl: options.baseUrl || (isDev ? 'http://localhost:4000' : 'https://docs.sim.ai'), chunkSize: options.chunkSize || 1024, minChunkSize: options.minChunkSize || 100, @@ -216,25 +216,31 @@ async function main() { Usage: bun run process-docs.ts [options] +By default, processes English (en) documentation only. +Note: Use --clear flag when changing language scope to remove old embeddings. + Options: --clear Clear existing embeddings before processing --dry-run Process and display results without saving to DB --verbose Show detailed output including text previews - --path Custom path to docs directory + --path Custom path to docs directory (default: docs/en) --url Custom base URL for links --chunk-size Custom chunk size in tokens (default: 1024) --help, -h Show this help message Examples: - # Dry run to test chunking + # Dry run to test chunking (English docs) bun run process-docs.ts --dry-run - # Process and save to database + # Process and save to database (English docs) bun run process-docs.ts - # Clear existing and reprocess + # Clear existing and reprocess (English docs) bun run process-docs.ts --clear + # Process a different language + bun run process-docs.ts --path ../../apps/docs/content/docs/es + # Custom path with verbose output bun run process-docs.ts --path ./my-docs --verbose `)