diff --git a/.github/workflows/link_checker_workflow.yaml b/.github/workflows/link_checker_workflow.yaml
new file mode 100644
index 0000000000..4296a122e7
--- /dev/null
+++ b/.github/workflows/link_checker_workflow.yaml
@@ -0,0 +1,68 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+name: Link Checker
+
+on:
+  pull_request:
+
+# Least privilege: the steps below only need to read the repository.
+permissions:
+  contents: read
+
+jobs:
+  link-check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v5
+
+      # The key is unique per commit; restore-keys lets a run start from a
+      # lychee cache that was saved under an earlier commit's key.
+      - name: Restore lychee cache
+        uses: actions/cache@v4
+        with:
+          path: .lycheecache
+          key: cache-lychee-${{ github.sha }}
+          restore-keys: cache-lychee-
+
+      - name: Link Checker
+        id: lychee_check
+        uses: lycheeverse/lychee-action@v2
+        with:
+          args: >
+            --verbose
+            --no-progress
+            --cache
+            --max-cache-age 1d
+            README.md
+            docs/
+          output: /tmp/foo.txt
+          fail: true
+          jobSummary: true
+          debug: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      # Runs only when the 'lychee_check' step itself failed (not when checkout
+      # or cache restore failed), so the context note appears only when the
+      # developer needs to troubleshoot broken links.
+      - name: Display Link Context Note on Failure
+        if: ${{ failure() && steps.lychee_check.outcome == 'failure' }}
+        run: |
+          {
+            echo "## Link Resolution Note"
+            echo "Local links and directory changes work differently on GitHub than on the docsite."
+            echo "You must ensure fixes pass the **GitHub check** and also work with **\`hugo server\`**."
+            echo "---"
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/.lycheeignore b/.lycheeignore
new file mode 100644
index 0000000000..1146561589
--- /dev/null
+++ b/.lycheeignore
@@ -0,0 +1,45 @@
+# Ignore documentation placeholders and generic example domains
+^https?://([a-zA-Z0-9-]+\.)?example\.com(:\d+)?(/.*)?$
+^http://example\.net
+
+# Shields.io badges often trigger rate limits or intermittent 503s
+^https://img\.shields\.io/.*
+
+# PDF files are ignored as lychee cannot reliably parse internal PDF links
+\.pdf$
+
+# Standard mailto: protocol is not a web URL
+^mailto:
+
+# Ignore local development endpoints that won't resolve in CI/CD environments
+^https?://(127\.0\.0\.1|localhost)(:\d+)?(/.*)?$
+
+# Placeholder for Google Cloud Run service discovery
+https://cloud-run-url.app/
+
+# DGraph Cloud and private instance endpoints
+https://xxx.cloud.dgraph.io/
+https://cloud.dgraph.io/login
+https://dgraph.io/docs
+
+# MySQL Community downloads and main site (often protected by bot mitigation)
+https://dev.mysql.com/downloads/installer/
+https://www.mysql.com/
+
+# Claude desktop download link
+https://claude.ai/download
+
+# Google Cloud Run product page
+https://cloud.google.com/run
+
+# These specific deep links are known to cause redirect loops or 403s in automated scrapers
+https://dev.mysql.com/doc/refman/8.4/en/sql-prepared-statements.html
+https://dev.mysql.com/doc/refman/8.4/en/user-names.html
+
+# npmjs links can occasionally trigger rate limiting during high-frequency CI builds
+https://www.npmjs.com/package/@toolbox-sdk/core
+https://www.npmjs.com/package/@toolbox-sdk/adk
+
+
+# Ignore social media and blog profiles to reduce external request overhead
+https://medium.com/@mcp_toolbox
\ No newline at end of file
diff --git a/DEVELOPER.md b/DEVELOPER.md
index 10ad8d3650..bd8c49913e 100644
--- a/DEVELOPER.md
+++ b/DEVELOPER.md
@@ -207,6 +207,30 @@ variables for each source.
* SQLite - setup in the integration test, where we create a temporary database file +### Link Checking and Fixing with Lychee + +We use **[lychee](https://github.com/lycheeverse/lychee-action)** for repository link checks. + +* To run the checker **locally**, see the [command-line usage guide](https://github.com/lycheeverse/lychee?tab=readme-ov-file#commandline-usage). + +#### Fixing Broken Links + +1. **Update the Link:** Correct the broken URL or update the content where it is used. +2. **Ignore the Link:** If you can't fix the link (e.g., due to **external rate-limits** or if it's a **local-only URL**), tell Lychee to **ignore** it. + + * List **regular expressions** or **direct links** in the **[.lycheeignore](https://github.com/googleapis/genai-toolbox/blob/main/.lycheeignore)** file, one entry per line. + * **Always add a comment** explaining **why** the link is being skipped to prevent link rot. **Example `.lycheeignore`:** + ```text + # These are email addresses, not standard web URLs, and usually cause check failures. + ^mailto:.* + ``` +> [!NOTE] +> To avoid build failures in GitHub Actions, follow the linking pattern demonstrated here:
+> **Avoid:** `[Read more](docs/setup)` or `[Read more](docs/setup/)` (works in Hugo, but breaks the link checker)
+> **Reason:** The link checker cannot find a file named "setup" or a directory with that name containing an index.
+> **Preferred:** `[Read more](docs/setup.md)`
+> **Reason:** The GitHub Action finds the physical file. Hugo then uses its internal logic (or render hooks) to resolve this to the correct `/docs/setup/` web URL.
+ ### Other GitHub Checks * License header check (`.github/header-checker-lint.yml`) - Ensures files have @@ -280,6 +304,7 @@ There are 3 GHA workflows we use to achieve document versioning: Request a repo owner to run the preview deployment workflow on your PR. A preview link will be automatically added as a comment to your PR. + #### Maintainers 1. **Inspect Changes:** Review the proposed changes in the PR to ensure they are