From 1664a69dfd2789bc7fc63cd0aebc9ca5dbc81672 Mon Sep 17 00:00:00 2001 From: Twisha Bansal <58483338+twishabansal@users.noreply.github.com> Date: Tue, 10 Feb 2026 22:11:02 +0530 Subject: [PATCH] docs: add pre/post processing docs for langchain python (#2378) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description Trigger has been tested corresponding to local changes. Latest successful run: https://pantheon.corp.google.com/cloud-build/builds;region=global/1c37031f-95f1-4c6c-9ef8-0452277599d5?e=13802955&mods=-autopush_coliseum&project=toolbox-testing-438616 Note: After merging, update python pre and post processing sample testing trigger. ## PR Checklist > Thank you for opening a Pull Request! Before submitting your PR, there are a > few things you can do to make sure it goes smoothly: - [x] Make sure you reviewed [CONTRIBUTING.md](https://github.com/googleapis/genai-toolbox/blob/main/CONTRIBUTING.md) - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/genai-toolbox/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) - [ ] Make sure to add `!` if this involve a breaking change 🛠️ Fixes # --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Yuan Teoh <45984206+Yuan325@users.noreply.github.com> Co-authored-by: Averi Kitsch --- .../py.integration.cloudbuild.yaml | 57 +++++++++ docs/en/samples/pre_post_processing/_index.md | 54 ++++++++ docs/en/samples/pre_post_processing/python.md | 40 ++++++ .../pre_post_processing/python/__init__.py | 19 +++ .../pre_post_processing/python/agent_test.py | 51 ++++++++ .../python/langchain/agent.py | 116 ++++++++++++++++++ .../python/langchain/requirements.txt | 3 + 7 files changed, 340 insertions(+) create mode 100644 .ci/sample_tests/pre_post_processing/py.integration.cloudbuild.yaml create mode 100644 docs/en/samples/pre_post_processing/_index.md create mode 100644 docs/en/samples/pre_post_processing/python.md create mode 100644 docs/en/samples/pre_post_processing/python/__init__.py create mode 100644 docs/en/samples/pre_post_processing/python/agent_test.py create mode 100644 docs/en/samples/pre_post_processing/python/langchain/agent.py create mode 100644 docs/en/samples/pre_post_processing/python/langchain/requirements.txt diff --git a/.ci/sample_tests/pre_post_processing/py.integration.cloudbuild.yaml b/.ci/sample_tests/pre_post_processing/py.integration.cloudbuild.yaml new file mode 100644 index 0000000000..5844226428 --- /dev/null +++ b/.ci/sample_tests/pre_post_processing/py.integration.cloudbuild.yaml @@ -0,0 +1,57 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +steps: + - name: "${_IMAGE}" + id: "py-pre-post-processing-test" + entrypoint: "bash" + args: + - -c + - | + set -ex + chmod +x .ci/sample_tests/run_tests.sh + .ci/sample_tests/run_tests.sh + env: + - "CLOUD_SQL_INSTANCE=${_CLOUD_SQL_INSTANCE}" + - "GCP_PROJECT=${_GCP_PROJECT}" + - "DATABASE_NAME=${_DATABASE_NAME}" + - "DB_USER=${_DB_USER}" + - "TARGET_ROOT=${_TARGET_ROOT}" + - "TARGET_LANG=${_TARGET_LANG}" + - "TABLE_NAME=${_TABLE_NAME}" + - "SQL_FILE=${_SQL_FILE}" + - "AGENT_FILE_PATTERN=${_AGENT_FILE_PATTERN}" + secretEnv: ["TOOLS_YAML_CONTENT", "GOOGLE_API_KEY", "DB_PASSWORD"] + +availableSecrets: + secretManager: + - versionName: projects/${_GCP_PROJECT}/secrets/${_TOOLS_YAML_SECRET}/versions/5 + env: "TOOLS_YAML_CONTENT" + - versionName: projects/${_GCP_PROJECT_NUMBER}/secrets/${_API_KEY_SECRET}/versions/latest + env: "GOOGLE_API_KEY" + - versionName: projects/${_GCP_PROJECT}/secrets/${_DB_PASS_SECRET}/versions/latest + env: "DB_PASSWORD" + +timeout: 1200s + +substitutions: + _TARGET_LANG: "python" + _IMAGE: "gcr.io/google.com/cloudsdktool/cloud-sdk:537.0.0" + _TARGET_ROOT: "docs/en/samples/pre_post_processing/python" + _TABLE_NAME: "hotels_py_pre_post_processing" + _SQL_FILE: ".ci/sample_tests/setup_hotels.sql" + _AGENT_FILE_PATTERN: "agent.py" + +options: + logging: CLOUD_LOGGING_ONLY \ No newline at end of file diff --git a/docs/en/samples/pre_post_processing/_index.md b/docs/en/samples/pre_post_processing/_index.md new file mode 100644 index 0000000000..6fcf570027 --- /dev/null +++ b/docs/en/samples/pre_post_processing/_index.md @@ -0,0 +1,54 
@@ +--- +title: "Pre- and Post- Processing" +type: docs +weight: 1 +description: > + Intercept and modify interactions between the agent and its tools either before or after a tool is executed. +--- + +Pre- and post- processing allow developers to intercept and modify interactions between the agent and its tools or the user. + +{{< notice note >}} + +These capabilities are typically features of **orchestration frameworks** (like LangChain, LangGraph, or Agent Builder) rather than the Toolbox SDK itself. However, Toolbox tools are designed to fully leverage these framework capabilities to support robust, secure, and compliant agent architectures. + +{{< /notice >}} + +## Types of Processing + +### Pre-processing + +Pre-processing occurs before a tool is executed or an agent processes a message. Key types include: + +- **Input Sanitization & Redaction**: Detecting and masking sensitive information (like PII) in user queries or tool arguments to prevent it from being logged or sent to unauthorized systems. +- **Business Logic Validation**: Verifying that the proposed action complies with business rules (e.g., ensuring a requested hotel stay does not exceed 14 days, or checking if a user has sufficient permission). +- **Security Guardrails**: Analyzing inputs for potential prompt injection attacks or malicious payloads. + +### Post-processing + +Post-processing occurs after a tool has executed or the model has generated a response. Key types include: + +- **Response Enrichment**: Injecting additional data into the tool output that wasn't part of the raw API response (e.g., calculating loyalty points earned based on the booking value). +- **Output Formatting**: Transforming raw data (like JSON or XML) into a more human-readable or model-friendly format to improve the agent's understanding. +- **Compliance Auditing**: Logging the final outcome of transactions, including the original request and the result, to a secure audit trail. 
+ +## Processing Scopes + +While processing logic can be applied at various levels (Agent, Model, Tool), this guide primarily focuses on **Tool Level** processing, which is most relevant for granular control over tool execution. + +### Tool Level (Primary Focus) + +Wraps individual tool executions. This is best for logic specific to a single tool or a set of tools. + +- **Scope**: Intercepts the raw inputs (arguments) to a tool and its outputs. +- **Use Cases**: Argument validation, output formatting, specific privacy rules for sensitive tools. + +### Other Levels + +It is helpful to understand how tool-level processing differs from other scopes: + +- **Model Level**: Intercepts individual calls to the LLM (prompts and responses). Unlike tool-level, this applies globally to all text sent/received, making it better for global PII redaction or token tracking. +- **Agent Level**: Wraps the high-level execution loop (e.g., a "turn" in the conversation). Unlike tool-level, this envelops the entire turn (user input to final response), making it suitable for session management or end-to-end auditing. + + +## Samples diff --git a/docs/en/samples/pre_post_processing/python.md b/docs/en/samples/pre_post_processing/python.md new file mode 100644 index 0000000000..1c4311f487 --- /dev/null +++ b/docs/en/samples/pre_post_processing/python.md @@ -0,0 +1,40 @@ +--- +title: "Python" +type: docs +weight: 1 +description: > + How to add pre- and post- processing to your Agents using Python. +--- + +## Prerequisites + +This tutorial assumes that you have set up Toolbox with a basic agent as described in the [local quickstart](../../getting-started/local_quickstart.md). + +This guide demonstrates how to implement these patterns in your Toolbox applications. + +## Implementation + +{{< tabpane persist=header >}} +{{% tab header="ADK" text=true %}} +Coming soon. 
+{{% /tab %}} +{{% tab header="Langchain" text=true %}} +The following example demonstrates how to use `ToolboxClient` with LangChain's middleware to implement pre- and post- processing for tool calls. + +```py +{{< include "python/langchain/agent.py" >}} +``` + +You can also add model-level (`wrap_model`) and agent-level (`before_agent`, `after_agent`) hooks to intercept messages at different stages of the execution loop. See the [LangChain Middleware documentation](https://docs.langchain.com/oss/python/langchain/middleware/custom#wrap-style-hooks) for details on these additional hook types. +{{% /tab %}} +{{< /tabpane >}} + +## Results + +The output should look similar to the following. Note that exact responses may vary due to the non-deterministic nature of LLMs and differences between orchestration frameworks. + +``` +AI: Booking Confirmed! You earned 500 Loyalty Points with this stay. + +AI: Error: Maximum stay duration is 14 days. +``` diff --git a/docs/en/samples/pre_post_processing/python/__init__.py b/docs/en/samples/pre_post_processing/python/__init__.py new file mode 100644 index 0000000000..f5b7c1bfd2 --- /dev/null +++ b/docs/en/samples/pre_post_processing/python/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# This file makes the 'pre_post_processing/python' directory a Python package. + +# You can include any package-level initialization logic here if needed. +# For now, this file is empty. 
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""End-to-end smoke test for the pre/post-processing sample agents.

The agent under test is chosen with the ``ORCH_NAME`` environment variable:
the module ``python.<ORCH_NAME>.agent`` is imported and its ``main()``
coroutine is executed, then the captured stdout/stderr are checked.
"""

import asyncio
import importlib
import os
from pathlib import Path

import pytest

ORCH_NAME = os.environ.get("ORCH_NAME")
if not ORCH_NAME:
    # Without this guard the import below targets "python.None.agent" and
    # fails with a ModuleNotFoundError that hides the real cause.
    raise RuntimeError(
        "ORCH_NAME must be set to the orchestration framework directory to "
        "test, e.g. ORCH_NAME=langchain"
    )
module_path = f"python.{ORCH_NAME}.agent"
agent = importlib.import_module(module_path)

# Substrings that must all appear in the agent's stdout for the run to pass.
GOLDEN_KEYWORDS = [
    "AI:",
    "Loyalty Points",
    "POLICY CHECK: Intercepting 'update-hotel'",
]


# --- Execution Tests ---
class TestExecution:
    """Test framework execution and output validation."""

    @pytest.fixture(scope="function")
    def script_output(self, capsys):
        """Run the agent's ``main()`` coroutine and return the captured output.

        The fixture is function-scoped, so the agent is executed once for
        every test that requests it.

        Returns:
            The result of ``capsys.readouterr()``; the tests read its ``out``
            and ``err`` attributes.
        """
        asyncio.run(agent.main())
        return capsys.readouterr()

    def test_script_runs_without_errors(self, script_output):
        """Test that the script runs and produces no stderr."""
        assert script_output.err == "", f"Script produced stderr: {script_output.err}"

    def test_keywords_in_output(self, script_output):
        """Test that expected keywords are present in the script's output."""
        output = script_output.out
        # Echo the transcript so a failing run shows what the agent printed.
        print(f"\nAgent Output:\n{output}\n")
        missing_keywords = [kw for kw in GOLDEN_KEYWORDS if kw not in output]
        assert not missing_keywords, f"Missing keywords in output: {missing_keywords}"
"""LangChain sample: tool-level pre- and post-processing for Toolbox tools.

Two ``wrap_tool_call`` middlewares are registered on the agent:

* ``enforce_business_rules`` (pre-processing) blocks ``update-hotel`` calls
  whose stay is longer than ``MAX_STAY_DAYS``.
* ``enrich_response`` (post-processing) adds loyalty-point information to
  successful ``book-hotel`` results.

Pinned dependencies for this sample (``langchain/requirements.txt``):
``langchain==1.2.6``, ``langchain-google-vertexai==3.2.2``,
``toolbox-langchain==0.5.8``.
"""

import asyncio
from datetime import datetime

from langchain.agents import create_agent
from langchain.agents.middleware import wrap_tool_call
from langchain_core.messages import ToolMessage
from langchain_google_vertexai import ChatVertexAI
from toolbox_langchain import ToolboxClient

# Longest stay, in days, that the pre-processing policy lets through.
MAX_STAY_DAYS = 14

system_prompt = """
  You're a helpful hotel assistant. You handle hotel searching, booking and
  cancellations. When the user searches for a hotel, mention it's name, id,
  location and price tier. Always mention hotel ids while performing any
  searches. This is very important for any operations. For any bookings or
  cancellations, please provide the appropriate confirmation. Be sure to
  update checkin or checkout dates if mentioned by the user.
  Don't ask for confirmations from the user.
"""


def _stay_duration_days(checkin, checkout):
    """Return the stay length in whole days, or ``None`` if it can't be computed.

    Args:
        checkin: Check-in value taken from the tool arguments; expected to be
            an ISO-8601 string accepted by ``datetime.fromisoformat``.
        checkout: Check-out value, same expectations as ``checkin``.

    Returns:
        ``(checkout - checkin).days`` as an int, or ``None`` when either value
        cannot be parsed or the two parsed values cannot be subtracted.
    """
    try:
        start = datetime.fromisoformat(checkin)
        end = datetime.fromisoformat(checkout)
        return (end - start).days
    except (TypeError, ValueError):
        # ValueError: malformed date string. TypeError: non-string argument,
        # or one value is timezone-aware and the other is naive.
        return None


# Pre processing
@wrap_tool_call
async def enforce_business_rules(request, handler):
    """
    Business Logic Validation:
    Enforces max stay duration (MAX_STAY_DAYS) on 'update-hotel' calls.

    Args:
        request: The intercepted tool call; ``request.tool_call`` provides the
            tool's ``name``, ``args`` and ``id``.
        handler: Awaitable callable that executes the tool (or the next
            middleware) for ``request``.

    Returns:
        A ``ToolMessage`` carrying an error when the stay is too long (the
        tool is not executed in that case); otherwise whatever ``handler``
        returns.
    """
    tool_call = request.tool_call
    name = tool_call["name"]
    args = tool_call["args"]

    print(f"POLICY CHECK: Intercepting '{name}'")

    # PRE: Code here runs BEFORE the tool execution
    if (
        name == "update-hotel"
        and "checkin_date" in args
        and "checkout_date" in args
    ):
        duration = _stay_duration_days(args["checkin_date"], args["checkout_date"])
        if duration is None:
            # Best effort: the policy cannot be evaluated, so say so and let
            # the call continue to the tool instead of failing here.
            print("POLICY CHECK: Could not parse stay dates; skipping duration check")
        elif duration > MAX_STAY_DAYS:
            print("BLOCKED: Stay too long")
            # Short-circuit: answer the tool call ourselves, skipping the tool.
            return ToolMessage(
                content=f"Error: Maximum stay duration is {MAX_STAY_DAYS} days.",
                tool_call_id=tool_call["id"],
            )

    # EXEC: Execute the tool (or next middleware)
    result = await handler(request)

    # POST: Code here runs AFTER the tool execution
    return result


# Post processing
@wrap_tool_call
async def enrich_response(request, handler):
    """
    Post-Processing & Enrichment:
    Adds loyalty points information to successful bookings.
    Standardizes output format.

    Args:
        request: The intercepted tool call; only ``request.tool_call["name"]``
            is read here.
        handler: Awaitable callable that executes the tool (or the next
            middleware) for ``request``.

    Returns:
        The handler's result. When it is a ``ToolMessage`` from 'book-hotel'
        whose text does not contain "Error", its ``content`` is rewritten in
        place with the loyalty message followed by the original text.
    """
    # PRE: Code here runs BEFORE the tool execution

    # EXEC: Execute the tool (or next middleware)
    result = await handler(request)

    # POST: Code here runs AFTER the tool execution
    if isinstance(result, ToolMessage):
        content = str(result.content)
        tool_name = request.tool_call["name"]

        if tool_name == "book-hotel" and "Error" not in content:
            loyalty_bonus = 500
            result.content = f"Booking Confirmed!\n You earned {loyalty_bonus} Loyalty Points with this stay.\n\nSystem Details: {content}"

    return result


async def main():
    """Run the sample: one booking (post-processing) and one blocked update.

    Connects to the Toolbox server at http://127.0.0.1:5000, loads
    "my-toolset", builds an agent with both middlewares, and prints the final
    AI message for each of the two prompts.
    """
    async with ToolboxClient("http://127.0.0.1:5000") as client:
        tools = await client.aload_toolset("my-toolset")
        model = ChatVertexAI(model="gemini-2.5-flash")
        agent = create_agent(
            system_prompt=system_prompt,
            model=model,
            tools=tools,
            # add any pre and post processing methods
            middleware=[enforce_business_rules, enrich_response],
        )

        # Test Post-processing
        user_input = "Book hotel with id 3."
        response = await agent.ainvoke(
            {"messages": [{"role": "user", "content": user_input}]}
        )

        print("-" * 50)
        last_ai_msg = response["messages"][-1].content
        print(f"AI: {last_ai_msg}")

        # Test Pre-processing
        # The requested stay (23 days) deliberately exceeds MAX_STAY_DAYS so
        # that the policy middleware blocks the update.
        print("-" * 50)
        user_input = "Update my hotel with id 3 with checkin date 2025-01-18 and checkout date 2025-02-10"
        response = await agent.ainvoke(
            {"messages": [{"role": "user", "content": user_input}]}
        )
        last_ai_msg = response["messages"][-1].content
        print(f"AI: {last_ai_msg}")


if __name__ == "__main__":
    asyncio.run(main())