mirror of
https://github.com/googleapis/genai-toolbox.git
synced 2026-01-21 05:18:14 -05:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c60344f0c1 | ||
|
|
2cd8c4692e |
112
cmd/root_test.go
112
cmd/root_test.go
@@ -2181,3 +2181,115 @@ func TestDefaultToolsFileBehavior(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParameterReferenceValidation(t *testing.T) {
|
||||
ctx, err := testutils.ContextWithNewLogger()
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
// Base template
|
||||
baseYaml := `
|
||||
sources:
|
||||
dummy-source:
|
||||
kind: http
|
||||
baseUrl: http://example.com
|
||||
tools:
|
||||
test-tool:
|
||||
kind: postgres-sql
|
||||
source: dummy-source
|
||||
description: test tool
|
||||
statement: SELECT 1;
|
||||
parameters:
|
||||
%s`
|
||||
|
||||
tcs := []struct {
|
||||
desc string
|
||||
params string
|
||||
wantErr bool
|
||||
errSubstr string
|
||||
}{
|
||||
{
|
||||
desc: "valid backward reference",
|
||||
params: `
|
||||
- name: source_param
|
||||
type: string
|
||||
description: source
|
||||
- name: copy_param
|
||||
type: string
|
||||
description: copy
|
||||
valueFromParam: source_param`,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
desc: "valid forward reference (out of order)",
|
||||
params: `
|
||||
- name: copy_param
|
||||
type: string
|
||||
description: copy
|
||||
valueFromParam: source_param
|
||||
- name: source_param
|
||||
type: string
|
||||
description: source`,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
desc: "invalid missing reference",
|
||||
params: `
|
||||
- name: copy_param
|
||||
type: string
|
||||
description: copy
|
||||
valueFromParam: non_existent_param`,
|
||||
wantErr: true,
|
||||
errSubstr: "references '\"non_existent_param\"' in the 'valueFromParam' field",
|
||||
},
|
||||
{
|
||||
desc: "invalid self reference",
|
||||
params: `
|
||||
- name: myself
|
||||
type: string
|
||||
description: self
|
||||
valueFromParam: myself`,
|
||||
wantErr: true,
|
||||
errSubstr: "parameter \"myself\" cannot copy value from itself",
|
||||
},
|
||||
{
|
||||
desc: "multiple valid references",
|
||||
params: `
|
||||
- name: a
|
||||
type: string
|
||||
description: a
|
||||
- name: b
|
||||
type: string
|
||||
description: b
|
||||
valueFromParam: a
|
||||
- name: c
|
||||
type: string
|
||||
description: c
|
||||
valueFromParam: a`,
|
||||
wantErr: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tcs {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
// Indent parameters to match YAML structure
|
||||
yamlContent := fmt.Sprintf(baseYaml, tc.params)
|
||||
|
||||
_, err := parseToolsFile(ctx, []byte(yamlContent))
|
||||
|
||||
if tc.wantErr {
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), tc.errSubstr) {
|
||||
t.Errorf("error %q does not contain expected substring %q", err.Error(), tc.errSubstr)
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,13 +3,14 @@ title: "EmbeddingModels"
|
||||
type: docs
|
||||
weight: 2
|
||||
description: >
|
||||
EmbeddingModels represent services that transform text into vector embeddings for semantic search.
|
||||
EmbeddingModels represent services that transform text into vector embeddings
|
||||
for semantic search.
|
||||
---
|
||||
|
||||
EmbeddingModels represent services that generate vector representations of text
|
||||
data. In the MCP Toolbox, these models enable **Semantic Queries**,
|
||||
allowing [Tools](../tools/) to automatically convert human-readable text into
|
||||
numerical vectors before using them in a query.
|
||||
data. In the MCP Toolbox, these models enable **Semantic Queries**, allowing
|
||||
[Tools](../tools/) to automatically convert human-readable text into numerical
|
||||
vectors before using them in a query.
|
||||
|
||||
This is primarily used in two scenarios:
|
||||
|
||||
@@ -19,14 +20,33 @@ This is primarily used in two scenarios:
|
||||
- **Semantic Search**: Converting a natural language query into a vector to
|
||||
perform similarity searches.
|
||||
|
||||
## Hidden Parameter Duplication (valueFromParam)
|
||||
|
||||
When building tools for vector ingestion, you often need the same input string
|
||||
twice:
|
||||
|
||||
1. To store the original text in a TEXT column.
|
||||
1. To generate the vector embedding for a VECTOR column.
|
||||
|
||||
Requesting an Agent (LLM) to output the exact same string twice is inefficient
|
||||
and error-prone. The `valueFromParam` field solves this by allowing a parameter
|
||||
to inherit its value from another parameter in the same tool.
|
||||
|
||||
### Key Behaviors
|
||||
|
||||
1. Hidden from Manifest: Parameters with valueFromParam set are excluded from
|
||||
the tool definition sent to the Agent. The Agent does not know this parameter
|
||||
exists.
|
||||
1. Auto-Filled: When the tool is executed, the Toolbox automatically copies the
|
||||
value from the referenced parameter before processing embeddings.
|
||||
|
||||
## Example
|
||||
|
||||
The following configuration defines an embedding model and applies it to
|
||||
specific tool parameters.
|
||||
|
||||
{{< notice tip >}}
|
||||
Use environment variable replacement with the format ${ENV_NAME}
|
||||
instead of hardcoding your API keys into the configuration file.
|
||||
{{< notice tip >}} Use environment variable replacement with the format
|
||||
${ENV_NAME} instead of hardcoding your API keys into the configuration file.
|
||||
{{< /notice >}}
|
||||
|
||||
### Step 1 - Define an Embedding Model
|
||||
@@ -40,14 +60,12 @@ embeddingModels:
|
||||
model: gemini-embedding-001
|
||||
apiKey: ${GOOGLE_API_KEY}
|
||||
dimension: 768
|
||||
|
||||
```
|
||||
|
||||
### Step 2 - Embed Tool Parameters
|
||||
|
||||
Use the defined embedding model, embed your query parameters using the
|
||||
`embeddedBy` field. Only string-typed
|
||||
parameters can be embedded:
|
||||
`embeddedBy` field. Only string-typed parameters can be embedded:
|
||||
|
||||
```yaml
|
||||
tools:
|
||||
@@ -61,10 +79,13 @@ tools:
|
||||
parameters:
|
||||
- name: content
|
||||
type: string
|
||||
description: The raw text content to be stored in the database.
|
||||
- name: vector_string
|
||||
type: string
|
||||
description: The text to be vectorized and stored.
|
||||
embeddedBy: gemini-model # refers to the name of a defined embedding model
|
||||
# This parameter is hidden from the LLM.
|
||||
# It automatically copies the value from 'content' and embeds it.
|
||||
valueFromParam: content
|
||||
embeddedBy: gemini-model
|
||||
|
||||
# Semantic search tool
|
||||
search_embedding:
|
||||
|
||||
@@ -296,6 +296,43 @@ func (c *ToolConfigs) UnmarshalYAML(ctx context.Context, unmarshal func(interfac
|
||||
return fmt.Errorf("invalid 'kind' field for tool %q (must be a string)", name)
|
||||
}
|
||||
|
||||
// validify parameter references
|
||||
if rawParams, ok := v["parameters"]; ok {
|
||||
if paramsList, ok := rawParams.([]any); ok {
|
||||
// Turn params into a map
|
||||
validParamNames := make(map[string]bool)
|
||||
for _, rawP := range paramsList {
|
||||
if pMap, ok := rawP.(map[string]any); ok {
|
||||
if pName, ok := pMap["name"].(string); ok && pName != "" {
|
||||
validParamNames[pName] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Validate references
|
||||
for i, rawP := range paramsList {
|
||||
pMap, ok := rawP.(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
pName, _ := pMap["name"].(string)
|
||||
refName, _ := pMap["valueFromParam"].(string)
|
||||
|
||||
if refName != "" {
|
||||
// Check if the referenced parameter exists
|
||||
if !validParamNames[refName] {
|
||||
return fmt.Errorf("tool %q config error: parameter %q (index %d) references '%q' in the 'valueFromParam' field, which is not a defined parameter.", name, pName, i, refName)
|
||||
}
|
||||
|
||||
// Check for self-reference
|
||||
if refName == pName {
|
||||
return fmt.Errorf("tool %q config error: parameter %q cannot copy value from itself", name, pName)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
yamlDecoder, err := util.NewStrictDecoder(v)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating YAML decoder for tool %q: %w", name, err)
|
||||
|
||||
@@ -158,3 +158,4 @@ func (r *ResourceManager) GetPromptsMap() map[string]prompts.Prompt {
|
||||
}
|
||||
return copiedMap
|
||||
}
|
||||
|
||||
|
||||
@@ -134,7 +134,12 @@ func ParseParams(ps Parameters, data map[string]any, claimsMap map[string]map[st
|
||||
var err error
|
||||
paramAuthServices := p.GetAuthServices()
|
||||
name := p.GetName()
|
||||
if len(paramAuthServices) == 0 {
|
||||
|
||||
sourceParamName := p.GetValueFromParam()
|
||||
if sourceParamName != "" {
|
||||
v, _ = data[sourceParamName]
|
||||
|
||||
} else if len(paramAuthServices) == 0 {
|
||||
// parse non auth-required parameter
|
||||
var ok bool
|
||||
v, ok = data[name]
|
||||
@@ -318,6 +323,7 @@ type Parameter interface {
|
||||
GetRequired() bool
|
||||
GetAuthServices() []ParamAuthService
|
||||
GetEmbeddedBy() string
|
||||
GetValueFromParam() string
|
||||
Parse(any) (any, error)
|
||||
Manifest() ParameterManifest
|
||||
McpManifest() (ParameterMcpManifest, []string)
|
||||
@@ -465,6 +471,9 @@ func ParseParameter(ctx context.Context, p map[string]any, paramType string) (Pa
|
||||
func (ps Parameters) Manifest() []ParameterManifest {
|
||||
rtn := make([]ParameterManifest, 0, len(ps))
|
||||
for _, p := range ps {
|
||||
if p.GetValueFromParam() != "" {
|
||||
continue
|
||||
}
|
||||
rtn = append(rtn, p.Manifest())
|
||||
}
|
||||
return rtn
|
||||
@@ -476,6 +485,11 @@ func (ps Parameters) McpManifest() (McpToolsSchema, map[string][]string) {
|
||||
authParam := make(map[string][]string)
|
||||
|
||||
for _, p := range ps {
|
||||
// If the parameter is sourced from another param, skip it in the MCP manifest
|
||||
if p.GetValueFromParam() != "" {
|
||||
continue
|
||||
}
|
||||
|
||||
name := p.GetName()
|
||||
paramManifest, authParamList := p.McpManifest()
|
||||
defaultV := p.GetDefault()
|
||||
@@ -509,6 +523,7 @@ type ParameterManifest struct {
|
||||
Default any `json:"default,omitempty"`
|
||||
AdditionalProperties any `json:"additionalProperties,omitempty"`
|
||||
EmbeddedBy string `json:"embeddedBy,omitempty"`
|
||||
ValueFromParam string `json:"valueFromParam,omitempty"`
|
||||
}
|
||||
|
||||
// ParameterMcpManifest represents properties when served as part of a ToolMcpManifest.
|
||||
@@ -531,6 +546,7 @@ type CommonParameter struct {
|
||||
AuthServices []ParamAuthService `yaml:"authServices"`
|
||||
AuthSources []ParamAuthService `yaml:"authSources"` // Deprecated: Kept for compatibility.
|
||||
EmbeddedBy string `yaml:"embeddedBy"`
|
||||
ValueFromParam string `yaml:"valueFromParam"`
|
||||
}
|
||||
|
||||
// GetName returns the name specified for the Parameter.
|
||||
@@ -588,10 +604,16 @@ func (p *CommonParameter) IsExcludedValues(v any) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// GetEmbeddedBy returns the embedding model name for the Parameter.
|
||||
func (p *CommonParameter) GetEmbeddedBy() string {
|
||||
return p.EmbeddedBy
|
||||
}
|
||||
|
||||
// GetValueFromParam returns the param value to copy from.
|
||||
func (p *CommonParameter) GetValueFromParam() string {
|
||||
return p.ValueFromParam
|
||||
}
|
||||
|
||||
// MatchStringOrRegex checks if the input matches the target
|
||||
func MatchStringOrRegex(input, target any) bool {
|
||||
targetS, ok := target.(string)
|
||||
|
||||
@@ -64,10 +64,11 @@ func AddSemanticSearchConfig(t *testing.T, config map[string]any, toolKind, inse
|
||||
"description": "The text content associated with the vector.",
|
||||
},
|
||||
map[string]any{
|
||||
"name": "text_to_embed",
|
||||
"type": "string",
|
||||
"description": "The text content used to generate the vector.",
|
||||
"embeddedBy": "gemini_model",
|
||||
"name": "text_to_embed",
|
||||
"type": "string",
|
||||
"description": "The text content used to generate the vector.",
|
||||
"embeddedBy": "gemini_model",
|
||||
"valueFromParam": "content",
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -108,7 +109,7 @@ func RunSemanticSearchToolInvokeTest(t *testing.T, insertWant, mcpInsertWant, se
|
||||
name: "HTTP invoke insert_docs",
|
||||
api: "http://127.0.0.1:5000/api/tool/insert_docs/invoke",
|
||||
isMcp: false,
|
||||
requestBody: `{"content": "The quick brown fox jumps over the lazy dog", "text_to_embed": "The quick brown fox jumps over the lazy dog"}`,
|
||||
requestBody: `{"content": "The quick brown fox jumps over the lazy dog"`,
|
||||
want: insertWant,
|
||||
},
|
||||
{
|
||||
@@ -131,8 +132,7 @@ func RunSemanticSearchToolInvokeTest(t *testing.T, insertWant, mcpInsertWant, se
|
||||
Params: map[string]any{
|
||||
"name": "insert_docs",
|
||||
"arguments": map[string]any{
|
||||
"content": "The quick brown fox jumps over the lazy dog",
|
||||
"text_to_embed": "The quick brown fox jumps over the lazy dog",
|
||||
"content": "The quick brown fox jumps over the lazy dog",
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user