Compare commits

..

2 Commits
main ... dual

Author SHA1 Message Date
duwenxin
c60344f0c1 add docs 2026-01-20 19:33:14 -05:00
duwenxin
2cd8c4692e add valueFromParam support 2026-01-20 19:23:29 -05:00
6 changed files with 213 additions and 20 deletions

View File

@@ -2181,3 +2181,115 @@ func TestDefaultToolsFileBehavior(t *testing.T) {
})
}
}
func TestParameterReferenceValidation(t *testing.T) {
ctx, err := testutils.ContextWithNewLogger()
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
// Base template
baseYaml := `
sources:
dummy-source:
kind: http
baseUrl: http://example.com
tools:
test-tool:
kind: postgres-sql
source: dummy-source
description: test tool
statement: SELECT 1;
parameters:
%s`
tcs := []struct {
desc string
params string
wantErr bool
errSubstr string
}{
{
desc: "valid backward reference",
params: `
- name: source_param
type: string
description: source
- name: copy_param
type: string
description: copy
valueFromParam: source_param`,
wantErr: false,
},
{
desc: "valid forward reference (out of order)",
params: `
- name: copy_param
type: string
description: copy
valueFromParam: source_param
- name: source_param
type: string
description: source`,
wantErr: false,
},
{
desc: "invalid missing reference",
params: `
- name: copy_param
type: string
description: copy
valueFromParam: non_existent_param`,
wantErr: true,
errSubstr: "references '\"non_existent_param\"' in the 'valueFromParam' field",
},
{
desc: "invalid self reference",
params: `
- name: myself
type: string
description: self
valueFromParam: myself`,
wantErr: true,
errSubstr: "parameter \"myself\" cannot copy value from itself",
},
{
desc: "multiple valid references",
params: `
- name: a
type: string
description: a
- name: b
type: string
description: b
valueFromParam: a
- name: c
type: string
description: c
valueFromParam: a`,
wantErr: false,
},
}
for _, tc := range tcs {
t.Run(tc.desc, func(t *testing.T) {
// Indent parameters to match YAML structure
yamlContent := fmt.Sprintf(baseYaml, tc.params)
_, err := parseToolsFile(ctx, []byte(yamlContent))
if tc.wantErr {
if err == nil {
t.Fatal("expected error, got nil")
}
if !strings.Contains(err.Error(), tc.errSubstr) {
t.Errorf("error %q does not contain expected substring %q", err.Error(), tc.errSubstr)
}
} else {
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
}
})
}
}

View File

@@ -3,13 +3,14 @@ title: "EmbeddingModels"
type: docs
weight: 2
description: >
EmbeddingModels represent services that transform text into vector embeddings for semantic search.
EmbeddingModels represent services that transform text into vector embeddings
for semantic search.
---
EmbeddingModels represent services that generate vector representations of text
data. In the MCP Toolbox, these models enable **Semantic Queries**,
allowing [Tools](../tools/) to automatically convert human-readable text into
numerical vectors before using them in a query.
data. In the MCP Toolbox, these models enable **Semantic Queries**, allowing
[Tools](../tools/) to automatically convert human-readable text into numerical
vectors before using them in a query.
This is primarily used in two scenarios:
@@ -19,14 +20,33 @@ This is primarily used in two scenarios:
- **Semantic Search**: Converting a natural language query into a vector to
perform similarity searches.
## Hidden Parameter Duplication (valueFromParam)
When building tools for vector ingestion, you often need the same input string
twice:
1. To store the original text in a TEXT column.
1. To generate the vector embedding for a VECTOR column.
Requesting an Agent (LLM) to output the exact same string twice is inefficient
and error-prone. The `valueFromParam` field solves this by allowing a parameter
to inherit its value from another parameter in the same tool.
### Key Behaviors
1. Hidden from Manifest: Parameters with valueFromParam set are excluded from
the tool definition sent to the Agent. The Agent does not know this parameter
exists.
1. Auto-Filled: When the tool is executed, the Toolbox automatically copies the
value from the referenced parameter before processing embeddings.
## Example
The following configuration defines an embedding model and applies it to
specific tool parameters.
{{< notice tip >}}
Use environment variable replacement with the format ${ENV_NAME}
instead of hardcoding your API keys into the configuration file.
{{< notice tip >}} Use environment variable replacement with the format
${ENV_NAME} instead of hardcoding your API keys into the configuration file.
{{< /notice >}}
### Step 1 - Define an Embedding Model
@@ -40,14 +60,12 @@ embeddingModels:
model: gemini-embedding-001
apiKey: ${GOOGLE_API_KEY}
dimension: 768
```
### Step 2 - Embed Tool Parameters
Use the defined embedding model, embed your query parameters using the
`embeddedBy` field. Only string-typed
parameters can be embedded:
`embeddedBy` field. Only string-typed parameters can be embedded:
```yaml
tools:
@@ -61,10 +79,13 @@ tools:
parameters:
- name: content
type: string
description: The raw text content to be stored in the database.
- name: vector_string
type: string
description: The text to be vectorized and stored.
embeddedBy: gemini-model # refers to the name of a defined embedding model
# This parameter is hidden from the LLM.
# It automatically copies the value from 'content' and embeds it.
valueFromParam: content
embeddedBy: gemini-model
# Semantic search tool
search_embedding:

View File

@@ -296,6 +296,43 @@ func (c *ToolConfigs) UnmarshalYAML(ctx context.Context, unmarshal func(interfac
return fmt.Errorf("invalid 'kind' field for tool %q (must be a string)", name)
}
// validify parameter references
if rawParams, ok := v["parameters"]; ok {
if paramsList, ok := rawParams.([]any); ok {
// Turn params into a map
validParamNames := make(map[string]bool)
for _, rawP := range paramsList {
if pMap, ok := rawP.(map[string]any); ok {
if pName, ok := pMap["name"].(string); ok && pName != "" {
validParamNames[pName] = true
}
}
}
// Validate references
for i, rawP := range paramsList {
pMap, ok := rawP.(map[string]any)
if !ok {
continue
}
pName, _ := pMap["name"].(string)
refName, _ := pMap["valueFromParam"].(string)
if refName != "" {
// Check if the referenced parameter exists
if !validParamNames[refName] {
return fmt.Errorf("tool %q config error: parameter %q (index %d) references '%q' in the 'valueFromParam' field, which is not a defined parameter.", name, pName, i, refName)
}
// Check for self-reference
if refName == pName {
return fmt.Errorf("tool %q config error: parameter %q cannot copy value from itself", name, pName)
}
}
}
}
}
yamlDecoder, err := util.NewStrictDecoder(v)
if err != nil {
return fmt.Errorf("error creating YAML decoder for tool %q: %w", name, err)

View File

@@ -158,3 +158,4 @@ func (r *ResourceManager) GetPromptsMap() map[string]prompts.Prompt {
}
return copiedMap
}

View File

@@ -134,7 +134,12 @@ func ParseParams(ps Parameters, data map[string]any, claimsMap map[string]map[st
var err error
paramAuthServices := p.GetAuthServices()
name := p.GetName()
if len(paramAuthServices) == 0 {
sourceParamName := p.GetValueFromParam()
if sourceParamName != "" {
v, _ = data[sourceParamName]
} else if len(paramAuthServices) == 0 {
// parse non auth-required parameter
var ok bool
v, ok = data[name]
@@ -318,6 +323,7 @@ type Parameter interface {
GetRequired() bool
GetAuthServices() []ParamAuthService
GetEmbeddedBy() string
GetValueFromParam() string
Parse(any) (any, error)
Manifest() ParameterManifest
McpManifest() (ParameterMcpManifest, []string)
@@ -465,6 +471,9 @@ func ParseParameter(ctx context.Context, p map[string]any, paramType string) (Pa
func (ps Parameters) Manifest() []ParameterManifest {
rtn := make([]ParameterManifest, 0, len(ps))
for _, p := range ps {
if p.GetValueFromParam() != "" {
continue
}
rtn = append(rtn, p.Manifest())
}
return rtn
@@ -476,6 +485,11 @@ func (ps Parameters) McpManifest() (McpToolsSchema, map[string][]string) {
authParam := make(map[string][]string)
for _, p := range ps {
// If the parameter is sourced from another param, skip it in the MCP manifest
if p.GetValueFromParam() != "" {
continue
}
name := p.GetName()
paramManifest, authParamList := p.McpManifest()
defaultV := p.GetDefault()
@@ -509,6 +523,7 @@ type ParameterManifest struct {
Default any `json:"default,omitempty"`
AdditionalProperties any `json:"additionalProperties,omitempty"`
EmbeddedBy string `json:"embeddedBy,omitempty"`
ValueFromParam string `json:"valueFromParam,omitempty"`
}
// ParameterMcpManifest represents properties when served as part of a ToolMcpManifest.
@@ -531,6 +546,7 @@ type CommonParameter struct {
AuthServices []ParamAuthService `yaml:"authServices"`
AuthSources []ParamAuthService `yaml:"authSources"` // Deprecated: Kept for compatibility.
EmbeddedBy string `yaml:"embeddedBy"`
ValueFromParam string `yaml:"valueFromParam"`
}
// GetName returns the name specified for the Parameter.
@@ -588,10 +604,16 @@ func (p *CommonParameter) IsExcludedValues(v any) bool {
return false
}
// GetEmbeddedBy returns the embedding model name for the Parameter.
func (p *CommonParameter) GetEmbeddedBy() string {
return p.EmbeddedBy
}
// GetValueFromParam returns the param value to copy from.
func (p *CommonParameter) GetValueFromParam() string {
return p.ValueFromParam
}
// MatchStringOrRegex checks if the input matches the target
func MatchStringOrRegex(input, target any) bool {
targetS, ok := target.(string)

View File

@@ -64,10 +64,11 @@ func AddSemanticSearchConfig(t *testing.T, config map[string]any, toolKind, inse
"description": "The text content associated with the vector.",
},
map[string]any{
"name": "text_to_embed",
"type": "string",
"description": "The text content used to generate the vector.",
"embeddedBy": "gemini_model",
"name": "text_to_embed",
"type": "string",
"description": "The text content used to generate the vector.",
"embeddedBy": "gemini_model",
"valueFromParam": "content",
},
},
}
@@ -108,7 +109,7 @@ func RunSemanticSearchToolInvokeTest(t *testing.T, insertWant, mcpInsertWant, se
name: "HTTP invoke insert_docs",
api: "http://127.0.0.1:5000/api/tool/insert_docs/invoke",
isMcp: false,
requestBody: `{"content": "The quick brown fox jumps over the lazy dog", "text_to_embed": "The quick brown fox jumps over the lazy dog"}`,
requestBody: `{"content": "The quick brown fox jumps over the lazy dog"`,
want: insertWant,
},
{
@@ -131,8 +132,7 @@ func RunSemanticSearchToolInvokeTest(t *testing.T, insertWant, mcpInsertWant, se
Params: map[string]any{
"name": "insert_docs",
"arguments": map[string]any{
"content": "The quick brown fox jumps over the lazy dog",
"text_to_embed": "The quick brown fox jumps over the lazy dog",
"content": "The quick brown fox jumps over the lazy dog",
},
},
},