Mirror of https://github.com/googleapis/genai-toolbox.git, synced 2026-01-08 15:14:00 -05:00
Add a parameter-level `embeddedBy` field to support vector embedding and semantic search. The major change is in `internal/util/parameters/parameters.go`. This PR only adds a vector formatter for the postgressql tool; other tools that require vector formatting may not yet work with `embeddedBy`. This is the second part of the semantic search support; the first part is https://github.com/googleapis/genai-toolbox/pull/2121.
305 lines · 11 KiB · Go
// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bigqueryforecast

import (
	"context"
	"fmt"
	"strings"

	bigqueryapi "cloud.google.com/go/bigquery"
	yaml "github.com/goccy/go-yaml"
	"github.com/googleapis/genai-toolbox/internal/embeddingmodels"
	"github.com/googleapis/genai-toolbox/internal/sources"
	bigqueryds "github.com/googleapis/genai-toolbox/internal/sources/bigquery"
	"github.com/googleapis/genai-toolbox/internal/tools"
	bqutil "github.com/googleapis/genai-toolbox/internal/tools/bigquery/bigquerycommon"
	"github.com/googleapis/genai-toolbox/internal/util"
	"github.com/googleapis/genai-toolbox/internal/util/parameters"
	bigqueryrestapi "google.golang.org/api/bigquery/v2"
)

const kind string = "bigquery-forecast"

func init() {
	if !tools.Register(kind, newConfig) {
		panic(fmt.Sprintf("tool kind %q already registered", kind))
	}
}

func newConfig(ctx context.Context, name string, decoder *yaml.Decoder) (tools.ToolConfig, error) {
	actual := Config{Name: name}
	if err := decoder.DecodeContext(ctx, &actual); err != nil {
		return nil, err
	}
	return actual, nil
}

type compatibleSource interface {
	BigQueryClient() *bigqueryapi.Client
	UseClientAuthorization() bool
	IsDatasetAllowed(projectID, datasetID string) bool
	BigQueryAllowedDatasets() []string
	BigQuerySession() bigqueryds.BigQuerySessionProvider
	RetrieveClientAndService(tools.AccessToken) (*bigqueryapi.Client, *bigqueryrestapi.Service, error)
	RunSQL(context.Context, *bigqueryapi.Client, string, string, []bigqueryapi.QueryParameter, []*bigqueryapi.ConnectionProperty) (any, error)
}

type Config struct {
	Name         string   `yaml:"name" validate:"required"`
	Kind         string   `yaml:"kind" validate:"required"`
	Source       string   `yaml:"source" validate:"required"`
	Description  string   `yaml:"description" validate:"required"`
	AuthRequired []string `yaml:"authRequired"`
}
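
// A minimal sketch of how this tool might be declared in a tools.yaml file,
// based on the yaml tags above. The tool name, source name, and description
// are illustrative placeholders, not values taken from this repository:
//
//	tools:
//	  forecast_sales:
//	    kind: bigquery-forecast
//	    source: my-bigquery-source
//	    description: Forecast a time series with BigQuery's AI.FORECAST function.
//	    authRequired: []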

// validate interface
var _ tools.ToolConfig = Config{}

func (cfg Config) ToolConfigKind() string {
	return kind
}

func (cfg Config) Initialize(srcs map[string]sources.Source) (tools.Tool, error) {
	// verify source exists
	rawS, ok := srcs[cfg.Source]
	if !ok {
		return nil, fmt.Errorf("no source named %q configured", cfg.Source)
	}

	// verify the source is compatible
	s, ok := rawS.(compatibleSource)
	if !ok {
		return nil, fmt.Errorf("invalid source for %q tool: source %q not compatible", kind, cfg.Source)
	}

	allowedDatasets := s.BigQueryAllowedDatasets()
	historyDataDescription := "The table id or the query of the history time series data."
	if len(allowedDatasets) > 0 {
		datasetIDs := []string{}
		for _, ds := range allowedDatasets {
			datasetIDs = append(datasetIDs, fmt.Sprintf("`%s`", ds))
		}
		historyDataDescription += fmt.Sprintf(" The query or table must only access datasets from the following list: %s.", strings.Join(datasetIDs, ", "))
	}

	historyDataParameter := parameters.NewStringParameter("history_data", historyDataDescription)
	timestampColumnNameParameter := parameters.NewStringParameter("timestamp_col",
		"The name of the time series timestamp column.")
	dataColumnNameParameter := parameters.NewStringParameter("data_col",
		"The name of the time series data column.")
	idColumnNameParameter := parameters.NewArrayParameterWithDefault("id_cols", []any{},
		"An array of the time series id column names.",
		parameters.NewStringParameter("id_col", "The name of time series id column."))
	horizonParameter := parameters.NewIntParameterWithDefault("horizon", 10, "The number of forecasting steps.")
	params := parameters.Parameters{historyDataParameter,
		timestampColumnNameParameter, dataColumnNameParameter, idColumnNameParameter, horizonParameter}

	mcpManifest := tools.GetMcpManifest(cfg.Name, cfg.Description, cfg.AuthRequired, params, nil)

	// finish tool setup
	t := Tool{
		Config:      cfg,
		Parameters:  params,
		manifest:    tools.Manifest{Description: cfg.Description, Parameters: params.Manifest(), AuthRequired: cfg.AuthRequired},
		mcpManifest: mcpManifest,
	}
	return t, nil
}

// validate interface
var _ tools.Tool = Tool{}

type Tool struct {
	Config
	Parameters  parameters.Parameters `yaml:"parameters"`
	manifest    tools.Manifest
	mcpManifest tools.McpManifest
}

func (t Tool) ToConfig() tools.ToolConfig {
	return t.Config
}

func (t Tool) Invoke(ctx context.Context, resourceMgr tools.SourceProvider, params parameters.ParamValues, accessToken tools.AccessToken) (any, error) {
	source, err := tools.GetCompatibleSource[compatibleSource](resourceMgr, t.Source, t.Name, t.Kind)
	if err != nil {
		return nil, err
	}

	paramsMap := params.AsMap()
	historyData, ok := paramsMap["history_data"].(string)
	if !ok {
		return nil, fmt.Errorf("unable to cast history_data parameter %v", paramsMap["history_data"])
	}
	timestampCol, ok := paramsMap["timestamp_col"].(string)
	if !ok {
		return nil, fmt.Errorf("unable to cast timestamp_col parameter %v", paramsMap["timestamp_col"])
	}
	dataCol, ok := paramsMap["data_col"].(string)
	if !ok {
		return nil, fmt.Errorf("unable to cast data_col parameter %v", paramsMap["data_col"])
	}
	idColsRaw, ok := paramsMap["id_cols"].([]any)
	if !ok {
		return nil, fmt.Errorf("unable to cast id_cols parameter %v", paramsMap["id_cols"])
	}
	var idCols []string
	for _, v := range idColsRaw {
		s, ok := v.(string)
		if !ok {
			return nil, fmt.Errorf("id_cols contains non-string value: %v", v)
		}
		idCols = append(idCols, s)
	}
	horizon, ok := paramsMap["horizon"].(int)
	if !ok {
		if h, ok := paramsMap["horizon"].(float64); ok {
			horizon = int(h)
		} else {
			return nil, fmt.Errorf("unable to cast horizon parameter %v", paramsMap["horizon"])
		}
	}
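	// The float64 fallback above covers values decoded from JSON, where Go
	// represents all numbers as float64 rather than int.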

	bqClient, restService, err := source.RetrieveClientAndService(accessToken)
	if err != nil {
		return nil, err
	}

	var historyDataSource string
	trimmedUpperHistoryData := strings.TrimSpace(strings.ToUpper(historyData))
	if strings.HasPrefix(trimmedUpperHistoryData, "SELECT") || strings.HasPrefix(trimmedUpperHistoryData, "WITH") {
		if len(source.BigQueryAllowedDatasets()) > 0 {
			var connProps []*bigqueryapi.ConnectionProperty
			session, err := source.BigQuerySession()(ctx)
			if err != nil {
				return nil, fmt.Errorf("failed to get BigQuery session: %w", err)
			}
			if session != nil {
				connProps = []*bigqueryapi.ConnectionProperty{
					{Key: "session_id", Value: session.ID},
				}
			}
			dryRunJob, err := bqutil.DryRunQuery(ctx, restService, source.BigQueryClient().Project(), source.BigQueryClient().Location, historyData, nil, connProps)
			if err != nil {
				return nil, fmt.Errorf("query validation failed: %w", err)
			}
			statementType := dryRunJob.Statistics.Query.StatementType
			if statementType != "SELECT" {
				return nil, fmt.Errorf("the 'history_data' parameter only supports a table ID or a SELECT query. The provided query has statement type '%s'", statementType)
			}

			queryStats := dryRunJob.Statistics.Query
			if queryStats != nil {
				for _, tableRef := range queryStats.ReferencedTables {
					if !source.IsDatasetAllowed(tableRef.ProjectId, tableRef.DatasetId) {
						return nil, fmt.Errorf("query in history_data accesses dataset '%s.%s', which is not in the allowed list", tableRef.ProjectId, tableRef.DatasetId)
					}
				}
			} else {
				return nil, fmt.Errorf("could not analyze query in history_data to validate against allowed datasets")
			}
		}
		historyDataSource = fmt.Sprintf("(%s)", historyData)
	} else {
		if len(source.BigQueryAllowedDatasets()) > 0 {
			parts := strings.Split(historyData, ".")
			var projectID, datasetID string

			switch len(parts) {
			case 3: // project.dataset.table
				projectID = parts[0]
				datasetID = parts[1]
			case 2: // dataset.table
				projectID = source.BigQueryClient().Project()
				datasetID = parts[0]
			default:
				return nil, fmt.Errorf("invalid table ID format for 'history_data': %q. Expected 'dataset.table' or 'project.dataset.table'", historyData)
			}

			if !source.IsDatasetAllowed(projectID, datasetID) {
				return nil, fmt.Errorf("access to dataset '%s.%s' (from table '%s') is not allowed", projectID, datasetID, historyData)
			}
		}
		historyDataSource = fmt.Sprintf("TABLE `%s`", historyData)
	}
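	// At this point historyDataSource is either a parenthesized subquery, e.g.
	// "(SELECT ...)", or a table reference of the form "TABLE `project.dataset.table`",
	// both of which AI.FORECAST accepts as its input data argument.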

	idColsArg := ""
	if len(idCols) > 0 {
		idColsFormatted := fmt.Sprintf("['%s']", strings.Join(idCols, "', '"))
		idColsArg = fmt.Sprintf(", id_cols => %s", idColsFormatted)
	}
	sql := fmt.Sprintf(`SELECT *
		FROM AI.FORECAST(
			%s,
			data_col => '%s',
			timestamp_col => '%s',
			horizon => %d%s)`,
		historyDataSource, dataCol, timestampCol, horizon, idColsArg)
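	// As an illustrative example (names invented for this comment), a call with
	// history_data "proj.sales.daily", data_col "revenue", timestamp_col "day",
	// id_cols ["store_id"], and horizon 30 would produce roughly:
	//
	//	SELECT *
	//		FROM AI.FORECAST(
	//			TABLE `proj.sales.daily`,
	//			data_col => 'revenue',
	//			timestamp_col => 'day',
	//			horizon => 30, id_cols => ['store_id'])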

	session, err := source.BigQuerySession()(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get BigQuery session: %w", err)
	}
	var connProps []*bigqueryapi.ConnectionProperty
	if session != nil {
		// Add session ID to the connection properties for subsequent calls.
		connProps = []*bigqueryapi.ConnectionProperty{
			{Key: "session_id", Value: session.ID},
		}
	}

	// Log the query executed for debugging.
	logger, err := util.LoggerFromContext(ctx)
	if err != nil {
		return nil, fmt.Errorf("error getting logger: %s", err)
	}
	logger.DebugContext(ctx, fmt.Sprintf("executing `%s` tool query: %s", kind, sql))

	return source.RunSQL(ctx, bqClient, sql, "SELECT", nil, connProps)
}

func (t Tool) ParseParams(data map[string]any, claims map[string]map[string]any) (parameters.ParamValues, error) {
	return parameters.ParseParams(t.Parameters, data, claims)
}

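// EmbedParams is the hook for the embeddedBy/semantic-search support described
// in the change summary at the top of this page: parameters that declare an
// embeddedBy model are resolved to vector embeddings before execution. This
// tool passes a nil vector formatter; per that summary, only the postgressql
// tool currently supplies one.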
func (t Tool) EmbedParams(ctx context.Context, paramValues parameters.ParamValues, embeddingModelsMap map[string]embeddingmodels.EmbeddingModel) (parameters.ParamValues, error) {
	return parameters.EmbedParams(ctx, t.Parameters, paramValues, embeddingModelsMap, nil)
}

func (t Tool) Manifest() tools.Manifest {
	return t.manifest
}

func (t Tool) McpManifest() tools.McpManifest {
	return t.mcpManifest
}

func (t Tool) Authorized(verifiedAuthServices []string) bool {
	return tools.IsAuthorized(t.AuthRequired, verifiedAuthServices)
}

func (t Tool) RequiresClientAuthorization(resourceMgr tools.SourceProvider) (bool, error) {
	source, err := tools.GetCompatibleSource[compatibleSource](resourceMgr, t.Source, t.Name, t.Kind)
	if err != nil {
		return false, err
	}
	return source.UseClientAuthorization(), nil
}

func (t Tool) GetAuthTokenHeaderName(resourceMgr tools.SourceProvider) (string, error) {
	return "Authorization", nil
}