mirror of
https://github.com/googleapis/genai-toolbox.git
synced 2026-04-09 03:02:26 -04:00
This PR updates the linking mechanism between Source and Tool.
Tools are directly linked to their Source, either by pointing to the
Source's functions or by assigning values from the source during Tool's
initialization. However, the existing approach means that any
modification to the Source after Tool's initialization might not be
reflected. To address this limitation, each tool should only store a
name reference to the Source, rather than direct link or assigned
values.
Tools will provide an interface for `compatibleSource`. This will be used
to determine if a Source is compatible with the Tool.
```
type compatibleSource interface{
Client() http.Client
ProjectID() string
}
```
During `Invoke()`, the tool will run the following operations:
* retrieve the Source from the `resourceManager` using the source name defined
in the Tool's config
* validate Source via `compatibleSource interface{}`
* run the remaining `Invoke()` function. Any fields that are needed are
retrieved directly from the source.
With this update, the resource manager is also added as an input to the other
Tool functions that require access to the source (e.g.
`RequiresClientAuthorization()`).
347 lines
12 KiB
Go
347 lines
12 KiB
Go
// Copyright 2025 Google LLC
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package bigqueryforecast
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
|
|
bigqueryapi "cloud.google.com/go/bigquery"
|
|
yaml "github.com/goccy/go-yaml"
|
|
"github.com/googleapis/genai-toolbox/internal/sources"
|
|
bigqueryds "github.com/googleapis/genai-toolbox/internal/sources/bigquery"
|
|
"github.com/googleapis/genai-toolbox/internal/tools"
|
|
bqutil "github.com/googleapis/genai-toolbox/internal/tools/bigquery/bigquerycommon"
|
|
"github.com/googleapis/genai-toolbox/internal/util"
|
|
"github.com/googleapis/genai-toolbox/internal/util/parameters"
|
|
bigqueryrestapi "google.golang.org/api/bigquery/v2"
|
|
"google.golang.org/api/iterator"
|
|
)
|
|
|
|
// kind is the tool kind identifier under which this package registers its
// config factory with the tools registry.
const kind string = "bigquery-forecast"
|
|
|
|
func init() {
|
|
if !tools.Register(kind, newConfig) {
|
|
panic(fmt.Sprintf("tool kind %q already registered", kind))
|
|
}
|
|
}
|
|
|
|
func newConfig(ctx context.Context, name string, decoder *yaml.Decoder) (tools.ToolConfig, error) {
|
|
actual := Config{Name: name}
|
|
if err := decoder.DecodeContext(ctx, &actual); err != nil {
|
|
return nil, err
|
|
}
|
|
return actual, nil
|
|
}
|
|
|
|
type compatibleSource interface {
|
|
BigQueryClient() *bigqueryapi.Client
|
|
BigQueryRestService() *bigqueryrestapi.Service
|
|
BigQueryClientCreator() bigqueryds.BigqueryClientCreator
|
|
UseClientAuthorization() bool
|
|
IsDatasetAllowed(projectID, datasetID string) bool
|
|
BigQueryAllowedDatasets() []string
|
|
BigQuerySession() bigqueryds.BigQuerySessionProvider
|
|
}
|
|
|
|
// Config is the YAML-decoded configuration of a bigquery-forecast tool.
type Config struct {
	// Name is the tool name; newConfig sets it before decoding.
	Name string `yaml:"name" validate:"required"`
	// Kind is the tool kind declared in the configuration.
	Kind string `yaml:"kind" validate:"required"`
	// Source is the name of the source the tool looks up when it runs.
	Source string `yaml:"source" validate:"required"`
	// Description is the text published in the tool manifests.
	Description string `yaml:"description" validate:"required"`
	// AuthRequired lists the auth services checked by Authorized.
	AuthRequired []string `yaml:"authRequired"`
}
|
|
|
|
// Compile-time check that Config implements tools.ToolConfig.
|
|
var _ tools.ToolConfig = Config{}
|
|
|
|
func (cfg Config) ToolConfigKind() string {
|
|
return kind
|
|
}
|
|
|
|
func (cfg Config) Initialize(srcs map[string]sources.Source) (tools.Tool, error) {
|
|
// verify source exists
|
|
rawS, ok := srcs[cfg.Source]
|
|
if !ok {
|
|
return nil, fmt.Errorf("no source named %q configured", cfg.Source)
|
|
}
|
|
|
|
// verify the source is compatible
|
|
s, ok := rawS.(compatibleSource)
|
|
if !ok {
|
|
return nil, fmt.Errorf("invalid source for %q tool: source %q not compatible", kind, cfg.Source)
|
|
}
|
|
|
|
allowedDatasets := s.BigQueryAllowedDatasets()
|
|
historyDataDescription := "The table id or the query of the history time series data."
|
|
if len(allowedDatasets) > 0 {
|
|
datasetIDs := []string{}
|
|
for _, ds := range allowedDatasets {
|
|
datasetIDs = append(datasetIDs, fmt.Sprintf("`%s`", ds))
|
|
}
|
|
historyDataDescription += fmt.Sprintf(" The query or table must only access datasets from the following list: %s.", strings.Join(datasetIDs, ", "))
|
|
}
|
|
|
|
historyDataParameter := parameters.NewStringParameter("history_data", historyDataDescription)
|
|
timestampColumnNameParameter := parameters.NewStringParameter("timestamp_col",
|
|
"The name of the time series timestamp column.")
|
|
dataColumnNameParameter := parameters.NewStringParameter("data_col",
|
|
"The name of the time series data column.")
|
|
idColumnNameParameter := parameters.NewArrayParameterWithDefault("id_cols", []any{},
|
|
"An array of the time series id column names.",
|
|
parameters.NewStringParameter("id_col", "The name of time series id column."))
|
|
horizonParameter := parameters.NewIntParameterWithDefault("horizon", 10, "The number of forecasting steps.")
|
|
params := parameters.Parameters{historyDataParameter,
|
|
timestampColumnNameParameter, dataColumnNameParameter, idColumnNameParameter, horizonParameter}
|
|
|
|
mcpManifest := tools.GetMcpManifest(cfg.Name, cfg.Description, cfg.AuthRequired, params, nil)
|
|
|
|
// finish tool setup
|
|
t := Tool{
|
|
Config: cfg,
|
|
Parameters: params,
|
|
manifest: tools.Manifest{Description: cfg.Description, Parameters: params.Manifest(), AuthRequired: cfg.AuthRequired},
|
|
mcpManifest: mcpManifest,
|
|
}
|
|
return t, nil
|
|
}
|
|
|
|
// Compile-time check that Tool implements tools.Tool.
|
|
var _ tools.Tool = Tool{}
|
|
|
|
type Tool struct {
|
|
Config
|
|
Parameters parameters.Parameters `yaml:"parameters"`
|
|
manifest tools.Manifest
|
|
mcpManifest tools.McpManifest
|
|
}
|
|
|
|
func (t Tool) ToConfig() tools.ToolConfig {
|
|
return t.Config
|
|
}
|
|
|
|
func (t Tool) Invoke(ctx context.Context, resourceMgr tools.SourceProvider, params parameters.ParamValues, accessToken tools.AccessToken) (any, error) {
|
|
source, err := tools.GetCompatibleSource[compatibleSource](resourceMgr, t.Source, t.Name, t.Kind)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
paramsMap := params.AsMap()
|
|
historyData, ok := paramsMap["history_data"].(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("unable to cast history_data parameter %v", paramsMap["history_data"])
|
|
}
|
|
timestampCol, ok := paramsMap["timestamp_col"].(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("unable to cast timestamp_col parameter %v", paramsMap["timestamp_col"])
|
|
}
|
|
dataCol, ok := paramsMap["data_col"].(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("unable to cast data_col parameter %v", paramsMap["data_col"])
|
|
}
|
|
idColsRaw, ok := paramsMap["id_cols"].([]any)
|
|
if !ok {
|
|
return nil, fmt.Errorf("unable to cast id_cols parameter %v", paramsMap["id_cols"])
|
|
}
|
|
var idCols []string
|
|
for _, v := range idColsRaw {
|
|
s, ok := v.(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("id_cols contains non-string value: %v", v)
|
|
}
|
|
idCols = append(idCols, s)
|
|
}
|
|
horizon, ok := paramsMap["horizon"].(int)
|
|
if !ok {
|
|
if h, ok := paramsMap["horizon"].(float64); ok {
|
|
horizon = int(h)
|
|
} else {
|
|
return nil, fmt.Errorf("unable to cast horizon parameter %v", paramsMap["horizon"])
|
|
}
|
|
}
|
|
|
|
bqClient := source.BigQueryClient()
|
|
restService := source.BigQueryRestService()
|
|
|
|
// Initialize new client if using user OAuth token
|
|
if source.UseClientAuthorization() {
|
|
tokenStr, err := accessToken.ParseBearerToken()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error parsing access token: %w", err)
|
|
}
|
|
bqClient, restService, err = source.BigQueryClientCreator()(tokenStr, false)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error creating client from OAuth access token: %w", err)
|
|
}
|
|
}
|
|
|
|
var historyDataSource string
|
|
trimmedUpperHistoryData := strings.TrimSpace(strings.ToUpper(historyData))
|
|
if strings.HasPrefix(trimmedUpperHistoryData, "SELECT") || strings.HasPrefix(trimmedUpperHistoryData, "WITH") {
|
|
if len(source.BigQueryAllowedDatasets()) > 0 {
|
|
var connProps []*bigqueryapi.ConnectionProperty
|
|
session, err := source.BigQuerySession()(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get BigQuery session: %w", err)
|
|
}
|
|
if session != nil {
|
|
connProps = []*bigqueryapi.ConnectionProperty{
|
|
{Key: "session_id", Value: session.ID},
|
|
}
|
|
}
|
|
dryRunJob, err := bqutil.DryRunQuery(ctx, restService, source.BigQueryClient().Project(), source.BigQueryClient().Location, historyData, nil, connProps)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("query validation failed: %w", err)
|
|
}
|
|
statementType := dryRunJob.Statistics.Query.StatementType
|
|
if statementType != "SELECT" {
|
|
return nil, fmt.Errorf("the 'history_data' parameter only supports a table ID or a SELECT query. The provided query has statement type '%s'", statementType)
|
|
}
|
|
|
|
queryStats := dryRunJob.Statistics.Query
|
|
if queryStats != nil {
|
|
for _, tableRef := range queryStats.ReferencedTables {
|
|
if !source.IsDatasetAllowed(tableRef.ProjectId, tableRef.DatasetId) {
|
|
return nil, fmt.Errorf("query in history_data accesses dataset '%s.%s', which is not in the allowed list", tableRef.ProjectId, tableRef.DatasetId)
|
|
}
|
|
}
|
|
} else {
|
|
return nil, fmt.Errorf("could not analyze query in history_data to validate against allowed datasets")
|
|
}
|
|
}
|
|
historyDataSource = fmt.Sprintf("(%s)", historyData)
|
|
} else {
|
|
if len(source.BigQueryAllowedDatasets()) > 0 {
|
|
parts := strings.Split(historyData, ".")
|
|
var projectID, datasetID string
|
|
|
|
switch len(parts) {
|
|
case 3: // project.dataset.table
|
|
projectID = parts[0]
|
|
datasetID = parts[1]
|
|
case 2: // dataset.table
|
|
projectID = source.BigQueryClient().Project()
|
|
datasetID = parts[0]
|
|
default:
|
|
return nil, fmt.Errorf("invalid table ID format for 'history_data': %q. Expected 'dataset.table' or 'project.dataset.table'", historyData)
|
|
}
|
|
|
|
if !source.IsDatasetAllowed(projectID, datasetID) {
|
|
return nil, fmt.Errorf("access to dataset '%s.%s' (from table '%s') is not allowed", projectID, datasetID, historyData)
|
|
}
|
|
}
|
|
historyDataSource = fmt.Sprintf("TABLE `%s`", historyData)
|
|
}
|
|
|
|
idColsArg := ""
|
|
if len(idCols) > 0 {
|
|
idColsFormatted := fmt.Sprintf("['%s']", strings.Join(idCols, "', '"))
|
|
idColsArg = fmt.Sprintf(", id_cols => %s", idColsFormatted)
|
|
}
|
|
|
|
sql := fmt.Sprintf(`SELECT *
|
|
FROM AI.FORECAST(
|
|
%s,
|
|
data_col => '%s',
|
|
timestamp_col => '%s',
|
|
horizon => %d%s)`,
|
|
historyDataSource, dataCol, timestampCol, horizon, idColsArg)
|
|
|
|
// JobStatistics.QueryStatistics.StatementType
|
|
query := bqClient.Query(sql)
|
|
query.Location = bqClient.Location
|
|
session, err := source.BigQuerySession()(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get BigQuery session: %w", err)
|
|
}
|
|
if session != nil {
|
|
// Add session ID to the connection properties for subsequent calls.
|
|
query.ConnectionProperties = []*bigqueryapi.ConnectionProperty{
|
|
{Key: "session_id", Value: session.ID},
|
|
}
|
|
}
|
|
|
|
// Log the query executed for debugging.
|
|
logger, err := util.LoggerFromContext(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error getting logger: %s", err)
|
|
}
|
|
logger.DebugContext(ctx, fmt.Sprintf("executing `%s` tool query: %s", kind, sql))
|
|
|
|
// This block handles SELECT statements, which return a row set.
|
|
// We iterate through the results, convert each row into a map of
|
|
// column names to values, and return the collection of rows.
|
|
var out []any
|
|
job, err := query.Run(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to execute query: %w", err)
|
|
}
|
|
it, err := job.Read(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to read query results: %w", err)
|
|
}
|
|
for {
|
|
var row map[string]bigqueryapi.Value
|
|
err = it.Next(&row)
|
|
if err == iterator.Done {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to iterate through query results: %w", err)
|
|
}
|
|
vMap := make(map[string]any)
|
|
for key, value := range row {
|
|
vMap[key] = value
|
|
}
|
|
out = append(out, vMap)
|
|
}
|
|
// If the query returned any rows, return them directly.
|
|
if len(out) > 0 {
|
|
return out, nil
|
|
}
|
|
|
|
// This handles the standard case for a SELECT query that successfully
|
|
return "The query returned 0 rows.", nil
|
|
}
|
|
|
|
func (t Tool) ParseParams(data map[string]any, claims map[string]map[string]any) (parameters.ParamValues, error) {
|
|
return parameters.ParseParams(t.Parameters, data, claims)
|
|
}
|
|
|
|
func (t Tool) Manifest() tools.Manifest {
|
|
return t.manifest
|
|
}
|
|
|
|
func (t Tool) McpManifest() tools.McpManifest {
|
|
return t.mcpManifest
|
|
}
|
|
|
|
func (t Tool) Authorized(verifiedAuthServices []string) bool {
|
|
return tools.IsAuthorized(t.AuthRequired, verifiedAuthServices)
|
|
}
|
|
|
|
func (t Tool) RequiresClientAuthorization(resourceMgr tools.SourceProvider) (bool, error) {
|
|
source, err := tools.GetCompatibleSource[compatibleSource](resourceMgr, t.Source, t.Name, t.Kind)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
return source.UseClientAuthorization(), nil
|
|
}
|
|
|
|
func (t Tool) GetAuthTokenHeaderName(resourceMgr tools.SourceProvider) (string, error) {
|
|
return "Authorization", nil
|
|
}
|