mirror of
https://github.com/googleapis/genai-toolbox.git
synced 2026-01-07 22:54:06 -05:00
feat(bigquery): Make credentials scope configurable (#2210)
## Description
This change addresses the ask where the user may want to use custom
scopes. For instance, the default scope (bigquery) falls short from
running sql that utilizes integration with other google products, such
as Drive, Vertex AI, Cloud Run etc. With this change the user would be
able to configure custom scopes depending on their use case.
The custom scopes can be configured in the tools.yaml file, e.g.:
```yaml
sources:
bigquery-source:
kind: "bigquery"
project: ${BIGQUERY_PROJECT}
location: ${BIGQUERY_LOCATION:}
useClientOAuth: ${BIGQUERY_USE_CLIENT_OAUTH:false}
scopes:
- "https://www.googleapis.com/auth/bigquery"
- "https://www.googleapis.com/auth/drive"
```
and if the [bigquery prebuilt
config](https://github.com/googleapis/genai-toolbox/blob/main/internal/prebuiltconfigs/tools/bigquery.yaml)
is being used, then it can be set in the environment variable as well:
```shell
$ export BIGQUERY_SCOPES="https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/drive"
```
## PR Checklist
> Thank you for opening a Pull Request! Before submitting your PR, there
are a
> few things you can do to make sure it goes smoothly:
- [ ] Make sure you reviewed
[CONTRIBUTING.md](https://github.com/googleapis/genai-toolbox/blob/main/CONTRIBUTING.md)
- [x] Make sure to open an issue as a
[bug/issue](https://github.com/googleapis/genai-toolbox/issues/new/choose)
before writing your code! That way we can discuss the change, evaluate
designs, and agree on the general idea
- [x] Ensure the tests and linter pass
- [x] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)
- [ ] Make sure to add `!` if this involve a breaking change
🛠️ Fixes #1942
This commit is contained in:
@@ -68,6 +68,7 @@ The BigQuery MCP server is configured using environment variables.
|
||||
export BIGQUERY_PROJECT="<your-gcp-project-id>"
|
||||
export BIGQUERY_LOCATION="<your-dataset-location>" # Optional
|
||||
export BIGQUERY_USE_CLIENT_OAUTH="true" # Optional
|
||||
export BIGQUERY_SCOPES="<comma-separated-scopes>" # Optional
|
||||
```
|
||||
|
||||
Add the following configuration to your MCP client (e.g., `settings.json` for Gemini CLI, `mcp_config.json` for Antigravity):
|
||||
|
||||
@@ -105,6 +105,8 @@ See [Usage Examples](../reference/cli.md#examples).
|
||||
* `BIGQUERY_LOCATION`: (Optional) The dataset location.
|
||||
* `BIGQUERY_USE_CLIENT_OAUTH`: (Optional) If `true`, forwards the client's
|
||||
OAuth access token for authentication. Defaults to `false`.
|
||||
* `BIGQUERY_SCOPES`: (Optional) A comma-separated list of OAuth scopes to
|
||||
use for authentication.
|
||||
* **Permissions:**
|
||||
* **BigQuery User** (`roles/bigquery.user`) to execute queries and view
|
||||
metadata.
|
||||
|
||||
@@ -94,6 +94,13 @@ intend to run. Common roles include `roles/bigquery.user` (which includes
|
||||
permissions to run jobs and read data) or `roles/bigbigquery.dataViewer`.
|
||||
Follow this [guide][set-adc] to set up your ADC.
|
||||
|
||||
If you are running on Google Compute Engine (GCE) or Google Kubernetes Engine
|
||||
(GKE), you might need to explicitly set the access scopes for the service
|
||||
account. While you can configure scopes when creating the VM or node pool, you
|
||||
can also specify them in the source configuration using the `scopes` field.
|
||||
Common scopes include `https://www.googleapis.com/auth/bigquery` or
|
||||
`https://www.googleapis.com/auth/cloud-platform`.
|
||||
|
||||
### Authentication via User's OAuth Access Token
|
||||
|
||||
If the `useClientOAuth` parameter is set to `true`, Toolbox will instead use the
|
||||
@@ -124,6 +131,9 @@ sources:
|
||||
# - "my_dataset_1"
|
||||
# - "other_project.my_dataset_2"
|
||||
# impersonateServiceAccount: "service-account@project-id.iam.gserviceaccount.com" # Optional: Service account to impersonate
|
||||
# scopes: # Optional: List of OAuth scopes to request.
|
||||
# - "https://www.googleapis.com/auth/bigquery"
|
||||
# - "https://www.googleapis.com/auth/drive.readonly"
|
||||
```
|
||||
|
||||
Initialize a BigQuery source that uses the client's access token:
|
||||
@@ -140,6 +150,9 @@ sources:
|
||||
# - "my_dataset_1"
|
||||
# - "other_project.my_dataset_2"
|
||||
# impersonateServiceAccount: "service-account@project-id.iam.gserviceaccount.com" # Optional: Service account to impersonate
|
||||
# scopes: # Optional: List of OAuth scopes to request.
|
||||
# - "https://www.googleapis.com/auth/bigquery"
|
||||
# - "https://www.googleapis.com/auth/drive.readonly"
|
||||
```
|
||||
|
||||
## Reference
|
||||
@@ -152,4 +165,5 @@ sources:
|
||||
| writeMode | string | false | Controls the write behavior for tools. `allowed` (default): All queries are permitted. `blocked`: Only `SELECT` statements are allowed for the `bigquery-execute-sql` tool. `protected`: Enables session-based execution where all tools associated with this source instance share the same [BigQuery session](https://cloud.google.com/bigquery/docs/sessions-intro). This allows for stateful operations using temporary tables (e.g., `CREATE TEMP TABLE`). For `bigquery-execute-sql`, `SELECT` statements can be used on all tables, but write operations are restricted to the session's temporary dataset. For tools like `bigquery-sql`, `bigquery-forecast`, and `bigquery-analyze-contribution`, the `writeMode` restrictions do not apply, but they will operate within the shared session. **Note:** The `protected` mode cannot be used with `useClientOAuth: true`. It is also not recommended for multi-user server environments, as all users would share the same session. A session is terminated automatically after 24 hours of inactivity or after 7 days, whichever comes first. A new session is created on the next request, and any temporary data from the previous session will be lost. |
|
||||
| allowedDatasets | []string | false | An optional list of dataset IDs that tools using this source are allowed to access. If provided, any tool operation attempting to access a dataset not in this list will be rejected. To enforce this, two types of operations are also disallowed: 1) Dataset-level operations (e.g., `CREATE SCHEMA`), and 2) operations where table access cannot be statically analyzed (e.g., `EXECUTE IMMEDIATE`, `CREATE PROCEDURE`). If a single dataset is provided, it will be treated as the default for prebuilt tools. |
|
||||
| useClientOAuth | bool | false | If true, forwards the client's OAuth access token from the "Authorization" header to downstream queries. **Note:** This cannot be used with `writeMode: protected`. |
|
||||
| scopes | []string | false | A list of OAuth 2.0 scopes to use for the credentials. If not provided, default scopes are used. |
|
||||
| impersonateServiceAccount | string | false | Service account email to impersonate when making BigQuery and Dataplex API calls. The authenticated principal must have the `roles/iam.serviceAccountTokenCreator` role on the target service account. [Learn More](https://cloud.google.com/iam/docs/service-account-impersonation) |
|
||||
|
||||
@@ -18,6 +18,7 @@ sources:
|
||||
project: ${BIGQUERY_PROJECT}
|
||||
location: ${BIGQUERY_LOCATION:}
|
||||
useClientOAuth: ${BIGQUERY_USE_CLIENT_OAUTH:false}
|
||||
scopes: ${BIGQUERY_SCOPES:}
|
||||
|
||||
tools:
|
||||
analyze_contribution:
|
||||
|
||||
@@ -43,6 +43,9 @@ import (
|
||||
|
||||
const SourceKind string = "bigquery"
|
||||
|
||||
// CloudPlatformScope is a broad scope for Google Cloud Platform services.
|
||||
const CloudPlatformScope = "https://www.googleapis.com/auth/cloud-platform"
|
||||
|
||||
const (
|
||||
// No write operations are allowed.
|
||||
WriteModeBlocked string = "blocked"
|
||||
@@ -77,14 +80,42 @@ func newConfig(ctx context.Context, name string, decoder *yaml.Decoder) (sources
|
||||
|
||||
type Config struct {
|
||||
// BigQuery configs
|
||||
Name string `yaml:"name" validate:"required"`
|
||||
Kind string `yaml:"kind" validate:"required"`
|
||||
Project string `yaml:"project" validate:"required"`
|
||||
Location string `yaml:"location"`
|
||||
WriteMode string `yaml:"writeMode"`
|
||||
AllowedDatasets []string `yaml:"allowedDatasets"`
|
||||
UseClientOAuth bool `yaml:"useClientOAuth"`
|
||||
ImpersonateServiceAccount string `yaml:"impersonateServiceAccount"`
|
||||
Name string `yaml:"name" validate:"required"`
|
||||
Kind string `yaml:"kind" validate:"required"`
|
||||
Project string `yaml:"project" validate:"required"`
|
||||
Location string `yaml:"location"`
|
||||
WriteMode string `yaml:"writeMode"`
|
||||
AllowedDatasets StringOrStringSlice `yaml:"allowedDatasets"`
|
||||
UseClientOAuth bool `yaml:"useClientOAuth"`
|
||||
ImpersonateServiceAccount string `yaml:"impersonateServiceAccount"`
|
||||
Scopes StringOrStringSlice `yaml:"scopes"`
|
||||
}
|
||||
|
||||
// StringOrStringSlice is a custom type that can unmarshal both a single string
|
||||
// (which it splits by comma) and a sequence of strings into a string slice.
|
||||
type StringOrStringSlice []string
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (s *StringOrStringSlice) UnmarshalYAML(unmarshal func(any) error) error {
|
||||
var v any
|
||||
if err := unmarshal(&v); err != nil {
|
||||
return err
|
||||
}
|
||||
switch val := v.(type) {
|
||||
case string:
|
||||
*s = strings.Split(val, ",")
|
||||
return nil
|
||||
case []any:
|
||||
for _, item := range val {
|
||||
if str, ok := item.(string); ok {
|
||||
*s = append(*s, str)
|
||||
} else {
|
||||
return fmt.Errorf("element in sequence is not a string: %v", item)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("cannot unmarshal %T into StringOrStringSlice", v)
|
||||
}
|
||||
|
||||
func (r Config) SourceConfigKind() string {
|
||||
@@ -133,7 +164,7 @@ func (r Config) Initialize(ctx context.Context, tracer trace.Tracer) (sources.So
|
||||
|
||||
} else {
|
||||
// Initializes a BigQuery Google SQL source
|
||||
client, restService, tokenSource, err = initBigQueryConnection(ctx, tracer, r.Name, r.Project, r.Location, r.ImpersonateServiceAccount)
|
||||
client, restService, tokenSource, err = initBigQueryConnection(ctx, tracer, r.Name, r.Project, r.Location, r.ImpersonateServiceAccount, r.Scopes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error creating client from ADC: %w", err)
|
||||
}
|
||||
@@ -396,19 +427,26 @@ func (s *Source) BigQueryTokenSource() oauth2.TokenSource {
|
||||
return s.TokenSource
|
||||
}
|
||||
|
||||
func (s *Source) BigQueryTokenSourceWithScope(ctx context.Context, scope string) (oauth2.TokenSource, error) {
|
||||
func (s *Source) BigQueryTokenSourceWithScope(ctx context.Context, scopes []string) (oauth2.TokenSource, error) {
|
||||
if len(scopes) == 0 {
|
||||
scopes = s.Scopes
|
||||
if len(scopes) == 0 {
|
||||
scopes = []string{CloudPlatformScope}
|
||||
}
|
||||
}
|
||||
|
||||
if s.ImpersonateServiceAccount != "" {
|
||||
// Create impersonated credentials token source with the requested scope
|
||||
// Create impersonated credentials token source with the requested scopes
|
||||
ts, err := impersonate.CredentialsTokenSource(ctx, impersonate.CredentialsConfig{
|
||||
TargetPrincipal: s.ImpersonateServiceAccount,
|
||||
Scopes: []string{scope},
|
||||
Scopes: scopes,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create impersonated credentials for %q with scope %q: %w", s.ImpersonateServiceAccount, scope, err)
|
||||
return nil, fmt.Errorf("failed to create impersonated credentials for %q with scopes %v: %w", s.ImpersonateServiceAccount, scopes, err)
|
||||
}
|
||||
return ts, nil
|
||||
}
|
||||
return google.DefaultTokenSource(ctx, scope)
|
||||
return google.DefaultTokenSource(ctx, scopes...)
|
||||
}
|
||||
|
||||
func (s *Source) GetMaxQueryResultRows() int {
|
||||
@@ -454,7 +492,7 @@ func (s *Source) lazyInitDataplexClient(ctx context.Context, tracer trace.Tracer
|
||||
|
||||
return func() (*dataplexapi.CatalogClient, DataplexClientCreator, error) {
|
||||
once.Do(func() {
|
||||
c, cc, e := initDataplexConnection(ctx, tracer, s.Name, s.Project, s.UseClientOAuth, s.ImpersonateServiceAccount)
|
||||
c, cc, e := initDataplexConnection(ctx, tracer, s.Name, s.Project, s.UseClientOAuth, s.ImpersonateServiceAccount, s.Scopes)
|
||||
if e != nil {
|
||||
err = fmt.Errorf("failed to initialize dataplex client: %w", e)
|
||||
return
|
||||
@@ -620,6 +658,7 @@ func initBigQueryConnection(
|
||||
project string,
|
||||
location string,
|
||||
impersonateServiceAccount string,
|
||||
scopes []string,
|
||||
) (*bigqueryapi.Client, *bigqueryrestapi.Service, oauth2.TokenSource, error) {
|
||||
ctx, span := sources.InitConnectionSpan(ctx, tracer, SourceKind, name)
|
||||
defer span.End()
|
||||
@@ -632,12 +671,21 @@ func initBigQueryConnection(
|
||||
var tokenSource oauth2.TokenSource
|
||||
var opts []option.ClientOption
|
||||
|
||||
var credScopes []string
|
||||
if len(scopes) > 0 {
|
||||
credScopes = scopes
|
||||
} else if impersonateServiceAccount != "" {
|
||||
credScopes = []string{CloudPlatformScope}
|
||||
} else {
|
||||
credScopes = []string{bigqueryapi.Scope}
|
||||
}
|
||||
|
||||
if impersonateServiceAccount != "" {
|
||||
// Create impersonated credentials token source with cloud-platform scope
|
||||
// Create impersonated credentials token source
|
||||
// This broader scope is needed for tools like conversational analytics
|
||||
cloudPlatformTokenSource, err := impersonate.CredentialsTokenSource(ctx, impersonate.CredentialsConfig{
|
||||
TargetPrincipal: impersonateServiceAccount,
|
||||
Scopes: []string{"https://www.googleapis.com/auth/cloud-platform"},
|
||||
Scopes: credScopes,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, nil, nil, fmt.Errorf("failed to create impersonated credentials for %q: %w", impersonateServiceAccount, err)
|
||||
@@ -649,9 +697,9 @@ func initBigQueryConnection(
|
||||
}
|
||||
} else {
|
||||
// Use default credentials
|
||||
cred, err := google.FindDefaultCredentials(ctx, bigqueryapi.Scope)
|
||||
cred, err := google.FindDefaultCredentials(ctx, credScopes...)
|
||||
if err != nil {
|
||||
return nil, nil, nil, fmt.Errorf("failed to find default Google Cloud credentials with scope %q: %w", bigqueryapi.Scope, err)
|
||||
return nil, nil, nil, fmt.Errorf("failed to find default Google Cloud credentials with scopes %v: %w", credScopes, err)
|
||||
}
|
||||
tokenSource = cred.TokenSource
|
||||
opts = []option.ClientOption{
|
||||
@@ -742,6 +790,7 @@ func initDataplexConnection(
|
||||
project string,
|
||||
useClientOAuth bool,
|
||||
impersonateServiceAccount string,
|
||||
scopes []string,
|
||||
) (*dataplexapi.CatalogClient, DataplexClientCreator, error) {
|
||||
var client *dataplexapi.CatalogClient
|
||||
var clientCreator DataplexClientCreator
|
||||
@@ -760,11 +809,16 @@ func initDataplexConnection(
|
||||
} else {
|
||||
var opts []option.ClientOption
|
||||
|
||||
credScopes := scopes
|
||||
if len(credScopes) == 0 {
|
||||
credScopes = []string{CloudPlatformScope}
|
||||
}
|
||||
|
||||
if impersonateServiceAccount != "" {
|
||||
// Create impersonated credentials token source
|
||||
ts, err := impersonate.CredentialsTokenSource(ctx, impersonate.CredentialsConfig{
|
||||
TargetPrincipal: impersonateServiceAccount,
|
||||
Scopes: []string{"https://www.googleapis.com/auth/cloud-platform"},
|
||||
Scopes: credScopes,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to create impersonated credentials for %q: %w", impersonateServiceAccount, err)
|
||||
@@ -775,7 +829,7 @@ func initDataplexConnection(
|
||||
}
|
||||
} else {
|
||||
// Use default credentials
|
||||
cred, err := google.FindDefaultCredentials(ctx)
|
||||
cred, err := google.FindDefaultCredentials(ctx, credScopes...)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to find default Google Cloud credentials: %w", err)
|
||||
}
|
||||
|
||||
@@ -132,6 +132,28 @@ func TestParseFromYamlBigQuery(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "with custom scopes example",
|
||||
in: `
|
||||
sources:
|
||||
my-instance:
|
||||
kind: bigquery
|
||||
project: my-project
|
||||
location: us
|
||||
scopes:
|
||||
- https://www.googleapis.com/auth/bigquery
|
||||
- https://www.googleapis.com/auth/cloud-platform
|
||||
`,
|
||||
want: server.SourceConfigs{
|
||||
"my-instance": bigquery.Config{
|
||||
Name: "my-instance",
|
||||
Kind: bigquery.SourceKind,
|
||||
Project: "my-project",
|
||||
Location: "us",
|
||||
Scopes: []string{"https://www.googleapis.com/auth/bigquery", "https://www.googleapis.com/auth/cloud-platform"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range tcs {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
|
||||
@@ -56,7 +56,7 @@ func newConfig(ctx context.Context, name string, decoder *yaml.Decoder) (tools.T
|
||||
|
||||
type compatibleSource interface {
|
||||
BigQueryClient() *bigqueryapi.Client
|
||||
BigQueryTokenSourceWithScope(ctx context.Context, scope string) (oauth2.TokenSource, error)
|
||||
BigQueryTokenSourceWithScope(ctx context.Context, scopes []string) (oauth2.TokenSource, error)
|
||||
BigQueryProject() string
|
||||
BigQueryLocation() string
|
||||
GetMaxQueryResultRows() int
|
||||
@@ -191,10 +191,10 @@ func (t Tool) Invoke(ctx context.Context, resourceMgr tools.SourceProvider, para
|
||||
return nil, fmt.Errorf("error parsing access token: %w", err)
|
||||
}
|
||||
} else {
|
||||
// Get cloud-platform token source for Gemini Data Analytics API during initialization
|
||||
tokenSource, err := source.BigQueryTokenSourceWithScope(ctx, "https://www.googleapis.com/auth/cloud-platform")
|
||||
// Get a token source for the Gemini Data Analytics API.
|
||||
tokenSource, err := source.BigQueryTokenSourceWithScope(ctx, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get cloud-platform token source: %w", err)
|
||||
return nil, fmt.Errorf("failed to get token source: %w", err)
|
||||
}
|
||||
|
||||
// Use cloud-platform token source for Gemini Data Analytics API
|
||||
|
||||
Reference in New Issue
Block a user