feat(bigquery): add Google BigQuery integration (#3341)

* feat(bigquery): add Google BigQuery integration

* fix(bigquery): add auth provider, fix docsLink and insertedRows count

* fix(bigquery): set pageToken visibility to user-or-llm for pagination

* fix(bigquery): use prefixed export names to avoid aliased imports

* lint

* improvement(bigquery): destructure tool outputs with structured array/object types

* lint
This commit is contained in:
Waleed
2026-02-25 19:31:06 -08:00
committed by GitHub
parent d98545d554
commit e4fb8b2fdd
19 changed files with 1378 additions and 0 deletions

View File

@@ -3430,6 +3430,23 @@ export const ResendIcon = (props: SVGProps<SVGSVGElement>) => (
</svg>
)
// Google BigQuery product mark (blue hexagon with a magnifier), 64x64 viewBox.
// Incoming SVG props are spread onto the root <svg>; note xmlns/viewBox are
// written after the spread, so callers cannot override those two attributes.
export const GoogleBigQueryIcon = (props: SVGProps<SVGSVGElement>) => (
  <svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 64 64'>
    {/* Outer hexagon */}
    <path
      d='M14.48 58.196L.558 34.082c-.744-1.288-.744-2.876 0-4.164L14.48 5.805c.743-1.287 2.115-2.08 3.6-2.082h27.857c1.48.007 2.845.8 3.585 2.082l13.92 24.113c.744 1.288.744 2.876 0 4.164L49.52 58.196c-.743 1.287-2.115 2.08-3.6 2.082H18.07c-1.483-.005-2.85-.798-3.593-2.082z'
      fill='#4386fa'
    />
    {/* Subtle drop-shadow cast by the magnifier */}
    <path
      d='M40.697 24.235s3.87 9.283-1.406 14.545-14.883 1.894-14.883 1.894L43.95 60.27h1.984c1.486-.002 2.858-.796 3.6-2.082L58.75 42.23z'
      opacity='.1'
    />
    {/* Magnifying glass with bar-chart interior */}
    <path
      d='M45.267 43.23L41 38.953a.67.67 0 0 0-.158-.12 11.63 11.63 0 1 0-2.032 2.037.67.67 0 0 0 .113.15l4.277 4.277a.67.67 0 0 0 .947 0l1.12-1.12a.67.67 0 0 0 0-.947zM31.64 40.464a8.75 8.75 0 1 1 8.749-8.749 8.75 8.75 0 0 1-8.749 8.749zm-5.593-9.216v3.616c.557.983 1.363 1.803 2.338 2.375v-6.013zm4.375-2.998v9.772a6.45 6.45 0 0 0 2.338 0V28.25zm6.764 6.606v-2.142H34.85v4.5a6.43 6.43 0 0 0 2.338-2.368z'
      fill='#fff'
    />
  </svg>
)
export const GoogleVaultIcon = (props: SVGProps<SVGSVGElement>) => (
<svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 82 82'>
<path

View File

@@ -42,6 +42,7 @@ import {
GitLabIcon,
GmailIcon,
GongIcon,
GoogleBigQueryIcon,
GoogleBooksIcon,
GoogleCalendarIcon,
GoogleDocsIcon,
@@ -188,6 +189,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
gitlab: GitLabIcon,
gmail_v2: GmailIcon,
gong: GongIcon,
google_bigquery: GoogleBigQueryIcon,
google_books: GoogleBooksIcon,
google_calendar_v2: GoogleCalendarIcon,
google_docs: GoogleDocsIcon,

View File

@@ -0,0 +1,161 @@
---
title: Google BigQuery
description: Query, list, and insert data in Google BigQuery
---
import { BlockInfoCard } from "@/components/ui/block-info-card"
<BlockInfoCard
type="google_bigquery"
color="#E0E0E0"
/>
## Usage Instructions
Connect to Google BigQuery to run SQL queries, list datasets and tables, get table metadata, and insert rows.
## Tools
### `google_bigquery_query`
Run a SQL query against Google BigQuery and return the results
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `projectId` | string | Yes | Google Cloud project ID |
| `query` | string | Yes | SQL query to execute |
| `useLegacySql` | boolean | No | Whether to use legacy SQL syntax \(default: false\) |
| `maxResults` | number | No | Maximum number of rows to return |
| `defaultDatasetId` | string | No | Default dataset for unqualified table names |
| `location` | string | No | Processing location \(e.g., "US", "EU"\) |
#### Output
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `columns` | array | Array of column names from the query result |
| `rows` | array | Array of row objects keyed by column name |
| `totalRows` | string | Total number of rows in the complete result set |
| `jobComplete` | boolean | Whether the query completed within the timeout |
| `totalBytesProcessed` | string | Total bytes processed by the query |
| `cacheHit` | boolean | Whether the query result was served from cache |
| `jobReference` | object | Job reference \(useful when jobComplete is false\) |
| ↳ `projectId` | string | Project ID containing the job |
| ↳ `jobId` | string | Unique job identifier |
| ↳ `location` | string | Geographic location of the job |
| `pageToken` | string | Token for fetching additional result pages |
### `google_bigquery_list_datasets`
List all datasets in a Google BigQuery project
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `projectId` | string | Yes | Google Cloud project ID |
| `maxResults` | number | No | Maximum number of datasets to return |
| `pageToken` | string | No | Token for pagination |
#### Output
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `datasets` | array | Array of dataset objects |
| ↳ `datasetId` | string | Unique dataset identifier |
| ↳ `projectId` | string | Project ID containing this dataset |
| ↳ `friendlyName` | string | Descriptive name for the dataset |
| ↳ `location` | string | Geographic location where the data resides |
| `nextPageToken` | string | Token for fetching next page of results |
### `google_bigquery_list_tables`
List all tables in a Google BigQuery dataset
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `projectId` | string | Yes | Google Cloud project ID |
| `datasetId` | string | Yes | BigQuery dataset ID |
| `maxResults` | number | No | Maximum number of tables to return |
| `pageToken` | string | No | Token for pagination |
#### Output
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `tables` | array | Array of table objects |
| ↳ `tableId` | string | Table identifier |
| ↳ `datasetId` | string | Dataset ID containing this table |
| ↳ `projectId` | string | Project ID containing this table |
| ↳ `type` | string | Table type \(TABLE, VIEW, EXTERNAL, etc.\) |
| ↳ `friendlyName` | string | User-friendly name for the table |
| ↳ `creationTime` | string | Time when created, in milliseconds since epoch |
| `totalItems` | number | Total number of tables in the dataset |
| `nextPageToken` | string | Token for fetching next page of results |
### `google_bigquery_get_table`
Get metadata and schema for a Google BigQuery table
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `projectId` | string | Yes | Google Cloud project ID |
| `datasetId` | string | Yes | BigQuery dataset ID |
| `tableId` | string | Yes | BigQuery table ID |
#### Output
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `tableId` | string | Table ID |
| `datasetId` | string | Dataset ID |
| `projectId` | string | Project ID |
| `type` | string | Table type \(TABLE, VIEW, SNAPSHOT, MATERIALIZED_VIEW, EXTERNAL\) |
| `description` | string | Table description |
| `numRows` | string | Total number of rows |
| `numBytes` | string | Total size in bytes, excluding data in streaming buffer |
| `schema` | array | Array of column definitions |
| ↳ `name` | string | Column name |
| ↳ `type` | string | Data type \(STRING, INTEGER, FLOAT, BOOLEAN, TIMESTAMP, RECORD, etc.\) |
| ↳ `mode` | string | Column mode \(NULLABLE, REQUIRED, or REPEATED\) |
| ↳ `description` | string | Column description |
| `creationTime` | string | Table creation time \(milliseconds since epoch\) |
| `lastModifiedTime` | string | Last modification time \(milliseconds since epoch\) |
| `location` | string | Geographic location where the table resides |
### `google_bigquery_insert_rows`
Insert rows into a Google BigQuery table using streaming insert
#### Input
| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `projectId` | string | Yes | Google Cloud project ID |
| `datasetId` | string | Yes | BigQuery dataset ID |
| `tableId` | string | Yes | BigQuery table ID |
| `rows` | string | Yes | JSON array of row objects to insert |
| `skipInvalidRows` | boolean | No | Whether to insert valid rows even if some are invalid |
| `ignoreUnknownValues` | boolean | No | Whether to ignore columns not in the table schema |
#### Output
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `insertedRows` | number | Number of rows successfully inserted |
| `errors` | array | Array of per-row insertion errors \(empty if all succeeded\) |
| ↳ `index` | number | Zero-based index of the row that failed |
| ↳ `errors` | array | Error details for this row |
| ↳ `reason` | string | Short error code summarizing the error |
| ↳ `location` | string | Where the error occurred |
| ↳ `message` | string | Human-readable error description |

View File

@@ -37,6 +37,7 @@
"gitlab",
"gmail",
"gong",
"google_bigquery",
"google_books",
"google_calendar",
"google_docs",

View File

@@ -44,6 +44,7 @@ const SCOPE_DESCRIPTIONS: Record<string, string> = {
'https://www.googleapis.com/auth/userinfo.profile': 'View basic profile info',
'https://www.googleapis.com/auth/forms.body': 'View and manage Google Forms',
'https://www.googleapis.com/auth/forms.responses.readonly': 'View responses to Google Forms',
'https://www.googleapis.com/auth/bigquery': 'View and manage data in Google BigQuery',
'https://www.googleapis.com/auth/ediscovery': 'Access Google Vault for eDiscovery',
'https://www.googleapis.com/auth/devstorage.read_only': 'Read files from Google Cloud Storage',
'https://www.googleapis.com/auth/admin.directory.group': 'Manage Google Workspace groups',

View File

@@ -0,0 +1,256 @@
import { GoogleBigQueryIcon } from '@/components/icons'
import type { BlockConfig } from '@/blocks/types'
import { AuthMode } from '@/blocks/types'
// Block configuration for the Google BigQuery integration.
// Exposes five operations (query, list_datasets, list_tables, get_table,
// insert_rows) behind one block; each sub-block input is shown conditionally
// based on the selected operation, and `tools.config` routes the selected
// operation to the matching `google_bigquery_*` tool.
export const GoogleBigQueryBlock: BlockConfig = {
  type: 'google_bigquery',
  name: 'Google BigQuery',
  description: 'Query, list, and insert data in Google BigQuery',
  longDescription:
    'Connect to Google BigQuery to run SQL queries, list datasets and tables, get table metadata, and insert rows.',
  docsLink: 'https://docs.sim.ai/tools/google_bigquery',
  category: 'tools',
  bgColor: '#E0E0E0',
  icon: GoogleBigQueryIcon,
  authMode: AuthMode.OAuth,
  subBlocks: [
    // Operation selector — its value drives every `condition`/`required`
    // field below. Defaults to running a query.
    {
      id: 'operation',
      title: 'Operation',
      type: 'dropdown',
      options: [
        { label: 'Run Query', id: 'query' },
        { label: 'List Datasets', id: 'list_datasets' },
        { label: 'List Tables', id: 'list_tables' },
        { label: 'Get Table', id: 'get_table' },
        { label: 'Insert Rows', id: 'insert_rows' },
      ],
      value: () => 'query',
    },
    // Basic-mode credential picker. Shares the canonical `oauthCredential`
    // param with the advanced-mode input below, so only one is used at a time.
    {
      id: 'credential',
      title: 'Google Account',
      type: 'oauth-input',
      canonicalParamId: 'oauthCredential',
      mode: 'basic',
      required: true,
      serviceId: 'google-bigquery',
      requiredScopes: ['https://www.googleapis.com/auth/bigquery'],
      placeholder: 'Select Google account',
    },
    // Advanced-mode credential input: a raw credential ID string.
    {
      id: 'manualCredential',
      title: 'Google Account',
      type: 'short-input',
      canonicalParamId: 'oauthCredential',
      mode: 'advanced',
      placeholder: 'Enter credential ID',
      required: true,
    },
    // Project ID is required by every operation, so it has no condition.
    {
      id: 'projectId',
      title: 'Project ID',
      type: 'short-input',
      placeholder: 'Enter Google Cloud project ID',
      required: true,
    },
    // SQL input for the query operation; the wand generates Standard SQL
    // from a natural-language description.
    {
      id: 'query',
      title: 'SQL Query',
      type: 'long-input',
      placeholder: 'SELECT * FROM `project.dataset.table` LIMIT 100',
      condition: { field: 'operation', value: 'query' },
      required: { field: 'operation', value: 'query' },
      wandConfig: {
        enabled: true,
        prompt: `Generate a BigQuery Standard SQL query based on the user's description.
The query should:
- Use Standard SQL syntax (not Legacy SQL)
- Be well-formatted and efficient
- Include appropriate LIMIT clauses when applicable
Examples:
- "get all users" -> SELECT * FROM \`project.dataset.users\` LIMIT 1000
- "count orders by status" -> SELECT status, COUNT(*) as count FROM \`project.dataset.orders\` GROUP BY status
- "recent events" -> SELECT * FROM \`project.dataset.events\` ORDER BY created_at DESC LIMIT 100
Return ONLY the SQL query - no explanations, no quotes, no extra text.`,
        placeholder: 'Describe the query you want to run...',
      },
    },
    {
      id: 'useLegacySql',
      title: 'Use Legacy SQL',
      type: 'switch',
      condition: { field: 'operation', value: 'query' },
    },
    // Shared page-size input for all list-like operations.
    {
      id: 'maxResults',
      title: 'Max Results',
      type: 'short-input',
      placeholder: 'Maximum rows to return',
      condition: { field: 'operation', value: ['query', 'list_datasets', 'list_tables'] },
    },
    {
      id: 'defaultDatasetId',
      title: 'Default Dataset',
      type: 'short-input',
      placeholder: 'Default dataset for unqualified table names',
      condition: { field: 'operation', value: 'query' },
    },
    {
      id: 'location',
      title: 'Location',
      type: 'short-input',
      placeholder: 'Processing location (e.g., US, EU)',
      condition: { field: 'operation', value: 'query' },
    },
    {
      id: 'datasetId',
      title: 'Dataset ID',
      type: 'short-input',
      placeholder: 'Enter BigQuery dataset ID',
      condition: { field: 'operation', value: ['list_tables', 'get_table', 'insert_rows'] },
      required: { field: 'operation', value: ['list_tables', 'get_table', 'insert_rows'] },
    },
    {
      id: 'tableId',
      title: 'Table ID',
      type: 'short-input',
      placeholder: 'Enter BigQuery table ID',
      condition: { field: 'operation', value: ['get_table', 'insert_rows'] },
      required: { field: 'operation', value: ['get_table', 'insert_rows'] },
    },
    // Rows to stream-insert, entered as a JSON array string; the wand can
    // generate sample rows from a description.
    {
      id: 'rows',
      title: 'Rows',
      type: 'long-input',
      placeholder: '[{"column1": "value1", "column2": 42}]',
      condition: { field: 'operation', value: 'insert_rows' },
      required: { field: 'operation', value: 'insert_rows' },
      wandConfig: {
        enabled: true,
        prompt: `Generate a JSON array of row objects for BigQuery insertion based on the user's description.
Each row should be a JSON object where keys are column names and values match the expected types.
Examples:
- "3 users" -> [{"name": "Alice", "email": "alice@example.com"}, {"name": "Bob", "email": "bob@example.com"}, {"name": "Charlie", "email": "charlie@example.com"}]
- "order record" -> [{"order_id": "ORD-001", "amount": 99.99, "status": "pending"}]
Return ONLY the JSON array - no explanations, no wrapping, no extra text.`,
        placeholder: 'Describe the rows to insert...',
        generationType: 'json-object',
      },
    },
    {
      id: 'skipInvalidRows',
      title: 'Skip Invalid Rows',
      type: 'switch',
      condition: { field: 'operation', value: 'insert_rows' },
    },
    {
      id: 'ignoreUnknownValues',
      title: 'Ignore Unknown Values',
      type: 'switch',
      condition: { field: 'operation', value: 'insert_rows' },
    },
    {
      id: 'pageToken',
      title: 'Page Token',
      type: 'short-input',
      placeholder: 'Pagination token',
      condition: { field: 'operation', value: ['list_datasets', 'list_tables'] },
    },
  ],
  tools: {
    access: [
      'google_bigquery_query',
      'google_bigquery_list_datasets',
      'google_bigquery_list_tables',
      'google_bigquery_get_table',
      'google_bigquery_insert_rows',
    ],
    config: {
      // Maps the selected operation to the concrete tool id.
      tool: (params) => {
        switch (params.operation) {
          case 'query':
            return 'google_bigquery_query'
          case 'list_datasets':
            return 'google_bigquery_list_datasets'
          case 'list_tables':
            return 'google_bigquery_list_tables'
          case 'get_table':
            return 'google_bigquery_get_table'
          case 'insert_rows':
            return 'google_bigquery_insert_rows'
          default:
            throw new Error(`Invalid Google BigQuery operation: ${params.operation}`)
        }
      },
      // Normalizes params before they reach the tool: `rows` is coerced to a
      // JSON string and `maxResults` to a number; both are dropped when empty.
      params: (params) => {
        const { oauthCredential, rows, maxResults, ...rest } = params
        return {
          ...rest,
          oauthCredential,
          ...(rows && { rows: typeof rows === 'string' ? rows : JSON.stringify(rows) }),
          ...(maxResults !== undefined && maxResults !== '' && { maxResults: Number(maxResults) }),
        }
      },
    },
  },
  inputs: {
    operation: { type: 'string', description: 'Operation to perform' },
    oauthCredential: { type: 'string', description: 'Google BigQuery OAuth credential' },
    projectId: { type: 'string', description: 'Google Cloud project ID' },
    query: { type: 'string', description: 'SQL query to execute' },
    useLegacySql: { type: 'boolean', description: 'Whether to use legacy SQL syntax' },
    maxResults: { type: 'number', description: 'Maximum number of results to return' },
    defaultDatasetId: {
      type: 'string',
      description: 'Default dataset for unqualified table names',
    },
    location: { type: 'string', description: 'Processing location' },
    datasetId: { type: 'string', description: 'BigQuery dataset ID' },
    tableId: { type: 'string', description: 'BigQuery table ID' },
    rows: { type: 'string', description: 'JSON array of row objects to insert' },
    skipInvalidRows: { type: 'boolean', description: 'Whether to skip invalid rows during insert' },
    ignoreUnknownValues: {
      type: 'boolean',
      description: 'Whether to ignore unknown column values',
    },
    pageToken: { type: 'string', description: 'Pagination token' },
  },
  // Union of all tool outputs; each entry notes which operation produces it.
  outputs: {
    columns: { type: 'json', description: 'Array of column names (query)' },
    rows: { type: 'json', description: 'Array of row objects (query)' },
    totalRows: { type: 'string', description: 'Total number of rows (query)' },
    jobComplete: { type: 'boolean', description: 'Whether the query completed (query)' },
    totalBytesProcessed: { type: 'string', description: 'Bytes processed (query)' },
    cacheHit: { type: 'boolean', description: 'Whether result was cached (query)' },
    jobReference: { type: 'json', description: 'Job reference for incomplete queries (query)' },
    pageToken: { type: 'string', description: 'Token for additional result pages (query)' },
    datasets: { type: 'json', description: 'Array of dataset objects (list_datasets)' },
    tables: { type: 'json', description: 'Array of table objects (list_tables)' },
    totalItems: { type: 'number', description: 'Total items count (list_tables)' },
    tableId: { type: 'string', description: 'Table ID (get_table)' },
    datasetId: { type: 'string', description: 'Dataset ID (get_table)' },
    type: { type: 'string', description: 'Table type (get_table)' },
    description: { type: 'string', description: 'Table description (get_table)' },
    numRows: { type: 'string', description: 'Row count (get_table)' },
    numBytes: { type: 'string', description: 'Size in bytes (get_table)' },
    schema: { type: 'json', description: 'Column definitions (get_table)' },
    creationTime: { type: 'string', description: 'Creation time (get_table)' },
    lastModifiedTime: { type: 'string', description: 'Last modified time (get_table)' },
    location: { type: 'string', description: 'Data location (get_table)' },
    insertedRows: { type: 'number', description: 'Rows inserted (insert_rows)' },
    errors: { type: 'json', description: 'Insert errors (insert_rows)' },
    nextPageToken: { type: 'string', description: 'Token for next page of results' },
  },
}

View File

@@ -43,6 +43,7 @@ import { GitLabBlock } from '@/blocks/blocks/gitlab'
import { GmailBlock, GmailV2Block } from '@/blocks/blocks/gmail'
import { GongBlock } from '@/blocks/blocks/gong'
import { GoogleSearchBlock } from '@/blocks/blocks/google'
import { GoogleBigQueryBlock } from '@/blocks/blocks/google_bigquery'
import { GoogleBooksBlock } from '@/blocks/blocks/google_books'
import { GoogleCalendarBlock, GoogleCalendarV2Block } from '@/blocks/blocks/google_calendar'
import { GoogleDocsBlock } from '@/blocks/blocks/google_docs'
@@ -242,6 +243,7 @@ export const registry: Record<string, BlockConfig> = {
google_sheets_v2: GoogleSheetsV2Block,
google_slides: GoogleSlidesBlock,
google_slides_v2: GoogleSlidesV2Block,
google_bigquery: GoogleBigQueryBlock,
google_vault: GoogleVaultBlock,
grafana: GrafanaBlock,
grain: GrainBlock,

View File

@@ -3430,6 +3430,23 @@ export const ResendIcon = (props: SVGProps<SVGSVGElement>) => (
</svg>
)
export const GoogleBigQueryIcon = (props: SVGProps<SVGSVGElement>) => (
<svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 64 64'>
<path
d='M14.48 58.196L.558 34.082c-.744-1.288-.744-2.876 0-4.164L14.48 5.805c.743-1.287 2.115-2.08 3.6-2.082h27.857c1.48.007 2.845.8 3.585 2.082l13.92 24.113c.744 1.288.744 2.876 0 4.164L49.52 58.196c-.743 1.287-2.115 2.08-3.6 2.082H18.07c-1.483-.005-2.85-.798-3.593-2.082z'
fill='#4386fa'
/>
<path
d='M40.697 24.235s3.87 9.283-1.406 14.545-14.883 1.894-14.883 1.894L43.95 60.27h1.984c1.486-.002 2.858-.796 3.6-2.082L58.75 42.23z'
opacity='.1'
/>
<path
d='M45.267 43.23L41 38.953a.67.67 0 0 0-.158-.12 11.63 11.63 0 1 0-2.032 2.037.67.67 0 0 0 .113.15l4.277 4.277a.67.67 0 0 0 .947 0l1.12-1.12a.67.67 0 0 0 0-.947zM31.64 40.464a8.75 8.75 0 1 1 8.749-8.749 8.75 8.75 0 0 1-8.749 8.749zm-5.593-9.216v3.616c.557.983 1.363 1.803 2.338 2.375v-6.013zm4.375-2.998v9.772a6.45 6.45 0 0 0 2.338 0V28.25zm6.764 6.606v-2.142H34.85v4.5a6.43 6.43 0 0 0 2.338-2.368z'
fill='#fff'
/>
</svg>
)
export const GoogleVaultIcon = (props: SVGProps<SVGSVGElement>) => (
<svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 82 82'>
<path

View File

@@ -484,6 +484,7 @@ export const auth = betterAuth({
'google-docs',
'google-sheets',
'google-forms',
'google-bigquery',
'google-vault',
'google-groups',
'vertex-ai',
@@ -1068,6 +1069,46 @@ export const auth = betterAuth({
}
},
},
// OAuth provider entry for Google BigQuery. Reuses the shared Google OAuth
// client credentials but requests the BigQuery scope, so tokens are granted
// and stored independently of the other Google integrations in this list.
{
  providerId: 'google-bigquery',
  clientId: env.GOOGLE_CLIENT_ID as string,
  clientSecret: env.GOOGLE_CLIENT_SECRET as string,
  discoveryUrl: 'https://accounts.google.com/.well-known/openid-configuration',
  // 'offline' + 'consent' ensure a refresh token is returned on every grant.
  accessType: 'offline',
  scopes: [
    'https://www.googleapis.com/auth/userinfo.email',
    'https://www.googleapis.com/auth/userinfo.profile',
    'https://www.googleapis.com/auth/bigquery',
  ],
  prompt: 'consent',
  redirectURI: `${getBaseUrl()}/api/auth/oauth2/callback/google-bigquery`,
  // Fetches the user's OIDC profile with the freshly issued access token.
  getUserInfo: async (tokens) => {
    try {
      const response = await fetch('https://openidconnect.googleapis.com/v1/userinfo', {
        headers: { Authorization: `Bearer ${tokens.accessToken}` },
      })
      if (!response.ok) {
        logger.error('Failed to fetch Google user info', { status: response.status })
        throw new Error(`Failed to fetch Google user info: ${response.statusText}`)
      }
      const profile = await response.json()
      const now = new Date()
      return {
        // NOTE(review): appends a random UUID to the Google subject id —
        // presumably so the same Google account can be linked more than once;
        // confirm this matches the other Google providers in this file.
        id: `${profile.sub}-${crypto.randomUUID()}`,
        name: profile.name || 'Google User',
        email: profile.email,
        image: profile.picture || undefined,
        emailVerified: profile.email_verified || false,
        createdAt: now,
        updatedAt: now,
      }
    } catch (error) {
      logger.error('Error in Google getUserInfo', { error })
      throw error
    }
  },
},
{
providerId: 'google-vault',
clientId: env.GOOGLE_CLIENT_ID as string,

View File

@@ -8,6 +8,7 @@ import {
DropboxIcon,
GithubIcon,
GmailIcon,
GoogleBigQueryIcon,
GoogleCalendarIcon,
GoogleDocsIcon,
GoogleDriveIcon,
@@ -119,6 +120,14 @@ export const OAUTH_PROVIDERS: Record<string, OAuthProviderConfig> = {
baseProviderIcon: GoogleIcon,
scopes: ['https://www.googleapis.com/auth/calendar'],
},
'google-bigquery': {
name: 'Google BigQuery',
description: 'Query, list, and insert data in Google BigQuery.',
providerId: 'google-bigquery',
icon: GoogleBigQueryIcon,
baseProviderIcon: GoogleIcon,
scopes: ['https://www.googleapis.com/auth/bigquery'],
},
'google-vault': {
name: 'Google Vault',
description: 'Search, export, and manage matters/holds via Google Vault.',

View File

@@ -7,6 +7,7 @@ export type OAuthProvider =
| 'google-docs'
| 'google-sheets'
| 'google-calendar'
| 'google-bigquery'
| 'google-vault'
| 'google-forms'
| 'google-groups'
@@ -52,6 +53,7 @@ export type OAuthService =
| 'google-docs'
| 'google-sheets'
| 'google-calendar'
| 'google-bigquery'
| 'google-vault'
| 'google-forms'
| 'google-groups'

View File

@@ -0,0 +1,132 @@
import type {
GoogleBigQueryGetTableParams,
GoogleBigQueryGetTableResponse,
} from '@/tools/google_bigquery/types'
import type { ToolConfig } from '@/tools/types'
/**
 * Tool: fetch metadata and the column schema for a single BigQuery table
 * via `tables.get`. Requires the BigQuery OAuth provider.
 */
export const googleBigQueryGetTableTool: ToolConfig<
  GoogleBigQueryGetTableParams,
  GoogleBigQueryGetTableResponse
> = {
  id: 'google_bigquery_get_table',
  name: 'BigQuery Get Table',
  description: 'Get metadata and schema for a Google BigQuery table',
  version: '1.0.0',
  oauth: {
    required: true,
    provider: 'google-bigquery',
  },
  params: {
    accessToken: {
      type: 'string',
      required: true,
      visibility: 'hidden',
      description: 'OAuth access token',
    },
    projectId: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Google Cloud project ID',
    },
    datasetId: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'BigQuery dataset ID',
    },
    tableId: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'BigQuery table ID',
    },
  },
  request: {
    // Each path segment is URL-encoded so IDs containing reserved
    // characters cannot break out of their segment.
    url: (params) => {
      const base = 'https://bigquery.googleapis.com/bigquery/v2'
      const project = encodeURIComponent(params.projectId)
      const dataset = encodeURIComponent(params.datasetId)
      const table = encodeURIComponent(params.tableId)
      return `${base}/projects/${project}/datasets/${dataset}/tables/${table}`
    },
    method: 'GET',
    headers: (params) => {
      return { Authorization: `Bearer ${params.accessToken}` }
    },
  },
  // Flattens the tables.get payload: table reference fields are lifted to the
  // top level and missing optional fields are normalized to null.
  transformResponse: async (response: Response) => {
    const payload = await response.json()
    if (!response.ok) {
      throw new Error(payload.error?.message || 'Failed to get BigQuery table')
    }
    type RawField = { name: string; type: string; mode?: string; description?: string }
    const rawFields: RawField[] = payload.schema?.fields ?? []
    const schema = rawFields.map((field) => ({
      name: field.name,
      type: field.type,
      mode: field.mode ?? null,
      description: field.description ?? null,
    }))
    const ref = payload.tableReference
    return {
      success: true,
      output: {
        tableId: ref?.tableId ?? null,
        datasetId: ref?.datasetId ?? null,
        projectId: ref?.projectId ?? null,
        type: payload.type ?? null,
        description: payload.description ?? null,
        numRows: payload.numRows ?? null,
        numBytes: payload.numBytes ?? null,
        schema,
        creationTime: payload.creationTime ?? null,
        lastModifiedTime: payload.lastModifiedTime ?? null,
        location: payload.location ?? null,
      },
    }
  },
  outputs: {
    tableId: { type: 'string', description: 'Table ID' },
    datasetId: { type: 'string', description: 'Dataset ID' },
    projectId: { type: 'string', description: 'Project ID' },
    type: {
      type: 'string',
      description: 'Table type (TABLE, VIEW, SNAPSHOT, MATERIALIZED_VIEW, EXTERNAL)',
    },
    description: { type: 'string', description: 'Table description', optional: true },
    numRows: { type: 'string', description: 'Total number of rows' },
    numBytes: {
      type: 'string',
      description: 'Total size in bytes, excluding data in streaming buffer',
    },
    schema: {
      type: 'array',
      description: 'Array of column definitions',
      items: {
        type: 'object',
        properties: {
          name: { type: 'string', description: 'Column name' },
          type: {
            type: 'string',
            description: 'Data type (STRING, INTEGER, FLOAT, BOOLEAN, TIMESTAMP, RECORD, etc.)',
          },
          mode: {
            type: 'string',
            description: 'Column mode (NULLABLE, REQUIRED, or REPEATED)',
            optional: true,
          },
          description: { type: 'string', description: 'Column description', optional: true },
        },
      },
    },
    creationTime: { type: 'string', description: 'Table creation time (milliseconds since epoch)' },
    lastModifiedTime: {
      type: 'string',
      description: 'Last modification time (milliseconds since epoch)',
    },
    location: { type: 'string', description: 'Geographic location where the table resides' },
  },
}

View File

@@ -0,0 +1,5 @@
// Barrel file for the Google BigQuery tools. Each tool is exported with a
// `googleBigQuery` prefix so call sites can import them without aliasing.
export { googleBigQueryGetTableTool } from '@/tools/google_bigquery/get_table'
export { googleBigQueryInsertRowsTool } from '@/tools/google_bigquery/insert_rows'
export { googleBigQueryListDatasetsTool } from '@/tools/google_bigquery/list_datasets'
export { googleBigQueryListTablesTool } from '@/tools/google_bigquery/list_tables'
export { googleBigQueryQueryTool } from '@/tools/google_bigquery/query'

View File

@@ -0,0 +1,174 @@
import type {
GoogleBigQueryInsertRowsParams,
GoogleBigQueryInsertRowsResponse,
} from '@/tools/google_bigquery/types'
import type { ToolConfig } from '@/tools/types'
/**
 * Parse the `rows` tool parameter into an array of row objects.
 *
 * Accepts either a JSON string or an already-parsed array. Throws a
 * descriptive Error when the payload is not a JSON array — previously a
 * non-array payload surfaced as an opaque `TypeError: … .map is not a
 * function`. The parse was also duplicated in `body` and
 * `transformResponse`; both now share this helper.
 */
const parseRowsArray = (rows: unknown): Record<string, unknown>[] => {
  const parsed = typeof rows === 'string' ? JSON.parse(rows) : rows
  if (!Array.isArray(parsed)) {
    throw new Error('rows must be a JSON array of row objects')
  }
  return parsed as Record<string, unknown>[]
}

/**
 * Tool: stream rows into a BigQuery table via `tabledata.insertAll`.
 * Requires the BigQuery OAuth provider.
 */
export const googleBigQueryInsertRowsTool: ToolConfig<
  GoogleBigQueryInsertRowsParams,
  GoogleBigQueryInsertRowsResponse
> = {
  id: 'google_bigquery_insert_rows',
  name: 'BigQuery Insert Rows',
  description: 'Insert rows into a Google BigQuery table using streaming insert',
  version: '1.0.0',
  oauth: {
    required: true,
    provider: 'google-bigquery',
  },
  params: {
    accessToken: {
      type: 'string',
      required: true,
      visibility: 'hidden',
      description: 'OAuth access token',
    },
    projectId: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Google Cloud project ID',
    },
    datasetId: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'BigQuery dataset ID',
    },
    tableId: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'BigQuery table ID',
    },
    rows: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'JSON array of row objects to insert',
    },
    skipInvalidRows: {
      type: 'boolean',
      required: false,
      visibility: 'user-or-llm',
      description: 'Whether to insert valid rows even if some are invalid',
    },
    ignoreUnknownValues: {
      type: 'boolean',
      required: false,
      visibility: 'user-or-llm',
      description: 'Whether to ignore columns not in the table schema',
    },
  },
  request: {
    url: (params) =>
      `https://bigquery.googleapis.com/bigquery/v2/projects/${encodeURIComponent(params.projectId)}/datasets/${encodeURIComponent(params.datasetId)}/tables/${encodeURIComponent(params.tableId)}/insertAll`,
    method: 'POST',
    headers: (params) => ({
      Authorization: `Bearer ${params.accessToken}`,
      'Content-Type': 'application/json',
    }),
    // insertAll expects each row wrapped as { json: <row> }. Optional flags
    // are only sent when explicitly set, preserving API-side defaults.
    body: (params) => {
      const rows = parseRowsArray(params.rows).map((row) => ({ json: row }))
      const body: Record<string, unknown> = { rows }
      if (params.skipInvalidRows !== undefined) body.skipInvalidRows = params.skipInvalidRows
      if (params.ignoreUnknownValues !== undefined)
        body.ignoreUnknownValues = params.ignoreUnknownValues
      return body
    },
  },
  transformResponse: async (response: Response, params?: GoogleBigQueryInsertRowsParams) => {
    const data = await response.json()
    if (!response.ok) {
      const errorMessage = data.error?.message || 'Failed to insert rows into BigQuery table'
      throw new Error(errorMessage)
    }
    const insertErrors = data.insertErrors ?? []
    // Normalize per-row failures; missing detail fields become null.
    const errors = insertErrors.map(
      (err: {
        index: number
        errors: Array<{ reason?: string; location?: string; message?: string }>
      }) => ({
        index: err.index,
        errors: err.errors.map((e) => ({
          reason: e.reason ?? null,
          location: e.location ?? null,
          message: e.message ?? null,
        })),
      })
    )
    // Recompute the submitted row count from params (the API response does
    // not echo it). Parsing problems are treated as "unknown" => 0.
    let totalRows = 0
    if (params?.rows) {
      try {
        totalRows = parseRowsArray(params.rows).length
      } catch {
        totalRows = 0
      }
    }
    // When insertErrors is empty, all rows succeeded.
    // When insertErrors is present and skipInvalidRows is false (default),
    // the entire batch is rejected — no rows are inserted.
    let insertedRows = 0
    if (insertErrors.length === 0) {
      insertedRows = totalRows
    } else if (params?.skipInvalidRows) {
      const failedIndexes = new Set(insertErrors.map((e: { index: number }) => e.index))
      insertedRows = totalRows - failedIndexes.size
    }
    return {
      success: true,
      output: {
        insertedRows,
        errors,
      },
    }
  },
  outputs: {
    insertedRows: { type: 'number', description: 'Number of rows successfully inserted' },
    errors: {
      type: 'array',
      description: 'Array of per-row insertion errors (empty if all succeeded)',
      items: {
        type: 'object',
        properties: {
          index: { type: 'number', description: 'Zero-based index of the row that failed' },
          errors: {
            type: 'array',
            description: 'Error details for this row',
            items: {
              type: 'object',
              properties: {
                reason: {
                  type: 'string',
                  description: 'Short error code summarizing the error',
                  optional: true,
                },
                location: {
                  type: 'string',
                  description: 'Where the error occurred',
                  optional: true,
                },
                message: {
                  type: 'string',
                  description: 'Human-readable error description',
                  optional: true,
                },
              },
            },
          },
        },
      },
    },
  },
}

View File

@@ -0,0 +1,121 @@
import type {
GoogleBigQueryListDatasetsParams,
GoogleBigQueryListDatasetsResponse,
} from '@/tools/google_bigquery/types'
import type { ToolConfig } from '@/tools/types'
/**
 * Tool: enumerate the datasets of a BigQuery project via `datasets.list`,
 * with optional page-size and pagination-token controls.
 */
export const googleBigQueryListDatasetsTool: ToolConfig<
  GoogleBigQueryListDatasetsParams,
  GoogleBigQueryListDatasetsResponse
> = {
  id: 'google_bigquery_list_datasets',
  name: 'BigQuery List Datasets',
  description: 'List all datasets in a Google BigQuery project',
  version: '1.0.0',
  oauth: {
    required: true,
    provider: 'google-bigquery',
  },
  params: {
    accessToken: {
      type: 'string',
      required: true,
      visibility: 'hidden',
      description: 'OAuth access token',
    },
    projectId: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Google Cloud project ID',
    },
    maxResults: {
      type: 'number',
      required: false,
      visibility: 'user-or-llm',
      description: 'Maximum number of datasets to return',
    },
    pageToken: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Token for pagination',
    },
  },
  request: {
    // Builds the listing URL; pagination params are attached only when they
    // carry usable values (maxResults must be a finite positive number).
    url: (params) => {
      const endpoint = new URL(
        `https://bigquery.googleapis.com/bigquery/v2/projects/${encodeURIComponent(params.projectId)}/datasets`
      )
      const rawLimit = params.maxResults ?? null
      if (rawLimit !== null) {
        const limit = Number(rawLimit)
        if (Number.isFinite(limit) && limit > 0) {
          endpoint.searchParams.set('maxResults', String(limit))
        }
      }
      if (params.pageToken) {
        endpoint.searchParams.set('pageToken', params.pageToken)
      }
      return endpoint.toString()
    },
    method: 'GET',
    headers: (params) => {
      return { Authorization: `Bearer ${params.accessToken}` }
    },
  },
  // Flattens each dataset entry: the nested datasetReference is lifted to the
  // top level and absent optional fields are normalized to null.
  transformResponse: async (response: Response) => {
    const payload = await response.json()
    if (!response.ok) {
      throw new Error(payload.error?.message || 'Failed to list BigQuery datasets')
    }
    type RawDataset = {
      datasetReference: { datasetId: string; projectId: string }
      friendlyName?: string
      location?: string
    }
    const datasets: Array<{
      datasetId: string
      projectId: string
      friendlyName: string | null
      location: string | null
    }> = []
    for (const entry of (payload.datasets ?? []) as RawDataset[]) {
      datasets.push({
        datasetId: entry.datasetReference.datasetId,
        projectId: entry.datasetReference.projectId,
        friendlyName: entry.friendlyName ?? null,
        location: entry.location ?? null,
      })
    }
    return {
      success: true,
      output: {
        datasets,
        nextPageToken: payload.nextPageToken ?? null,
      },
    }
  },
  outputs: {
    datasets: {
      type: 'array',
      description: 'Array of dataset objects',
      items: {
        type: 'object',
        properties: {
          datasetId: { type: 'string', description: 'Unique dataset identifier' },
          projectId: { type: 'string', description: 'Project ID containing this dataset' },
          friendlyName: {
            type: 'string',
            description: 'Descriptive name for the dataset',
            optional: true,
          },
          location: { type: 'string', description: 'Geographic location where the data resides' },
        },
      },
    },
    nextPageToken: {
      type: 'string',
      description: 'Token for fetching next page of results',
      optional: true,
    },
  },
}

View File

@@ -0,0 +1,142 @@
import type {
GoogleBigQueryListTablesParams,
GoogleBigQueryListTablesResponse,
} from '@/tools/google_bigquery/types'
import type { ToolConfig } from '@/tools/types'
/**
 * BigQuery "List Tables" tool.
 *
 * Wraps the BigQuery REST API `tables.list` endpoint
 * (GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables) and
 * returns a flattened array of table descriptors plus pagination info.
 */
export const googleBigQueryListTablesTool: ToolConfig<
  GoogleBigQueryListTablesParams,
  GoogleBigQueryListTablesResponse
> = {
  id: 'google_bigquery_list_tables',
  name: 'BigQuery List Tables',
  description: 'List all tables in a Google BigQuery dataset',
  version: '1.0.0',
  oauth: {
    required: true,
    provider: 'google-bigquery',
  },
  params: {
    accessToken: {
      type: 'string',
      required: true,
      visibility: 'hidden',
      description: 'OAuth access token',
    },
    projectId: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Google Cloud project ID',
    },
    datasetId: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'BigQuery dataset ID',
    },
    maxResults: {
      type: 'number',
      required: false,
      visibility: 'user-or-llm',
      description: 'Maximum number of tables to return',
    },
    pageToken: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Token for pagination',
    },
  },
  request: {
    url: (params) => {
      // projectId and datasetId are user-supplied, so both are URL-encoded.
      const url = new URL(
        `https://bigquery.googleapis.com/bigquery/v2/projects/${encodeURIComponent(params.projectId)}/datasets/${encodeURIComponent(params.datasetId)}/tables`
      )
      if (params.maxResults !== undefined && params.maxResults !== null) {
        const maxResults = Number(params.maxResults)
        // Only forward a sane value; NaN, zero, and negatives are dropped.
        if (Number.isFinite(maxResults) && maxResults > 0) {
          url.searchParams.set('maxResults', String(maxResults))
        }
      }
      if (params.pageToken) url.searchParams.set('pageToken', params.pageToken)
      return url.toString()
    },
    method: 'GET',
    headers: (params) => ({
      Authorization: `Bearer ${params.accessToken}`,
    }),
  },
  /**
   * Normalizes the raw API payload: each table is flattened out of its
   * `tableReference` wrapper and absent optional fields become null.
   * Throws with the API's error message on a non-2xx response.
   */
  transformResponse: async (response: Response) => {
    const data = await response.json()
    if (!response.ok) {
      const errorMessage = data.error?.message || 'Failed to list BigQuery tables'
      throw new Error(errorMessage)
    }
    const tables = (data.tables ?? []).map(
      (t: {
        tableReference: { tableId: string; datasetId: string; projectId: string }
        type?: string
        friendlyName?: string
        creationTime?: string
      }) => ({
        tableId: t.tableReference.tableId,
        datasetId: t.tableReference.datasetId,
        projectId: t.tableReference.projectId,
        type: t.type ?? null,
        friendlyName: t.friendlyName ?? null,
        creationTime: t.creationTime ?? null,
      })
    )
    return {
      success: true,
      output: {
        tables,
        totalItems: data.totalItems ?? null,
        nextPageToken: data.nextPageToken ?? null,
      },
    }
  },
  outputs: {
    tables: {
      type: 'array',
      description: 'Array of table objects',
      items: {
        type: 'object',
        properties: {
          tableId: { type: 'string', description: 'Table identifier' },
          datasetId: { type: 'string', description: 'Dataset ID containing this table' },
          projectId: { type: 'string', description: 'Project ID containing this table' },
          // Marked optional for consistency with transformResponse, which
          // yields null when the API omits the type.
          type: {
            type: 'string',
            description: 'Table type (TABLE, VIEW, EXTERNAL, etc.)',
            optional: true,
          },
          friendlyName: {
            type: 'string',
            description: 'User-friendly name for the table',
            optional: true,
          },
          creationTime: {
            type: 'string',
            description: 'Time when created, in milliseconds since epoch',
            optional: true,
          },
        },
      },
    },
    totalItems: {
      type: 'number',
      description: 'Total number of tables in the dataset',
      optional: true,
    },
    nextPageToken: {
      type: 'string',
      description: 'Token for fetching next page of results',
      optional: true,
    },
  },
}

View File

@@ -0,0 +1,164 @@
import type {
GoogleBigQueryQueryParams,
GoogleBigQueryQueryResponse,
} from '@/tools/google_bigquery/types'
import type { ToolConfig } from '@/tools/types'
/**
 * BigQuery "Run Query" tool.
 *
 * Wraps the BigQuery REST API `jobs.query` endpoint
 * (POST /bigquery/v2/projects/{projectId}/queries). The response is reshaped
 * from BigQuery's positional `rows[].f[].v` format into plain objects keyed
 * by column name.
 */
export const googleBigQueryQueryTool: ToolConfig<
  GoogleBigQueryQueryParams,
  GoogleBigQueryQueryResponse
> = {
  id: 'google_bigquery_query',
  name: 'BigQuery Run Query',
  description: 'Run a SQL query against Google BigQuery and return the results',
  version: '1.0.0',
  oauth: {
    required: true,
    provider: 'google-bigquery',
  },
  params: {
    accessToken: {
      type: 'string',
      required: true,
      visibility: 'hidden',
      description: 'OAuth access token',
    },
    projectId: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Google Cloud project ID',
    },
    query: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'SQL query to execute',
    },
    useLegacySql: {
      type: 'boolean',
      required: false,
      visibility: 'user-or-llm',
      description: 'Whether to use legacy SQL syntax (default: false)',
    },
    maxResults: {
      type: 'number',
      required: false,
      visibility: 'user-or-llm',
      description: 'Maximum number of rows to return',
    },
    defaultDatasetId: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Default dataset for unqualified table names',
    },
    location: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Processing location (e.g., "US", "EU")',
    },
  },
  request: {
    url: (params) =>
      `https://bigquery.googleapis.com/bigquery/v2/projects/${encodeURIComponent(params.projectId)}/queries`,
    method: 'POST',
    headers: (params) => ({
      Authorization: `Bearer ${params.accessToken}`,
      'Content-Type': 'application/json',
    }),
    body: (params) => {
      const body: Record<string, unknown> = {
        query: params.query,
        useLegacySql: params.useLegacySql ?? false,
      }
      if (params.maxResults !== undefined && params.maxResults !== null) {
        const maxResults = Number(params.maxResults)
        // Only forward a sane value; NaN, zero, and negatives are dropped
        // (matches the validation in the list-datasets/list-tables tools).
        if (Number.isFinite(maxResults) && maxResults > 0) {
          body.maxResults = maxResults
        }
      }
      if (params.defaultDatasetId) {
        body.defaultDataset = {
          projectId: params.projectId,
          datasetId: params.defaultDatasetId,
        }
      }
      if (params.location) body.location = params.location
      return body
    },
  },
  /**
   * Converts BigQuery's positional row encoding (`rows[].f[].v`) into an
   * array of objects keyed by the schema's column names; missing cell values
   * become null. Throws with the API's error message on a non-2xx response.
   */
  transformResponse: async (response: Response) => {
    const data = await response.json()
    if (!response.ok) {
      const errorMessage = data.error?.message || 'Failed to execute BigQuery query'
      throw new Error(errorMessage)
    }
    const columns = (data.schema?.fields ?? []).map((f: { name: string }) => f.name)
    const rows = (data.rows ?? []).map((row: { f: Array<{ v: unknown }> }) => {
      const obj: Record<string, unknown> = {}
      row.f.forEach((field, index) => {
        obj[columns[index]] = field.v ?? null
      })
      return obj
    })
    return {
      success: true,
      output: {
        columns,
        rows,
        totalRows: data.totalRows ?? null,
        jobComplete: data.jobComplete ?? false,
        totalBytesProcessed: data.totalBytesProcessed ?? null,
        cacheHit: data.cacheHit ?? null,
        jobReference: data.jobReference ?? null,
        pageToken: data.pageToken ?? null,
      },
    }
  },
  outputs: {
    columns: {
      type: 'array',
      description: 'Array of column names from the query result',
      items: { type: 'string', description: 'Column name' },
    },
    rows: {
      type: 'array',
      description: 'Array of row objects keyed by column name',
      items: {
        type: 'object',
        description: 'Row with column name/value pairs',
      },
    },
    totalRows: {
      type: 'string',
      description: 'Total number of rows in the complete result set',
      optional: true,
    },
    jobComplete: { type: 'boolean', description: 'Whether the query completed within the timeout' },
    // Marked optional for consistency with transformResponse, which yields
    // null when the API omits totalBytesProcessed (e.g. dry runs / cache hits).
    totalBytesProcessed: {
      type: 'string',
      description: 'Total bytes processed by the query',
      optional: true,
    },
    cacheHit: {
      type: 'boolean',
      description: 'Whether the query result was served from cache',
      optional: true,
    },
    jobReference: {
      type: 'object',
      description: 'Job reference (useful when jobComplete is false)',
      optional: true,
      properties: {
        projectId: { type: 'string', description: 'Project ID containing the job' },
        jobId: { type: 'string', description: 'Unique job identifier' },
        location: { type: 'string', description: 'Geographic location of the job' },
      },
    },
    pageToken: {
      type: 'string',
      description: 'Token for fetching additional result pages',
      optional: true,
    },
  },
}

View File

@@ -0,0 +1,119 @@
import type { ToolResponse } from '@/tools/types'
/** Parameters shared by every BigQuery tool. */
export interface GoogleBigQueryBaseParams {
  /** OAuth access token sent as the Bearer credential. */
  accessToken: string
  /** Google Cloud project ID that owns the queried resources. */
  projectId: string
}

/** Parameters for the "Run Query" tool (jobs.query endpoint). */
export interface GoogleBigQueryQueryParams extends GoogleBigQueryBaseParams {
  /** SQL text to execute. */
  query: string
  /** Use legacy SQL syntax when true; standard SQL otherwise. */
  useLegacySql?: boolean
  /** Maximum number of rows to return. */
  maxResults?: number
  /** Default dataset applied to unqualified table names in the query. */
  defaultDatasetId?: string
  /** Processing location, e.g. "US" or "EU". */
  location?: string
}

/** Parameters for the "List Datasets" tool (datasets.list endpoint). */
export interface GoogleBigQueryListDatasetsParams extends GoogleBigQueryBaseParams {
  /** Maximum number of datasets to return. */
  maxResults?: number
  /** Pagination token from a previous response's nextPageToken. */
  pageToken?: string
}

/** Parameters for the "List Tables" tool (tables.list endpoint). */
export interface GoogleBigQueryListTablesParams extends GoogleBigQueryBaseParams {
  /** Dataset whose tables are listed. */
  datasetId: string
  /** Maximum number of tables to return. */
  maxResults?: number
  /** Pagination token from a previous response's nextPageToken. */
  pageToken?: string
}

/** Parameters for the "Get Table" tool. */
export interface GoogleBigQueryGetTableParams extends GoogleBigQueryBaseParams {
  /** Dataset containing the table. */
  datasetId: string
  /** Table to fetch metadata for. */
  tableId: string
}

/** Parameters for the "Insert Rows" tool (streaming insert). */
export interface GoogleBigQueryInsertRowsParams extends GoogleBigQueryBaseParams {
  /** Dataset containing the target table. */
  datasetId: string
  /** Table receiving the rows. */
  tableId: string
  // NOTE(review): presumably a JSON-encoded array of row objects — the insert
  // tool's parsing is not visible here; confirm the expected encoding.
  rows: string
  /** Skip rows that fail validation instead of rejecting the whole request. */
  skipInvalidRows?: boolean
  /** Ignore row values that do not match the table schema. */
  ignoreUnknownValues?: boolean
}

/** Identifies a BigQuery job (returned when a query does not finish in time). */
export interface GoogleBigQueryJobReference {
  projectId: string
  jobId: string
  location: string
}

/** Output of the "Run Query" tool; absent API fields are normalized to null. */
export interface GoogleBigQueryQueryResponse extends ToolResponse {
  output: {
    /** Column names taken from the result schema, in order. */
    columns: string[]
    /** Result rows keyed by column name. */
    rows: Record<string, unknown>[]
    // BigQuery encodes uint64 counts as decimal strings.
    totalRows: string | null
    /** False when the query timed out; poll via jobReference in that case. */
    jobComplete: boolean
    totalBytesProcessed: string | null
    cacheHit: boolean | null
    jobReference: GoogleBigQueryJobReference | null
    pageToken: string | null
  }
}

/** Output of the "List Datasets" tool. */
export interface GoogleBigQueryListDatasetsResponse extends ToolResponse {
  output: {
    datasets: Array<{
      datasetId: string
      projectId: string
      friendlyName: string | null
      location: string | null
    }>
    /** Null when there are no further pages. */
    nextPageToken: string | null
  }
}

/** Output of the "List Tables" tool. */
export interface GoogleBigQueryListTablesResponse extends ToolResponse {
  output: {
    tables: Array<{
      tableId: string
      datasetId: string
      projectId: string
      /** e.g. TABLE, VIEW, EXTERNAL. */
      type: string | null
      friendlyName: string | null
      creationTime: string | null
    }>
    totalItems: number | null
    /** Null when there are no further pages. */
    nextPageToken: string | null
  }
}

/** Output of the "Get Table" tool; absent API fields are normalized to null. */
export interface GoogleBigQueryGetTableResponse extends ToolResponse {
  output: {
    tableId: string
    datasetId: string
    projectId: string
    type: string | null
    description: string | null
    // BigQuery encodes uint64 counts/sizes as decimal strings.
    numRows: string | null
    numBytes: string | null
    /** Flattened column definitions from the table schema. */
    schema: Array<{
      name: string
      type: string
      mode: string | null
      description: string | null
    }>
    creationTime: string | null
    lastModifiedTime: string | null
    location: string | null
  }
}

/** Output of the "Insert Rows" tool. */
export interface GoogleBigQueryInsertRowsResponse extends ToolResponse {
  output: {
    /** Count of rows accepted by the streaming insert. */
    insertedRows: number
    /** Per-row failures; empty when every row succeeded. */
    errors: Array<{
      /** Zero-based index of the failed row. */
      index: number
      errors: Array<{
        reason: string | null
        location: string | null
        message: string | null
      }>
    }>
  }
}

View File

@@ -644,6 +644,13 @@ import {
gongLookupPhoneTool,
} from '@/tools/gong'
import { googleSearchTool } from '@/tools/google'
import {
googleBigQueryGetTableTool,
googleBigQueryInsertRowsTool,
googleBigQueryListDatasetsTool,
googleBigQueryListTablesTool,
googleBigQueryQueryTool,
} from '@/tools/google_bigquery'
import { googleBooksVolumeDetailsTool, googleBooksVolumeSearchTool } from '@/tools/google_books'
import {
googleCalendarCreateTool,
@@ -3621,6 +3628,11 @@ export const tools: Record<string, ToolConfig> = {
wordpress_list_users: wordpressListUsersTool,
wordpress_get_user: wordpressGetUserTool,
wordpress_search_content: wordpressSearchContentTool,
google_bigquery_query: googleBigQueryQueryTool,
google_bigquery_list_datasets: googleBigQueryListDatasetsTool,
google_bigquery_list_tables: googleBigQueryListTablesTool,
google_bigquery_get_table: googleBigQueryGetTableTool,
google_bigquery_insert_rows: googleBigQueryInsertRowsTool,
google_vault_create_matters_export: createMattersExportTool,
google_vault_list_matters_export: listMattersExportTool,
google_vault_create_matters_holds: createMattersHoldsTool,