fix(backend): Add Airtable record normalization and upsert features (#10908)

Introduces normalization of Airtable record outputs to include all
fields with appropriate empty values and optional field metadata.
Enhances record creation to support finding existing records by
specified fields and updating them if found, enabling upsert-like
behavior. Updates block schemas and logic for list, get, and create
operations to support these new features.<!-- Clearly explain the need
for these changes: -->

### Changes 🏗️
Allows normalization of the response of the airtable blocks
Allows you to use create base to find ones already made
<!-- Concisely describe all of the changes made in this pull request:
-->

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
  - [x] Test that it doesn't break existing agents
  - [x] Test that the results for checkboxes are returned
This commit is contained in:
Nicholas Tindle
2025-09-11 15:26:34 -05:00
committed by GitHub
parent 64b4480b1e
commit 3d6ea3088e
3 changed files with 326 additions and 12 deletions

View File

@@ -661,6 +661,167 @@ async def update_field(
#################################################################
async def get_table_schema(
credentials: Credentials,
base_id: str,
table_id_or_name: str,
) -> dict:
"""
Get the schema for a specific table, including all field definitions.
Args:
credentials: Airtable API credentials
base_id: The base ID
table_id_or_name: The table ID or name
Returns:
Dict containing table schema with fields information
"""
# First get all tables to find the right one
response = await Requests().get(
f"https://api.airtable.com/v0/meta/bases/{base_id}/tables",
headers={"Authorization": credentials.auth_header()},
)
data = response.json()
tables = data.get("tables", [])
# Find the matching table
for table in tables:
if table.get("id") == table_id_or_name or table.get("name") == table_id_or_name:
return table
raise ValueError(f"Table '{table_id_or_name}' not found in base '{base_id}'")
def get_empty_value_for_field(field_type: str) -> Any:
"""
Return the appropriate empty value for a given Airtable field type.
Args:
field_type: The Airtable field type
Returns:
The appropriate empty value for that field type
"""
# Fields that should be false when empty
if field_type == "checkbox":
return False
# Fields that should be empty arrays
if field_type in [
"multipleSelects",
"multipleRecordLinks",
"multipleAttachments",
"multipleLookupValues",
"multipleCollaborators",
]:
return []
# Fields that should be 0 when empty (numeric types)
if field_type in [
"number",
"percent",
"currency",
"rating",
"duration",
"count",
"autoNumber",
]:
return 0
# Fields that should be empty strings
if field_type in [
"singleLineText",
"multilineText",
"email",
"url",
"phoneNumber",
"richText",
"barcode",
]:
return ""
# Everything else gets null (dates, single selects, formulas, etc.)
return None
async def normalize_records(
records: list[dict],
table_schema: dict,
include_field_metadata: bool = False,
) -> dict:
"""
Normalize Airtable records to include all fields with proper empty values.
Args:
records: List of record objects from Airtable API
table_schema: Table schema containing field definitions
include_field_metadata: Whether to include field metadata in response
Returns:
Dict with normalized records and optionally field metadata
"""
fields = table_schema.get("fields", [])
# Normalize each record
normalized_records = []
for record in records:
normalized = {
"id": record.get("id"),
"createdTime": record.get("createdTime"),
"fields": {},
}
# Add existing fields
existing_fields = record.get("fields", {})
# Add all fields from schema, using empty values for missing ones
for field in fields:
field_name = field["name"]
field_type = field["type"]
if field_name in existing_fields:
# Field exists, use its value
normalized["fields"][field_name] = existing_fields[field_name]
else:
# Field is missing, add appropriate empty value
normalized["fields"][field_name] = get_empty_value_for_field(field_type)
normalized_records.append(normalized)
# Build result dictionary
if include_field_metadata:
field_metadata = {}
for field in fields:
metadata = {"type": field["type"], "id": field["id"]}
# Add type-specific metadata
options = field.get("options", {})
if field["type"] == "currency" and "symbol" in options:
metadata["symbol"] = options["symbol"]
metadata["precision"] = options.get("precision", 2)
elif field["type"] == "duration" and "durationFormat" in options:
metadata["format"] = options["durationFormat"]
elif field["type"] == "percent" and "precision" in options:
metadata["precision"] = options["precision"]
elif (
field["type"] in ["singleSelect", "multipleSelects"]
and "choices" in options
):
metadata["choices"] = [choice["name"] for choice in options["choices"]]
elif field["type"] == "rating" and "max" in options:
metadata["max"] = options["max"]
metadata["icon"] = options.get("icon", "star")
metadata["color"] = options.get("color", "yellowBright")
field_metadata[field["name"]] = metadata
return {"records": normalized_records, "field_metadata": field_metadata}
else:
return {"records": normalized_records}
async def list_records(
credentials: Credentials,
base_id: str,
@@ -1249,3 +1410,26 @@ async def list_bases(
)
return response.json()
async def get_base_tables(
credentials: Credentials,
base_id: str,
) -> list[dict]:
"""
Get all tables for a specific base.
Args:
credentials: Airtable API credentials
base_id: The ID of the base
Returns:
list[dict]: List of table objects with their schemas
"""
response = await Requests().get(
f"https://api.airtable.com/v0/meta/bases/{base_id}/tables",
headers={"Authorization": credentials.auth_header()},
)
data = response.json()
return data.get("tables", [])

View File

@@ -14,13 +14,13 @@ from backend.sdk import (
SchemaField,
)
from ._api import create_base, list_bases
from ._api import create_base, get_base_tables, list_bases
from ._config import airtable
class AirtableCreateBaseBlock(Block):
"""
Creates a new base in an Airtable workspace.
Creates a new base in an Airtable workspace, or returns existing base if one with the same name exists.
"""
class Input(BlockSchema):
@@ -31,6 +31,10 @@ class AirtableCreateBaseBlock(Block):
description="The workspace ID where the base will be created"
)
name: str = SchemaField(description="The name of the new base")
find_existing: bool = SchemaField(
description="If true, return existing base with same name instead of creating duplicate",
default=True,
)
tables: list[dict] = SchemaField(
description="At least one table and field must be specified. Array of table objects to create in the base. Each table should have 'name' and 'fields' properties",
default=[
@@ -50,14 +54,18 @@ class AirtableCreateBaseBlock(Block):
)
class Output(BlockSchema):
base_id: str = SchemaField(description="The ID of the created base")
base_id: str = SchemaField(description="The ID of the created or found base")
tables: list[dict] = SchemaField(description="Array of table objects")
table: dict = SchemaField(description="A single table object")
was_created: bool = SchemaField(
description="True if a new base was created, False if existing was found",
default=True,
)
def __init__(self):
super().__init__(
id="f59b88a8-54ce-4676-a508-fd614b4e8dce",
description="Create a new base in Airtable",
description="Create or find a base in Airtable",
categories={BlockCategory.DATA},
input_schema=self.Input,
output_schema=self.Output,
@@ -66,6 +74,31 @@ class AirtableCreateBaseBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
# If find_existing is true, check if a base with this name already exists
if input_data.find_existing:
# List all bases to check for existing one with same name
# Note: Airtable API doesn't have a direct search, so we need to list and filter
existing_bases = await list_bases(credentials)
for base in existing_bases.get("bases", []):
if base.get("name") == input_data.name:
# Base already exists, return it
base_id = base.get("id")
yield "base_id", base_id
yield "was_created", False
# Get the tables for this base
try:
tables = await get_base_tables(credentials, base_id)
yield "tables", tables
for table in tables:
yield "table", table
except Exception:
# If we can't get tables, return empty list
yield "tables", []
return
# No existing base found or find_existing is false, create new one
data = await create_base(
credentials,
input_data.workspace_id,
@@ -74,6 +107,7 @@ class AirtableCreateBaseBlock(Block):
)
yield "base_id", data.get("id", None)
yield "was_created", True
yield "tables", data.get("tables", [])
for table in data.get("tables", []):
yield "table", table

View File

@@ -2,7 +2,7 @@
Airtable record operation blocks.
"""
from typing import Optional
from typing import Optional, cast
from backend.sdk import (
APIKeyCredentials,
@@ -18,7 +18,9 @@ from ._api import (
create_record,
delete_multiple_records,
get_record,
get_table_schema,
list_records,
normalize_records,
update_multiple_records,
)
from ._config import airtable
@@ -54,12 +56,24 @@ class AirtableListRecordsBlock(Block):
return_fields: list[str] = SchemaField(
description="Specific fields to return (comma-separated)", default=[]
)
normalize_output: bool = SchemaField(
description="Normalize output to include all fields with proper empty values (disable to skip schema fetch and get raw Airtable response)",
default=True,
)
include_field_metadata: bool = SchemaField(
description="Include field type and configuration metadata (requires normalize_output=true)",
default=False,
)
class Output(BlockSchema):
records: list[dict] = SchemaField(description="Array of record objects")
offset: Optional[str] = SchemaField(
description="Offset for next page (null if no more records)", default=None
)
field_metadata: Optional[dict] = SchemaField(
description="Field type and configuration metadata (only when include_field_metadata=true)",
default=None,
)
def __init__(self):
super().__init__(
@@ -73,6 +87,7 @@ class AirtableListRecordsBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
data = await list_records(
credentials,
input_data.base_id,
@@ -88,8 +103,33 @@ class AirtableListRecordsBlock(Block):
fields=input_data.return_fields if input_data.return_fields else None,
)
yield "records", data.get("records", [])
yield "offset", data.get("offset", None)
records = data.get("records", [])
# Normalize output if requested
if input_data.normalize_output:
# Fetch table schema
table_schema = await get_table_schema(
credentials, input_data.base_id, input_data.table_id_or_name
)
# Normalize the records
normalized_data = await normalize_records(
records,
table_schema,
include_field_metadata=input_data.include_field_metadata,
)
yield "records", normalized_data["records"]
yield "offset", data.get("offset", None)
if (
input_data.include_field_metadata
and "field_metadata" in normalized_data
):
yield "field_metadata", normalized_data["field_metadata"]
else:
yield "records", records
yield "offset", data.get("offset", None)
class AirtableGetRecordBlock(Block):
@@ -104,11 +144,23 @@ class AirtableGetRecordBlock(Block):
base_id: str = SchemaField(description="The Airtable base ID")
table_id_or_name: str = SchemaField(description="Table ID or name")
record_id: str = SchemaField(description="The record ID to retrieve")
normalize_output: bool = SchemaField(
description="Normalize output to include all fields with proper empty values (disable to skip schema fetch and get raw Airtable response)",
default=True,
)
include_field_metadata: bool = SchemaField(
description="Include field type and configuration metadata (requires normalize_output=true)",
default=False,
)
class Output(BlockSchema):
id: str = SchemaField(description="The record ID")
fields: dict = SchemaField(description="The record fields")
created_time: str = SchemaField(description="The record created time")
field_metadata: Optional[dict] = SchemaField(
description="Field type and configuration metadata (only when include_field_metadata=true)",
default=None,
)
def __init__(self):
super().__init__(
@@ -122,6 +174,7 @@ class AirtableGetRecordBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
record = await get_record(
credentials,
input_data.base_id,
@@ -129,9 +182,34 @@ class AirtableGetRecordBlock(Block):
input_data.record_id,
)
yield "id", record.get("id", None)
yield "fields", record.get("fields", None)
yield "created_time", record.get("createdTime", None)
# Normalize output if requested
if input_data.normalize_output:
# Fetch table schema
table_schema = await get_table_schema(
credentials, input_data.base_id, input_data.table_id_or_name
)
# Normalize the single record (wrap in list and unwrap result)
normalized_data = await normalize_records(
[record],
table_schema,
include_field_metadata=input_data.include_field_metadata,
)
normalized_record = normalized_data["records"][0]
yield "id", normalized_record.get("id", None)
yield "fields", normalized_record.get("fields", None)
yield "created_time", normalized_record.get("createdTime", None)
if (
input_data.include_field_metadata
and "field_metadata" in normalized_data
):
yield "field_metadata", normalized_data["field_metadata"]
else:
yield "id", record.get("id", None)
yield "fields", record.get("fields", None)
yield "created_time", record.get("createdTime", None)
class AirtableCreateRecordsBlock(Block):
@@ -148,6 +226,10 @@ class AirtableCreateRecordsBlock(Block):
records: list[dict] = SchemaField(
description="Array of records to create (each with 'fields' object)"
)
skip_normalization: bool = SchemaField(
description="Skip output normalization to get raw Airtable response (faster but may have missing fields)",
default=False,
)
typecast: bool = SchemaField(
description="Automatically convert string values to appropriate types",
default=False,
@@ -173,7 +255,7 @@ class AirtableCreateRecordsBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
# The create_record API expects records in a specific format
data = await create_record(
credentials,
input_data.base_id,
@@ -182,8 +264,22 @@ class AirtableCreateRecordsBlock(Block):
typecast=input_data.typecast if input_data.typecast else None,
return_fields_by_field_id=input_data.return_fields_by_field_id,
)
result_records = cast(list[dict], data.get("records", []))
yield "records", data.get("records", [])
# Normalize output unless explicitly disabled
if not input_data.skip_normalization and result_records:
# Fetch table schema
table_schema = await get_table_schema(
credentials, input_data.base_id, input_data.table_id_or_name
)
# Normalize the records
normalized_data = await normalize_records(
result_records, table_schema, include_field_metadata=False
)
result_records = normalized_data["records"]
yield "records", result_records
details = data.get("details", None)
if details:
yield "details", details