fix(backend): Add Airtable record normalization + find/create base (#10891)

## Summary

Fixes a critical issue with the Airtable API where empty/false fields are completely omitted from responses, causing inconsistent data structures. Also improves the create base block to prevent duplicate bases.

The Airtable API has a problematic behavior where it omits fields with "empty" values from responses:

- Unchecked checkboxes are missing entirely instead of returning `false`
- Empty number fields are missing instead of returning `0`
- This makes it impossible to distinguish between "field doesn't exist" and "field is false/empty"
- Users were getting inconsistent record structures that broke their workflows

### Changes 🏗️

#### 1. **Added Record Normalization** (`backend/blocks/airtable/_api.py`)

- New `get_table_schema()` function to fetch table field definitions
- New `get_empty_value_for_field()` to determine appropriate empty values per field type
- New `normalize_records()` to fill in missing fields with proper defaults:
  - Checkbox → `false`
  - Number/Currency/Percent/Duration/Rating → `0`
  - Text fields → `""`
  - Multiple selects/attachments/collaborators → `[]`
  - Dates/Single selects → `null`
- New `get_base_tables()` to fetch tables for a base

#### 2. **Enhanced List and Get Record Blocks** (`backend/blocks/airtable/records.py`)

- Added `normalize_output` parameter (defaults to `true`) - ensures all fields are present
- Added `include_field_metadata` parameter to optionally include field type information
- When normalization is enabled, fetches schema once and normalizes all records
- Can be disabled by setting `normalize_output=false` for raw Airtable response

#### 3. **Simplified Create Records Block**

- Added `skip_normalization` parameter (default `false`) - normalized output by default
- Records now always include all fields with proper empty values

#### 4. **Enhanced Create Base Block** (`backend/blocks/airtable/bases.py`)

- Added `find_existing` parameter (defaults to `true`) to prevent duplicate bases
- When finding an existing base, now fetches and returns table information
- Added `was_created` output field to indicate whether base was created or found

### Testing 📋

- ✅ All Airtable block tests pass
- ✅ Tested normalization with records containing missing checkbox fields
- ✅ Verified all field types get appropriate empty values
- ✅ Tested create base find-or-create functionality
- ✅ Ran `poetry run format` and `poetry run lint`

### Migration Guide

This update makes the blocks behave more predictably:

- **List/Get Records**: All fields are now included by default. Set `normalize_output: false` if you need the raw Airtable response
- **Create Records**: Simply creates records, no more upsert confusion
- **Create Base**: Prevents duplicate bases by default. Set `find_existing: false` to force creation

### Checklist 📋

#### For code changes:

- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan

#### For configuration changes:

- [x] `.env.default` is updated or already compatible with my changes
- [x] `docker-compose.yml` is updated or already compatible with my changes
- [x] I have included a list of my configuration changes in the PR description (under **Changes**)

No configuration changes were required - all changes are code-only.
2026-04-08 03:00:28 -04:00 · 2025-09-10 06:57:26 +02:00
parent cddeb185a8
commit 5da41e0753
3 changed files with 326 additions and 16 deletions
--- a/autogpt_platform/backend/backend/blocks/airtable/_api.py
+++ b/autogpt_platform/backend/backend/blocks/airtable/_api.py
@@ -661,6 +661,167 @@ async def update_field(
 #################################################################


+async def get_table_schema(
+    credentials: Credentials,
+    base_id: str,
+    table_id_or_name: str,
+) -> dict:
+    """
+    Look up one table's schema, including all of its field definitions.
+
+    Args:
+        credentials: Airtable API credentials
+        base_id: The base ID
+        table_id_or_name: The table ID or name
+
+    Returns:
+        The first table object whose "id" or "name" equals table_id_or_name
+
+    Raises:
+        ValueError: If no table in the base matches table_id_or_name
+    """
+    # The request returns the base's tables as one list; the wanted table is
+    # picked out of that list locally.
+    tables_url = f"https://api.airtable.com/v0/meta/bases/{base_id}/tables"
+    auth_headers = {"Authorization": credentials.auth_header()}
+    response = await Requests().get(tables_url, headers=auth_headers)
+    tables = response.json().get("tables", [])
+
+    for candidate in tables:
+        if table_id_or_name in (candidate.get("id"), candidate.get("name")):
+            return candidate
+
+    raise ValueError(f"Table '{table_id_or_name}' not found in base '{base_id}'")
+
+
+def get_empty_value_for_field(field_type: str) -> Any:
+    """
+    Map an Airtable field type to the value used when a record omits the field.
+
+    Args:
+        field_type: The Airtable field type
+
+    Returns:
+        False for checkboxes, a new empty list for multi-value types, 0 for
+        numeric types, "" for text-like types, and None for any other type
+    """
+    list_types = (
+        "multipleSelects",
+        "multipleRecordLinks",
+        "multipleAttachments",
+        "multipleLookupValues",
+        "multipleCollaborators",
+    )
+    numeric_types = (
+        "number",
+        "percent",
+        "currency",
+        "rating",
+        "duration",
+        "count",
+        "autoNumber",
+    )
+    text_types = (
+        "singleLineText",
+        "multilineText",
+        "email",
+        "url",
+        "phoneNumber",
+        "richText",
+        "barcode",
+    )
+
+    if field_type == "checkbox":
+        empty_value: Any = False
+    elif field_type in list_types:
+        # Built on every call so callers never share one list object
+        empty_value = []
+    elif field_type in numeric_types:
+        empty_value = 0
+    elif field_type in text_types:
+        empty_value = ""
+    else:
+        # Dates, single selects, formulas, etc. get no typed placeholder
+        empty_value = None
+    return empty_value
+
+
+async def normalize_records(
+    records: list[dict],
+    table_schema: dict,
+    include_field_metadata: bool = False,
+) -> dict:
+    """
+    Normalize Airtable records to include all fields with proper empty values.
+
+    Values already on a record are kept: fields unknown to the schema survive,
+    and records keyed by field ID keep (and are filled in under) their ID keys.
+
+    Args:
+        records: List of record objects from Airtable API
+        table_schema: Table schema containing field definitions
+        include_field_metadata: Whether to include field metadata in response
+
+    Returns:
+        Dict with normalized records and optionally field metadata
+    """
+    fields = table_schema.get("fields", [])
+    names = {field["name"] for field in fields}
+    ids = {field["id"] for field in fields if "id" in field}
+
+    # Records are keyed by field ID when the caller asked the API for that;
+    # detect it so values are matched and gaps filled under the same key style.
+    keyed_by_id = any(
+        key in ids and key not in names
+        for record in records
+        for key in (record.get("fields") or {})
+    )
+
+    normalized_records = []
+    for record in records:
+        existing_fields = record.get("fields") or {}
+        filled = {}
+        # Schema fields first (schema order), using empty values for missing ones
+        for field in fields:
+            key = field.get("id", field["name"]) if keyed_by_id else field["name"]
+            if key in existing_fields:
+                filled[key] = existing_fields[key]
+            else:
+                filled[key] = get_empty_value_for_field(field["type"])
+        # Then anything the schema did not account for, so no value is dropped
+        for key, value in existing_fields.items():
+            filled.setdefault(key, value)
+
+        normalized = {"id": record.get("id"), "createdTime": record.get("createdTime")}
+        normalized["fields"] = filled
+        normalized_records.append(normalized)
+
+    if include_field_metadata:
+        field_metadata = {}
+        for field in fields:
+            field_type, options = field["type"], field.get("options", {})
+            metadata = {"type": field_type, "id": field["id"]}
+            if field_type == "currency" and "symbol" in options:
+                metadata["symbol"] = options["symbol"]
+                metadata["precision"] = options.get("precision", 2)
+            elif field_type == "duration" and "durationFormat" in options:
+                metadata["format"] = options["durationFormat"]
+            elif field_type == "percent" and "precision" in options:
+                metadata["precision"] = options["precision"]
+            elif (
+                field_type in ["singleSelect", "multipleSelects"]
+                and "choices" in options
+            ):
+                metadata["choices"] = [choice["name"] for choice in options["choices"]]
+            elif field_type == "rating" and "max" in options:
+                metadata["max"] = options["max"]
+                metadata["icon"] = options.get("icon", "star")
+                metadata["color"] = options.get("color", "yellowBright")
+            field_metadata[field["name"]] = metadata
+        return {"records": normalized_records, "field_metadata": field_metadata}
+    return {"records": normalized_records}
+
+
 async def list_records(
    credentials: Credentials,
    base_id: str,
@@ -1249,3 +1410,26 @@ async def list_bases(
    )

    return response.json()
+
+
+async def get_base_tables(
+    credentials: Credentials,
+    base_id: str,
+) -> list[dict]:
+    """
+    Fetch every table defined in a base.
+
+    Args:
+        credentials: Airtable API credentials
+        base_id: The ID of the base
+
+    Returns:
+        list[dict]: The "tables" entry of the API response, or an empty
+        list when the response has no such entry
+    """
+    tables_url = f"https://api.airtable.com/v0/meta/bases/{base_id}/tables"
+    auth_headers = {"Authorization": credentials.auth_header()}
+
+    response = await Requests().get(tables_url, headers=auth_headers)
+    payload = response.json()
+    return payload.get("tables", [])
--- a/autogpt_platform/backend/backend/blocks/airtable/bases.py
+++ b/autogpt_platform/backend/backend/blocks/airtable/bases.py
@@ -14,13 +14,13 @@ from backend.sdk import (
    SchemaField,
 )

-from ._api import create_base, list_bases
+from ._api import create_base, get_base_tables, list_bases
 from ._config import airtable


 class AirtableCreateBaseBlock(Block):
    """
-    Creates a new base in an Airtable workspace.
+    Creates a new base in an Airtable workspace, or returns existing base if one with the same name exists.
    """

    class Input(BlockSchema):
@@ -31,6 +31,10 @@ class AirtableCreateBaseBlock(Block):
            description="The workspace ID where the base will be created"
        )
        name: str = SchemaField(description="The name of the new base")
+        find_existing: bool = SchemaField(
+            description="If true, return existing base with same name instead of creating duplicate",
+            default=True,
+        )
        tables: list[dict] = SchemaField(
            description="At least one table and field must be specified. Array of table objects to create in the base. Each table should have 'name' and 'fields' properties",
            default=[
@@ -50,14 +54,18 @@ class AirtableCreateBaseBlock(Block):
        )

    class Output(BlockSchema):
-        base_id: str = SchemaField(description="The ID of the created base")
+        base_id: str = SchemaField(description="The ID of the created or found base")
        tables: list[dict] = SchemaField(description="Array of table objects")
        table: dict = SchemaField(description="A single table object")
+        was_created: bool = SchemaField(
+            description="True if a new base was created, False if existing was found",
+            default=True,
+        )

    def __init__(self):
        super().__init__(
            id="f59b88a8-54ce-4676-a508-fd614b4e8dce",
-            description="Create a new base in Airtable",
+            description="Create or find a base in Airtable",
            categories={BlockCategory.DATA},
            input_schema=self.Input,
            output_schema=self.Output,
@@ -66,6 +74,31 @@ class AirtableCreateBaseBlock(Block):
    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
+        # If find_existing is true, check if a base with this name already exists
+        if input_data.find_existing:
+            # List all bases to check for existing one with same name
+            # Note: Airtable API doesn't have a direct search, so we need to list and filter
+            existing_bases = await list_bases(credentials)
+
+            for base in existing_bases.get("bases", []):
+                if base.get("name") == input_data.name:
+                    # Base already exists, return it
+                    base_id = base.get("id")
+                    yield "base_id", base_id
+                    yield "was_created", False
+
+                    # Get the tables for this base
+                    try:
+                        tables = await get_base_tables(credentials, base_id)
+                        yield "tables", tables
+                        for table in tables:
+                            yield "table", table
+                    except Exception:
+                        # If we can't get tables, return empty list
+                        yield "tables", []
+                    return
+
+        # No existing base found or find_existing is false, create new one
        data = await create_base(
            credentials,
            input_data.workspace_id,
@@ -74,6 +107,7 @@ class AirtableCreateBaseBlock(Block):
        )

        yield "base_id", data.get("id", None)
+        yield "was_created", True
        yield "tables", data.get("tables", [])
        for table in data.get("tables", []):
            yield "table", table
--- a/autogpt_platform/backend/backend/blocks/airtable/records.py
+++ b/autogpt_platform/backend/backend/blocks/airtable/records.py
@@ -2,7 +2,7 @@
 Airtable record operation blocks.
 """

-from typing import Optional
+from typing import Optional, cast

 from backend.sdk import (
    APIKeyCredentials,
@@ -18,7 +18,9 @@ from ._api import (
    create_record,
    delete_multiple_records,
    get_record,
+    get_table_schema,
    list_records,
+    normalize_records,
    update_multiple_records,
 )
 from ._config import airtable
@@ -54,12 +56,24 @@ class AirtableListRecordsBlock(Block):
        return_fields: list[str] = SchemaField(
            description="Specific fields to return (comma-separated)", default=[]
        )
+        normalize_output: bool = SchemaField(
+            description="Normalize output to include all fields with proper empty values (disable to skip schema fetch and get raw Airtable response)",
+            default=True,
+        )
+        include_field_metadata: bool = SchemaField(
+            description="Include field type and configuration metadata (requires normalize_output=true)",
+            default=False,
+        )

    class Output(BlockSchema):
        records: list[dict] = SchemaField(description="Array of record objects")
        offset: Optional[str] = SchemaField(
            description="Offset for next page (null if no more records)", default=None
        )
+        field_metadata: Optional[dict] = SchemaField(
+            description="Field type and configuration metadata (only when include_field_metadata=true)",
+            default=None,
+        )

    def __init__(self):
        super().__init__(
@@ -73,6 +87,7 @@ class AirtableListRecordsBlock(Block):
    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
+
        data = await list_records(
            credentials,
            input_data.base_id,
@@ -88,8 +103,33 @@ class AirtableListRecordsBlock(Block):
            fields=input_data.return_fields if input_data.return_fields else None,
        )

-        yield "records", data.get("records", [])
-        yield "offset", data.get("offset", None)
+        records = data.get("records", [])
+
+        # Normalize output if requested
+        if input_data.normalize_output:
+            # Fetch table schema
+            table_schema = await get_table_schema(
+                credentials, input_data.base_id, input_data.table_id_or_name
+            )
+
+            # Normalize the records
+            normalized_data = await normalize_records(
+                records,
+                table_schema,
+                include_field_metadata=input_data.include_field_metadata,
+            )
+
+            yield "records", normalized_data["records"]
+            yield "offset", data.get("offset", None)
+
+            if (
+                input_data.include_field_metadata
+                and "field_metadata" in normalized_data
+            ):
+                yield "field_metadata", normalized_data["field_metadata"]
+        else:
+            yield "records", records
+            yield "offset", data.get("offset", None)


 class AirtableGetRecordBlock(Block):
@@ -104,11 +144,23 @@ class AirtableGetRecordBlock(Block):
        base_id: str = SchemaField(description="The Airtable base ID")
        table_id_or_name: str = SchemaField(description="Table ID or name")
        record_id: str = SchemaField(description="The record ID to retrieve")
+        normalize_output: bool = SchemaField(
+            description="Normalize output to include all fields with proper empty values (disable to skip schema fetch and get raw Airtable response)",
+            default=True,
+        )
+        include_field_metadata: bool = SchemaField(
+            description="Include field type and configuration metadata (requires normalize_output=true)",
+            default=False,
+        )

    class Output(BlockSchema):
        id: str = SchemaField(description="The record ID")
        fields: dict = SchemaField(description="The record fields")
        created_time: str = SchemaField(description="The record created time")
+        field_metadata: Optional[dict] = SchemaField(
+            description="Field type and configuration metadata (only when include_field_metadata=true)",
+            default=None,
+        )

    def __init__(self):
        super().__init__(
@@ -122,6 +174,7 @@ class AirtableGetRecordBlock(Block):
    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
+
        record = await get_record(
            credentials,
            input_data.base_id,
@@ -129,9 +182,34 @@ class AirtableGetRecordBlock(Block):
            input_data.record_id,
        )

-        yield "id", record.get("id", None)
-        yield "fields", record.get("fields", None)
-        yield "created_time", record.get("createdTime", None)
+        # Normalize output if requested
+        if input_data.normalize_output:
+            # Fetch table schema
+            table_schema = await get_table_schema(
+                credentials, input_data.base_id, input_data.table_id_or_name
+            )
+
+            # Normalize the single record (wrap in list and unwrap result)
+            normalized_data = await normalize_records(
+                [record],
+                table_schema,
+                include_field_metadata=input_data.include_field_metadata,
+            )
+
+            normalized_record = normalized_data["records"][0]
+            yield "id", normalized_record.get("id", None)
+            yield "fields", normalized_record.get("fields", None)
+            yield "created_time", normalized_record.get("createdTime", None)
+
+            if (
+                input_data.include_field_metadata
+                and "field_metadata" in normalized_data
+            ):
+                yield "field_metadata", normalized_data["field_metadata"]
+        else:
+            yield "id", record.get("id", None)
+            yield "fields", record.get("fields", None)
+            yield "created_time", record.get("createdTime", None)


 class AirtableCreateRecordsBlock(Block):
@@ -148,6 +226,10 @@ class AirtableCreateRecordsBlock(Block):
        records: list[dict] = SchemaField(
            description="Array of records to create (each with 'fields' object)"
        )
+        skip_normalization: bool = SchemaField(
+            description="Skip output normalization to get raw Airtable response (faster but may have missing fields)",
+            default=False,
+        )
        typecast: bool = SchemaField(
            description="Automatically convert string values to appropriate types",
            default=False,
@@ -159,7 +241,6 @@ class AirtableCreateRecordsBlock(Block):

    class Output(BlockSchema):
        records: list[dict] = SchemaField(description="Array of created record objects")
-        details: dict = SchemaField(description="Details of the created records")

    def __init__(self):
        super().__init__(
@@ -173,7 +254,7 @@ class AirtableCreateRecordsBlock(Block):
    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
-        # The create_record API expects records in a specific format
+
        data = await create_record(
            credentials,
            input_data.base_id,
@@ -182,11 +263,22 @@ class AirtableCreateRecordsBlock(Block):
            typecast=input_data.typecast if input_data.typecast else None,
            return_fields_by_field_id=input_data.return_fields_by_field_id,
        )
+        result_records = cast(list[dict], data.get("records", []))

-        yield "records", data.get("records", [])
-        details = data.get("details", None)
-        if details:
-            yield "details", details
+        # Normalize output unless explicitly disabled
+        if not input_data.skip_normalization and result_records:
+            # Fetch table schema
+            table_schema = await get_table_schema(
+                credentials, input_data.base_id, input_data.table_id_or_name
+            )
+
+            # Normalize the records
+            normalized_data = await normalize_records(
+                result_records, table_schema, include_field_metadata=False
+            )
+            result_records = normalized_data["records"]
+
+        yield "records", result_records


 class AirtableUpdateRecordsBlock(Block):