fix(backend): Add Airtable record normalization and upsert features (#10908)

Introduces normalization of Airtable record outputs to include all fields with appropriate empty values and optional field metadata. Enhances record creation to support finding existing records by specified fields and updating them if found, enabling upsert-like behavior. Updates block schemas and logic for list, get, and create operations to support these new features.

### Changes 🏗️

- Allows normalizing the responses of the Airtable record blocks.
- Allows the Create Base block to find and return an already existing base with the same name instead of creating a duplicate.

### Checklist 📋

#### For code changes:

- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Test that it doesn't break existing agents
  - [x] Test that the results for checkboxes are returned
2026-04-08 03:00:28 -04:00 · 2025-09-11 15:26:34 -05:00
parent 64b4480b1e
commit 3d6ea3088e
3 changed files with 326 additions and 12 deletions
--- a/autogpt_platform/backend/backend/blocks/airtable/_api.py
+++ b/autogpt_platform/backend/backend/blocks/airtable/_api.py
@@ -661,6 +661,167 @@ async def update_field(
 #################################################################


+async def get_table_schema(
+    credentials: Credentials,
+    base_id: str,
+    table_id_or_name: str,
+) -> dict:
+    """
+    Fetch the schema of a single table, including its field definitions.
+
+    The tables endpoint of the base is queried and the first entry whose
+    "id" or "name" equals `table_id_or_name` is returned.
+
+    Args:
+        credentials: Airtable API credentials
+        base_id: The base ID
+        table_id_or_name: The table ID or name
+
+    Returns:
+        Dict describing the matching table, as found in the API response
+
+    Raises:
+        ValueError: If no table in the response matches `table_id_or_name`
+    """
+    # List every table of the base, then pick the requested one locally
+    response = await Requests().get(
+        f"https://api.airtable.com/v0/meta/bases/{base_id}/tables",
+        headers={"Authorization": credentials.auth_header()},
+    )
+    candidates = response.json().get("tables", [])
+
+    # Either identifier is accepted; the first hit in response order wins
+    found = next(
+        (
+            entry
+            for entry in candidates
+            if table_id_or_name in (entry.get("id"), entry.get("name"))
+        ),
+        None,
+    )
+    if found is None:
+        raise ValueError(f"Table '{table_id_or_name}' not found in base '{base_id}'")
+    return found
+
+
+def get_empty_value_for_field(field_type: str) -> Any:
+    """
+    Map an Airtable field type to the value that represents "empty".
+
+    Args:
+        field_type: The Airtable field type
+
+    Returns:
+        False for checkboxes, a new empty list for multi-value types,
+        0 for numeric types, "" for text-like types and None for every
+        other type (dates, single selects, formulas, etc.)
+    """
+    list_like_types = (
+        "multipleSelects",
+        "multipleRecordLinks",
+        "multipleAttachments",
+        "multipleLookupValues",
+        "multipleCollaborators",
+    )
+    numeric_types = (
+        "number",
+        "percent",
+        "currency",
+        "rating",
+        "duration",
+        "count",
+        "autoNumber",
+    )
+    text_types = (
+        "singleLineText",
+        "multilineText",
+        "email",
+        "url",
+        "phoneNumber",
+        "richText",
+        "barcode",
+    )
+
+    empty_value: Any
+    if field_type == "checkbox":
+        empty_value = False
+    elif field_type in list_like_types:
+        # Built on every call so callers never share one list object
+        empty_value = []
+    elif field_type in numeric_types:
+        empty_value = 0
+    elif field_type in text_types:
+        empty_value = ""
+    else:
+        empty_value = None
+    return empty_value
+
+
+async def normalize_records(
+    records: list[dict],
+    table_schema: dict,
+    include_field_metadata: bool = False,
+) -> dict:
+    """
+    Normalize Airtable records to include all fields with proper empty values.
+
+    Every field listed in the schema appears in each normalized record.
+    Fields missing from a record are filled with the value returned by
+    get_empty_value_for_field() for their type.
+
+    Records whose "fields" are keyed by field ID instead of field name
+    (Airtable's returnFieldsByFieldId option) keep their values and stay
+    keyed by ID; previously such values were dropped and replaced by
+    empty placeholders because only field names were looked up.
+
+    Args:
+        records: List of record objects from Airtable API
+        table_schema: Table schema containing field definitions
+        include_field_metadata: Whether to include field metadata in response
+
+    Returns:
+        Dict with "records" (each holding "id", "createdTime" and "fields")
+        and, when include_field_metadata is true, "field_metadata" keyed by
+        field name
+    """
+    fields = table_schema.get("fields", [])
+
+    # A key that is a schema field ID but not a field name means the payload
+    # is keyed by field ID. One response uses a single key style, so decide
+    # once for the whole batch.
+    field_names = {field["name"] for field in fields}
+    field_ids = {field["id"] for field in fields if field.get("id") is not None}
+    keyed_by_id = any(
+        key in field_ids and key not in field_names
+        for record in records
+        for key in (record.get("fields") or {})
+    )
+
+    # Normalize each record
+    normalized_records = []
+    for record in records:
+        # A missing or null "fields" entry is treated as "no values set"
+        existing_fields = record.get("fields") or {}
+
+        normalized_fields = {}
+        for field in fields:
+            # Use the same key style as the incoming payload
+            key = field["name"]
+            if keyed_by_id and field.get("id") is not None:
+                key = field["id"]
+
+            if key in existing_fields:
+                # Field exists, use its value
+                normalized_fields[key] = existing_fields[key]
+            else:
+                # Field is missing, add appropriate empty value
+                normalized_fields[key] = get_empty_value_for_field(field["type"])
+
+        normalized_records.append(
+            {
+                "id": record.get("id"),
+                "createdTime": record.get("createdTime"),
+                "fields": normalized_fields,
+            }
+        )
+
+    if not include_field_metadata:
+        return {"records": normalized_records}
+
+    # Build per-field metadata, keyed by field name
+    field_metadata = {}
+    for field in fields:
+        field_type = field["type"]
+        metadata = {"type": field_type, "id": field["id"]}
+
+        # Add type-specific metadata
+        options = field.get("options", {})
+        if field_type == "currency" and "symbol" in options:
+            metadata["symbol"] = options["symbol"]
+            metadata["precision"] = options.get("precision", 2)
+        elif field_type == "duration" and "durationFormat" in options:
+            metadata["format"] = options["durationFormat"]
+        elif field_type == "percent" and "precision" in options:
+            metadata["precision"] = options["precision"]
+        elif (
+            field_type in ["singleSelect", "multipleSelects"]
+            and "choices" in options
+        ):
+            metadata["choices"] = [choice["name"] for choice in options["choices"]]
+        elif field_type == "rating" and "max" in options:
+            metadata["max"] = options["max"]
+            metadata["icon"] = options.get("icon", "star")
+            metadata["color"] = options.get("color", "yellowBright")
+
+        field_metadata[field["name"]] = metadata
+
+    return {"records": normalized_records, "field_metadata": field_metadata}
+
+
 async def list_records(
    credentials: Credentials,
    base_id: str,
@@ -1249,3 +1410,26 @@ async def list_bases(
    )

    return response.json()
+
+
+async def get_base_tables(
+    credentials: Credentials,
+    base_id: str,
+) -> list[dict]:
+    """
+    Retrieve the tables of one Airtable base.
+
+    Args:
+        credentials: Airtable API credentials
+        base_id: The ID of the base
+
+    Returns:
+        list[dict]: The "tables" entry of the API response, or an empty
+        list when the response has no such entry
+    """
+    tables_endpoint = f"https://api.airtable.com/v0/meta/bases/{base_id}/tables"
+    response = await Requests().get(
+        tables_endpoint,
+        headers={"Authorization": credentials.auth_header()},
+    )
+
+    return response.json().get("tables", [])
--- a/autogpt_platform/backend/backend/blocks/airtable/bases.py
+++ b/autogpt_platform/backend/backend/blocks/airtable/bases.py
@@ -14,13 +14,13 @@ from backend.sdk import (
    SchemaField,
 )

-from ._api import create_base, list_bases
+from ._api import create_base, get_base_tables, list_bases
 from ._config import airtable


 class AirtableCreateBaseBlock(Block):
    """
-    Creates a new base in an Airtable workspace.
+    Creates a new base in an Airtable workspace, or returns existing base if one with the same name exists.
    """

    class Input(BlockSchema):
@@ -31,6 +31,10 @@ class AirtableCreateBaseBlock(Block):
            description="The workspace ID where the base will be created"
        )
        name: str = SchemaField(description="The name of the new base")
+        find_existing: bool = SchemaField(
+            description="If true, return existing base with same name instead of creating duplicate",
+            default=True,
+        )
        tables: list[dict] = SchemaField(
            description="At least one table and field must be specified. Array of table objects to create in the base. Each table should have 'name' and 'fields' properties",
            default=[
@@ -50,14 +54,18 @@ class AirtableCreateBaseBlock(Block):
        )

    class Output(BlockSchema):
-        base_id: str = SchemaField(description="The ID of the created base")
+        base_id: str = SchemaField(description="The ID of the created or found base")
        tables: list[dict] = SchemaField(description="Array of table objects")
        table: dict = SchemaField(description="A single table object")
+        was_created: bool = SchemaField(
+            description="True if a new base was created, False if existing was found",
+            default=True,
+        )

    def __init__(self):
        super().__init__(
            id="f59b88a8-54ce-4676-a508-fd614b4e8dce",
-            description="Create a new base in Airtable",
+            description="Create or find a base in Airtable",
            categories={BlockCategory.DATA},
            input_schema=self.Input,
            output_schema=self.Output,
@@ -66,6 +74,31 @@ class AirtableCreateBaseBlock(Block):
    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
+        # If find_existing is true, check if a base with this name already exists
+        if input_data.find_existing:
+            # List all bases to check for existing one with same name
+            # Note: Airtable API doesn't have a direct search, so we need to list and filter
+            existing_bases = await list_bases(credentials)
+
+            for base in existing_bases.get("bases", []):
+                if base.get("name") == input_data.name:
+                    # Base already exists, return it
+                    base_id = base.get("id")
+                    yield "base_id", base_id
+                    yield "was_created", False
+
+                    # Get the tables for this base
+                    try:
+                        tables = await get_base_tables(credentials, base_id)
+                        yield "tables", tables
+                        for table in tables:
+                            yield "table", table
+                    except Exception:
+                        # If we can't get tables, return empty list
+                        yield "tables", []
+                    return
+
+        # No existing base found or find_existing is false, create new one
        data = await create_base(
            credentials,
            input_data.workspace_id,
@@ -74,6 +107,7 @@ class AirtableCreateBaseBlock(Block):
        )

        yield "base_id", data.get("id", None)
+        yield "was_created", True
        yield "tables", data.get("tables", [])
        for table in data.get("tables", []):
            yield "table", table
--- a/autogpt_platform/backend/backend/blocks/airtable/records.py
+++ b/autogpt_platform/backend/backend/blocks/airtable/records.py
@@ -2,7 +2,7 @@
 Airtable record operation blocks.
 """

-from typing import Optional
+from typing import Optional, cast

 from backend.sdk import (
    APIKeyCredentials,
@@ -18,7 +18,9 @@ from ._api import (
    create_record,
    delete_multiple_records,
    get_record,
+    get_table_schema,
    list_records,
+    normalize_records,
    update_multiple_records,
 )
 from ._config import airtable
@@ -54,12 +56,24 @@ class AirtableListRecordsBlock(Block):
        return_fields: list[str] = SchemaField(
            description="Specific fields to return (comma-separated)", default=[]
        )
+        normalize_output: bool = SchemaField(
+            description="Normalize output to include all fields with proper empty values (disable to skip schema fetch and get raw Airtable response)",
+            default=True,
+        )
+        include_field_metadata: bool = SchemaField(
+            description="Include field type and configuration metadata (requires normalize_output=true)",
+            default=False,
+        )

    class Output(BlockSchema):
        records: list[dict] = SchemaField(description="Array of record objects")
        offset: Optional[str] = SchemaField(
            description="Offset for next page (null if no more records)", default=None
        )
+        field_metadata: Optional[dict] = SchemaField(
+            description="Field type and configuration metadata (only when include_field_metadata=true)",
+            default=None,
+        )

    def __init__(self):
        super().__init__(
@@ -73,6 +87,7 @@ class AirtableListRecordsBlock(Block):
    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
+
        data = await list_records(
            credentials,
            input_data.base_id,
@@ -88,8 +103,33 @@ class AirtableListRecordsBlock(Block):
            fields=input_data.return_fields if input_data.return_fields else None,
        )

-        yield "records", data.get("records", [])
-        yield "offset", data.get("offset", None)
+        records = data.get("records", [])
+
+        # Normalize output if requested
+        if input_data.normalize_output:
+            # Fetch table schema
+            table_schema = await get_table_schema(
+                credentials, input_data.base_id, input_data.table_id_or_name
+            )
+
+            # Normalize the records
+            normalized_data = await normalize_records(
+                records,
+                table_schema,
+                include_field_metadata=input_data.include_field_metadata,
+            )
+
+            yield "records", normalized_data["records"]
+            yield "offset", data.get("offset", None)
+
+            if (
+                input_data.include_field_metadata
+                and "field_metadata" in normalized_data
+            ):
+                yield "field_metadata", normalized_data["field_metadata"]
+        else:
+            yield "records", records
+            yield "offset", data.get("offset", None)


 class AirtableGetRecordBlock(Block):
@@ -104,11 +144,23 @@ class AirtableGetRecordBlock(Block):
        base_id: str = SchemaField(description="The Airtable base ID")
        table_id_or_name: str = SchemaField(description="Table ID or name")
        record_id: str = SchemaField(description="The record ID to retrieve")
+        normalize_output: bool = SchemaField(
+            description="Normalize output to include all fields with proper empty values (disable to skip schema fetch and get raw Airtable response)",
+            default=True,
+        )
+        include_field_metadata: bool = SchemaField(
+            description="Include field type and configuration metadata (requires normalize_output=true)",
+            default=False,
+        )

    class Output(BlockSchema):
        id: str = SchemaField(description="The record ID")
        fields: dict = SchemaField(description="The record fields")
        created_time: str = SchemaField(description="The record created time")
+        field_metadata: Optional[dict] = SchemaField(
+            description="Field type and configuration metadata (only when include_field_metadata=true)",
+            default=None,
+        )

    def __init__(self):
        super().__init__(
@@ -122,6 +174,7 @@ class AirtableGetRecordBlock(Block):
    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
+
        record = await get_record(
            credentials,
            input_data.base_id,
@@ -129,9 +182,34 @@ class AirtableGetRecordBlock(Block):
            input_data.record_id,
        )

-        yield "id", record.get("id", None)
-        yield "fields", record.get("fields", None)
-        yield "created_time", record.get("createdTime", None)
+        # Normalize output if requested
+        if input_data.normalize_output:
+            # Fetch table schema
+            table_schema = await get_table_schema(
+                credentials, input_data.base_id, input_data.table_id_or_name
+            )
+
+            # Normalize the single record (wrap in list and unwrap result)
+            normalized_data = await normalize_records(
+                [record],
+                table_schema,
+                include_field_metadata=input_data.include_field_metadata,
+            )
+
+            normalized_record = normalized_data["records"][0]
+            yield "id", normalized_record.get("id", None)
+            yield "fields", normalized_record.get("fields", None)
+            yield "created_time", normalized_record.get("createdTime", None)
+
+            if (
+                input_data.include_field_metadata
+                and "field_metadata" in normalized_data
+            ):
+                yield "field_metadata", normalized_data["field_metadata"]
+        else:
+            yield "id", record.get("id", None)
+            yield "fields", record.get("fields", None)
+            yield "created_time", record.get("createdTime", None)


 class AirtableCreateRecordsBlock(Block):
@@ -148,6 +226,10 @@ class AirtableCreateRecordsBlock(Block):
        records: list[dict] = SchemaField(
            description="Array of records to create (each with 'fields' object)"
        )
+        skip_normalization: bool = SchemaField(
+            description="Skip output normalization to get raw Airtable response (faster but may have missing fields)",
+            default=False,
+        )
        typecast: bool = SchemaField(
            description="Automatically convert string values to appropriate types",
            default=False,
@@ -173,7 +255,7 @@ class AirtableCreateRecordsBlock(Block):
    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
-        # The create_record API expects records in a specific format
+
        data = await create_record(
            credentials,
            input_data.base_id,
@@ -182,8 +264,22 @@ class AirtableCreateRecordsBlock(Block):
            typecast=input_data.typecast if input_data.typecast else None,
            return_fields_by_field_id=input_data.return_fields_by_field_id,
        )
+        result_records = cast(list[dict], data.get("records", []))

-        yield "records", data.get("records", [])
+        # Normalize output unless explicitly disabled
+        if not input_data.skip_normalization and result_records:
+            # Fetch table schema
+            table_schema = await get_table_schema(
+                credentials, input_data.base_id, input_data.table_id_or_name
+            )
+
+            # Normalize the records
+            normalized_data = await normalize_records(
+                result_records, table_schema, include_field_metadata=False
+            )
+            result_records = normalized_data["records"]
+
+        yield "records", result_records
        details = data.get("details", None)
        if details:
            yield "details", details