From 3ca23876317dd1d784458acd63c273a49a8cf755 Mon Sep 17 00:00:00 2001 From: Bently Date: Thu, 5 Feb 2026 17:31:02 +0000 Subject: [PATCH] feat(blocks): Implement Text Encode block (#11857) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Implements a `TextEncoderBlock` that encodes plain text into escape sequences (the reverse of `TextDecoderBlock`). ## Changes ### Block Implementation - Added `encoder_block.py` with `TextEncoderBlock` in `autogpt_platform/backend/backend/blocks/` - Uses `codecs.encode(text, "unicode_escape").decode("utf-8")` for encoding - Mirrors the structure and patterns of the existing `TextDecoderBlock` - Categorised as `BlockCategory.TEXT` ### Documentation - Added Text Encoder section to `docs/integrations/block-integrations/text.md` (the auto-generated docs file for TEXT category blocks) - Expanded "How it works" with technical details on the encoding method, validation, and edge cases - Added 3 structured use cases per docs guidelines: JSON payload preparation, Config/ENV generation, Snapshot fixtures - Added Text Encoder to the overview table in `docs/integrations/README.md` - Removed standalone `encoder_block.md` (TEXT category blocks belong in `text.md` per `CATEGORY_FILE_MAP` in `generate_block_docs.py`) ### Documentation Formatting (CodeRabbit feedback) - Added blank lines around markdown tables (MD058) - Added `text` language tags to fenced code blocks (MD040) - Restructured use case section with bold headings per coding guidelines ## How Docs Were Synced The `check-docs-sync` CI job runs `poetry run python scripts/generate_block_docs.py --check` which expects blocks to be documented in category-grouped files. Since `TextEncoderBlock` uses `BlockCategory.TEXT`, the `CATEGORY_FILE_MAP` maps it to `text.md` — not a standalone file. The block entry was added to `text.md` following the exact format used by the generator (with `<!-- MANUAL -->` markers for hand-written sections).
## Related Issue Fixes #11111 --------- Co-authored-by: Otto Co-authored-by: lif <19658300+majiayu000@users.noreply.github.com> Co-authored-by: Aryan Kaul <134673289+aryancodes1@users.noreply.github.com> Co-authored-by: Nicholas Tindle Co-authored-by: Nick Tindle --- .../backend/backend/blocks/encoder_block.py | 77 +++++++++++++++++++ .../backend/blocks/test/test_text_encoder.py | 77 +++++++++++++++++++ docs/integrations/README.md | 1 + docs/integrations/block-integrations/text.md | 36 +++++++++ 4 files changed, 191 insertions(+) create mode 100644 autogpt_platform/backend/backend/blocks/encoder_block.py create mode 100644 autogpt_platform/backend/backend/blocks/test/test_text_encoder.py diff --git a/autogpt_platform/backend/backend/blocks/encoder_block.py b/autogpt_platform/backend/backend/blocks/encoder_block.py new file mode 100644 index 0000000000..b60a4ae828 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/encoder_block.py @@ -0,0 +1,77 @@ +"""Text encoding block for converting special characters to escape sequences.""" + +import codecs + +from backend.data.block import ( + Block, + BlockCategory, + BlockOutput, + BlockSchemaInput, + BlockSchemaOutput, +) +from backend.data.model import SchemaField + + +class TextEncoderBlock(Block): + """ + Encodes a string by converting special characters into escape sequences. + + This block is the inverse of TextDecoderBlock. It takes text containing + special characters (like newlines, tabs, etc.) and converts them into + their escape sequence representations (e.g., newline becomes \\n). 
+ """ + + class Input(BlockSchemaInput): + """Input schema for TextEncoderBlock.""" + + text: str = SchemaField( + description="A string containing special characters to be encoded", + placeholder="Your text with newlines and quotes to encode", + ) + + class Output(BlockSchemaOutput): + """Output schema for TextEncoderBlock.""" + + encoded_text: str = SchemaField( + description="The encoded text with special characters converted to escape sequences" + ) + error: str = SchemaField(description="Error message if encoding fails") + + def __init__(self): + super().__init__( + id="5185f32e-4b65-4ecf-8fbb-873f003f09d6", + description="Encodes a string by converting special characters into escape sequences", + categories={BlockCategory.TEXT}, + input_schema=TextEncoderBlock.Input, + output_schema=TextEncoderBlock.Output, + test_input={ + "text": """Hello +World! +This is a "quoted" string.""" + }, + test_output=[ + ( + "encoded_text", + """Hello\\nWorld!\\nThis is a "quoted" string.""", + ) + ], + ) + + async def run(self, input_data: Input, **kwargs) -> BlockOutput: + """ + Encode the input text by converting special characters to escape sequences. + + Args: + input_data: The input containing the text to encode. + **kwargs: Additional keyword arguments (unused). + + Yields: + The encoded text with escape sequences, or an error message if encoding fails. 
+ """ + try: + encoded_text = codecs.encode(input_data.text, "unicode_escape").decode( + "utf-8" + ) + yield "encoded_text", encoded_text + except Exception as e: + yield "error", f"Encoding error: {str(e)}" diff --git a/autogpt_platform/backend/backend/blocks/test/test_text_encoder.py b/autogpt_platform/backend/backend/blocks/test/test_text_encoder.py new file mode 100644 index 0000000000..1e9b9fed4f --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/test/test_text_encoder.py @@ -0,0 +1,77 @@ +import pytest + +from backend.blocks.encoder_block import TextEncoderBlock + + +@pytest.mark.asyncio +async def test_text_encoder_basic(): + """Test basic encoding of newlines and special characters.""" + block = TextEncoderBlock() + result = [] + async for output in block.run(TextEncoderBlock.Input(text="Hello\nWorld")): + result.append(output) + + assert len(result) == 1 + assert result[0][0] == "encoded_text" + assert result[0][1] == "Hello\\nWorld" + + +@pytest.mark.asyncio +async def test_text_encoder_multiple_escapes(): + """Test encoding of multiple escape sequences.""" + block = TextEncoderBlock() + result = [] + async for output in block.run( + TextEncoderBlock.Input(text="Line1\nLine2\tTabbed\rCarriage") + ): + result.append(output) + + assert len(result) == 1 + assert result[0][0] == "encoded_text" + assert "\\n" in result[0][1] + assert "\\t" in result[0][1] + assert "\\r" in result[0][1] + + +@pytest.mark.asyncio +async def test_text_encoder_unicode(): + """Test that unicode characters are handled correctly.""" + block = TextEncoderBlock() + result = [] + async for output in block.run(TextEncoderBlock.Input(text="Hello 世界\n")): + result.append(output) + + assert len(result) == 1 + assert result[0][0] == "encoded_text" + # Unicode characters should be escaped as \uXXXX sequences + assert "\\n" in result[0][1] + + +@pytest.mark.asyncio +async def test_text_encoder_empty_string(): + """Test encoding of an empty string.""" + block = TextEncoderBlock() +
result = [] + async for output in block.run(TextEncoderBlock.Input(text="")): + result.append(output) + + assert len(result) == 1 + assert result[0][0] == "encoded_text" + assert result[0][1] == "" + + +@pytest.mark.asyncio +async def test_text_encoder_error_handling(): + """Test that encoding errors are handled gracefully.""" + from unittest.mock import patch + + block = TextEncoderBlock() + result = [] + + with patch("codecs.encode", side_effect=Exception("Mocked encoding error")): + async for output in block.run(TextEncoderBlock.Input(text="test")): + result.append(output) + + assert len(result) == 1 + assert result[0][0] == "error" + assert "Mocked encoding error" in result[0][1] diff --git a/docs/integrations/README.md b/docs/integrations/README.md index 7f2ea19721..dc9c930ea4 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -192,6 +192,7 @@ Below is a comprehensive list of all available blocks, categorized by their prim | [Get Current Time](block-integrations/text.md#get-current-time) | This block outputs the current time | | [Match Text Pattern](block-integrations/text.md#match-text-pattern) | Matches text against a regex pattern and forwards data to positive or negative output based on the match | | [Text Decoder](block-integrations/text.md#text-decoder) | Decodes a string containing escape sequences into actual text | +| [Text Encoder](block-integrations/text.md#text-encoder) | Encodes a string by converting special characters into escape sequences | | [Text Replace](block-integrations/text.md#text-replace) | This block is used to replace a text with a new text | | [Text Split](block-integrations/text.md#text-split) | This block is used to split a text into a list of strings | | [Word Character Count](block-integrations/text.md#word-character-count) | Counts the number of words and characters in a given text | diff --git a/docs/integrations/block-integrations/text.md b/docs/integrations/block-integrations/text.md index 
e47375196c..d189aa5c3f 100644 --- a/docs/integrations/block-integrations/text.md +++ b/docs/integrations/block-integrations/text.md @@ -380,6 +380,42 @@ This is useful when working with data from APIs or files where escape sequences --- +## Text Encoder + +### What it is +Encodes a string by converting special characters into escape sequences + +### How it works + +The Text Encoder takes the input string and applies Python's `unicode_escape` encoding (equivalent to `codecs.encode(text, "unicode_escape").decode("utf-8")`) to transform special characters like newlines, tabs, and backslashes into their escaped forms. + +The block relies on the input schema to ensure the value is a string; non-string inputs are rejected by validation, and any encoding failures surface as block errors. Non-ASCII characters are emitted as `\xNN`, `\uXXXX`, or `\UXXXXXXXX` sequences depending on the code point, which is useful for ASCII-only payloads. + + +### Inputs + +| Input | Description | Type | Required | +|-------|-------------|------|----------| +| text | A string containing special characters to be encoded | str | Yes | + +### Outputs + +| Output | Description | Type | +|--------|-------------|------| +| error | Error message if encoding fails | str | +| encoded_text | The encoded text with special characters converted to escape sequences | str | + +### Possible use case + +**JSON Payload Preparation**: Encode multiline text before embedding it in JSON string fields so that control characters such as newlines and tabs are escaped; double quotes are left unchanged, so escape them separately. + +**Config/ENV Generation**: Convert template text into escaped strings for `.env` or YAML values that require special character handling. + +**Snapshot Fixtures**: Produce stable escaped strings for golden files or API tests where consistent text representation is needed. + + +--- + ## Text Replace ### What it is