class TextEncoderBlock(Block):
    """
    Turn special characters in a string into their escape-sequence spelling.

    Counterpart of TextDecoderBlock: characters such as newlines and tabs are
    rewritten as escape sequences (a newline becomes \\n) using Python's
    ``unicode_escape`` codec. Double quotes pass through unchanged, as the
    block's own test fixture shows.
    """

    class Input(BlockSchemaInput):
        """Fields accepted by TextEncoderBlock."""

        text: str = SchemaField(
            description="A string containing special characters to be encoded",
            placeholder="Your text with newlines and quotes to encode",
        )

    class Output(BlockSchemaOutput):
        """Fields produced by TextEncoderBlock."""

        encoded_text: str = SchemaField(
            description="The encoded text with special characters converted to escape sequences"
        )
        error: str = SchemaField(description="Error message if encoding fails")

    def __init__(self):
        # Fixture pair used for the block's built-in self-test: raw text with
        # real line breaks, and the same text with those breaks escaped.
        sample_text = """Hello
World!
This is a "quoted" string."""
        sample_encoded = """Hello\\nWorld!\\nThis is a "quoted" string."""

        super().__init__(
            id="5185f32e-4b65-4ecf-8fbb-873f003f09d6",
            description="Encodes a string by converting special characters into escape sequences",
            categories={BlockCategory.TEXT},
            input_schema=TextEncoderBlock.Input,
            output_schema=TextEncoderBlock.Output,
            test_input={"text": sample_text},
            test_output=[("encoded_text", sample_encoded)],
        )

    async def run(self, input_data: Input, **kwargs) -> BlockOutput:
        """
        Escape the special characters of ``input_data.text``.

        Args:
            input_data: Validated block input; only its ``text`` field is read.
            **kwargs: Extra keyword arguments; not used here.

        Yields:
            ``("encoded_text", <escaped string>)`` on success, or
            ``("error", "Encoding error: <details>")`` if an exception is
            raised while encoding or while the result is being yielded.
        """
        try:
            # codecs.encode returns bytes for this codec; decode them back
            # into a str so the output matches the declared schema type.
            escaped_bytes = codecs.encode(input_data.text, "unicode_escape")
            yield "encoded_text", escaped_bytes.decode("utf-8")
        except Exception as exc:
            yield "error", f"Encoding error: {str(exc)}"
@pytest.mark.asyncio
async def test_text_encoder_empty_string():
    """An empty input produces exactly one, empty ``encoded_text`` output."""
    encoder = TextEncoderBlock()
    outputs = [item async for item in encoder.run(TextEncoderBlock.Input(text=""))]

    assert len(outputs) == 1
    name, value = outputs[0]
    assert name == "encoded_text"
    assert value == ""


@pytest.mark.asyncio
async def test_text_encoder_error_handling():
    """A failure raised by ``codecs.encode`` is reported on the ``error`` output."""
    from unittest.mock import patch

    encoder = TextEncoderBlock()
    failure = Exception("Mocked encoding error")

    # Force the codec call to fail so the block's except branch is exercised.
    with patch("codecs.encode", side_effect=failure):
        outputs = [
            item async for item in encoder.run(TextEncoderBlock.Input(text="test"))
        ]

    assert len(outputs) == 1
    name, message = outputs[0]
    assert name == "error"
    assert "Mocked encoding error" in message
escape sequences | | [Text Replace](block-integrations/text.md#text-replace) | This block is used to replace a text with a new text | | [Text Split](block-integrations/text.md#text-split) | This block is used to split a text into a list of strings | | [Word Character Count](block-integrations/text.md#word-character-count) | Counts the number of words and characters in a given text | diff --git a/docs/integrations/block-integrations/text.md b/docs/integrations/block-integrations/text.md index e47375196c..d189aa5c3f 100644 --- a/docs/integrations/block-integrations/text.md +++ b/docs/integrations/block-integrations/text.md @@ -380,6 +380,42 @@ This is useful when working with data from APIs or files where escape sequences --- +## Text Encoder + +### What it is +Encodes a string by converting special characters into escape sequences + +### How it works + +The Text Encoder takes the input string and applies Python's `unicode_escape` encoding (equivalent to `codecs.encode(text, "unicode_escape").decode("utf-8")`) to transform special characters like newlines, tabs, and backslashes into their escaped forms. + +The block relies on the input schema to ensure the value is a string; non-string inputs are rejected by validation, and any encoding failures surface as block errors. Non-ASCII characters are emitted as `\xNN`, `\uXXXX`, or `\UXXXXXXXX` sequences (depending on the code point), which is useful for ASCII-only payloads. + + +### Inputs + +| Input | Description | Type | Required | +|-------|-------------|------|----------| +| text | A string containing special characters to be encoded | str | Yes | + +### Outputs + +| Output | Description | Type | +|--------|-------------|------| +| error | Error message if encoding fails | str | +| encoded_text | The encoded text with special characters converted to escape sequences | str | + +### Possible use case + +**JSON Payload Preparation**: Encode multiline or quoted text before embedding it in JSON string fields so that newlines, tabs, and backslashes are escaped (double quotes are not escaped by this block and must be handled separately).
+ +**Config/ENV Generation**: Convert template text into escaped strings for `.env` or YAML values that require special character handling. + +**Snapshot Fixtures**: Produce stable escaped strings for golden files or API tests where consistent text representation is needed. + + +--- + ## Text Replace ### What it is