mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-03 11:24:57 -05:00
Compare commits
10 Commits
otto/copil
...
feat/text-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2eaa13b141 | ||
|
|
283b77e7e0 | ||
|
|
ac5aa0a5f4 | ||
|
|
0f300d7884 | ||
|
|
7dfc816280 | ||
|
|
378126e60f | ||
|
|
21a1d993b8 | ||
|
|
e0862e8086 | ||
|
|
b1259e0bdd | ||
|
|
5244bd94fc |
77
autogpt_platform/backend/backend/blocks/encoder_block.py
Normal file
77
autogpt_platform/backend/backend/blocks/encoder_block.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Text encoding block for converting special characters to escape sequences."""
|
||||
|
||||
import codecs
|
||||
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import SchemaField
|
||||
|
||||
|
||||
class TextEncoderBlock(Block):
|
||||
"""
|
||||
Encodes a string by converting special characters into escape sequences.
|
||||
|
||||
This block is the inverse of TextDecoderBlock. It takes text containing
|
||||
special characters (like newlines, tabs, etc.) and converts them into
|
||||
their escape sequence representations (e.g., newline becomes \\n).
|
||||
"""
|
||||
|
||||
class Input(BlockSchemaInput):
|
||||
"""Input schema for TextEncoderBlock."""
|
||||
|
||||
text: str = SchemaField(
|
||||
description="A string containing special characters to be encoded",
|
||||
placeholder="Your text with newlines and quotes to encode",
|
||||
)
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
"""Output schema for TextEncoderBlock."""
|
||||
|
||||
encoded_text: str = SchemaField(
|
||||
description="The encoded text with special characters converted to escape sequences"
|
||||
)
|
||||
error: str = SchemaField(description="Error message if encoding fails")
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="5185f32e-4b65-4ecf-8fbb-873f003f09d6",
|
||||
description="Encodes a string by converting special characters into escape sequences",
|
||||
categories={BlockCategory.TEXT},
|
||||
input_schema=TextEncoderBlock.Input,
|
||||
output_schema=TextEncoderBlock.Output,
|
||||
test_input={
|
||||
"text": """Hello
|
||||
World!
|
||||
This is a "quoted" string."""
|
||||
},
|
||||
test_output=[
|
||||
(
|
||||
"encoded_text",
|
||||
"""Hello\\nWorld!\\nThis is a "quoted" string.""",
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
|
||||
"""
|
||||
Encode the input text by converting special characters to escape sequences.
|
||||
|
||||
Args:
|
||||
input_data: The input containing the text to encode.
|
||||
**kwargs: Additional keyword arguments (unused).
|
||||
|
||||
Yields:
|
||||
The encoded text with escape sequences, or an error message if encoding fails.
|
||||
"""
|
||||
try:
|
||||
encoded_text = codecs.encode(input_data.text, "unicode_escape").decode(
|
||||
"utf-8"
|
||||
)
|
||||
yield "encoded_text", encoded_text
|
||||
except Exception as e:
|
||||
yield "error", f"Encoding error: {str(e)}"
|
||||
@@ -0,0 +1,77 @@
|
||||
import pytest
|
||||
|
||||
from backend.blocks.encoder_block import TextEncoderBlock
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_text_encoder_basic():
|
||||
"""Test basic encoding of newlines and special characters."""
|
||||
block = TextEncoderBlock()
|
||||
result = []
|
||||
async for output in block.run(TextEncoderBlock.Input(text="Hello\nWorld")):
|
||||
result.append(output)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0][0] == "encoded_text"
|
||||
assert result[0][1] == "Hello\\nWorld"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_text_encoder_multiple_escapes():
|
||||
"""Test encoding of multiple escape sequences."""
|
||||
block = TextEncoderBlock()
|
||||
result = []
|
||||
async for output in block.run(
|
||||
TextEncoderBlock.Input(text="Line1\nLine2\tTabbed\rCarriage")
|
||||
):
|
||||
result.append(output)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0][0] == "encoded_text"
|
||||
assert "\\n" in result[0][1]
|
||||
assert "\\t" in result[0][1]
|
||||
assert "\\r" in result[0][1]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_text_encoder_unicode():
|
||||
"""Test that unicode characters are handled correctly."""
|
||||
block = TextEncoderBlock()
|
||||
result = []
|
||||
async for output in block.run(TextEncoderBlock.Input(text="Hello 世界\n")):
|
||||
result.append(output)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0][0] == "encoded_text"
|
||||
# Unicode characters should be escaped as \uXXXX sequences
|
||||
assert "\\n" in result[0][1]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_text_encoder_empty_string():
|
||||
"""Test encoding of an empty string."""
|
||||
block = TextEncoderBlock()
|
||||
result = []
|
||||
async for output in block.run(TextEncoderBlock.Input(text="")):
|
||||
result.append(output)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0][0] == "encoded_text"
|
||||
assert result[0][1] == ""
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_text_encoder_error_handling():
|
||||
"""Test that encoding errors are handled gracefully."""
|
||||
from unittest.mock import patch
|
||||
|
||||
block = TextEncoderBlock()
|
||||
result = []
|
||||
|
||||
with patch("codecs.encode", side_effect=Exception("Mocked encoding error")):
|
||||
async for output in block.run(TextEncoderBlock.Input(text="test")):
|
||||
result.append(output)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0][0] == "error"
|
||||
assert "Mocked encoding error" in result[0][1]
|
||||
@@ -193,6 +193,7 @@ Below is a comprehensive list of all available blocks, categorized by their prim
|
||||
| [Get Current Time](block-integrations/text.md#get-current-time) | This block outputs the current time |
|
||||
| [Match Text Pattern](block-integrations/text.md#match-text-pattern) | Matches text against a regex pattern and forwards data to positive or negative output based on the match |
|
||||
| [Text Decoder](block-integrations/text.md#text-decoder) | Decodes a string containing escape sequences into actual text |
|
||||
| [Text Encoder](block-integrations/text.md#text-encoder) | Encodes a string by converting special characters into escape sequences |
|
||||
| [Text Replace](block-integrations/text.md#text-replace) | This block is used to replace a text with a new text |
|
||||
| [Text Split](block-integrations/text.md#text-split) | This block is used to split a text into a list of strings |
|
||||
| [Word Character Count](block-integrations/text.md#word-character-count) | Counts the number of words and characters in a given text |
|
||||
|
||||
@@ -380,6 +380,42 @@ This is useful when working with data from APIs or files where escape sequences
|
||||
|
||||
---
|
||||
|
||||
## Text Encoder
|
||||
|
||||
### What it is
|
||||
Encodes a string by converting special characters into escape sequences
|
||||
|
||||
### How it works
|
||||
<!-- MANUAL: how_it_works -->
|
||||
The Text Encoder takes the input string and applies Python's `unicode_escape` encoding (equivalent to `codecs.encode(text, "unicode_escape").decode("utf-8")`) to transform special characters like newlines, tabs, and backslashes into their escaped forms.
|
||||
|
||||
The block relies on the input schema to ensure the value is a string; non-string inputs are rejected by validation, and any encoding failures surface as block errors. Non-ASCII characters are emitted as `\uXXXX` sequences, which is useful for ASCII-only payloads.
|
||||
<!-- END MANUAL -->
|
||||
|
||||
### Inputs
|
||||
|
||||
| Input | Description | Type | Required |
|
||||
|-------|-------------|------|----------|
|
||||
| text | A string containing special characters to be encoded | str | Yes |
|
||||
|
||||
### Outputs
|
||||
|
||||
| Output | Description | Type |
|
||||
|--------|-------------|------|
|
||||
| error | Error message if encoding fails | str |
|
||||
| encoded_text | The encoded text with special characters converted to escape sequences | str |
|
||||
|
||||
### Possible use case
|
||||
<!-- MANUAL: use_case -->
|
||||
**JSON Payload Preparation**: Encode multiline or quoted text before embedding it in JSON string fields to ensure proper escaping.
|
||||
|
||||
**Config/ENV Generation**: Convert template text into escaped strings for `.env` or YAML values that require special character handling.
|
||||
|
||||
**Snapshot Fixtures**: Produce stable escaped strings for golden files or API tests where consistent text representation is needed.
|
||||
<!-- END MANUAL -->
|
||||
|
||||
---
|
||||
|
||||
## Text Replace
|
||||
|
||||
### What it is
|
||||
|
||||
Reference in New Issue
Block a user