Compare commits

..

10 Commits

Author SHA1 Message Date
Bently
2eaa13b141 Merge branch 'dev' into feat/text-encode-block 2026-02-03 10:07:00 +00:00
Nick Tindle
283b77e7e0 docs: sync block docs for TextEncoderBlock 2026-02-02 14:04:21 -06:00
Nicholas Tindle
ac5aa0a5f4 Merge branch 'dev' into feat/text-encode-block 2026-02-02 12:16:53 -06:00
Otto
0f300d7884 feat(blocks): add error handling and tests to TextEncoderBlock
- Add error output field and try/except handling for encoding failures
- Add comprehensive unit tests covering basic encoding, multiple escapes,
  unicode handling, empty strings, and error scenarios

Co-authored-by: lif <19658300+majiayu000@users.noreply.github.com>
Co-authored-by: Aryan Kaul <134673289+aryancodes1@users.noreply.github.com>
2026-02-02 17:52:53 +00:00
root
7dfc816280 style: fix Black formatting in encoder_block.py
- Add blank line after class docstring before nested class
- Reformat test_input dict for proper line length
2026-01-29 13:02:15 +00:00
root
378126e60f docs: fix block docs sync for Text Encoder
- Move Text Encoder docs from standalone encoder_block.md into text.md
  (matches CATEGORY_FILE_MAP for TEXT category blocks)
- Add Text Encoder to overview table in README.md
- Remove orphaned encoder_block.md
- Follows exact format expected by generate_block_docs.py
2026-01-29 12:53:09 +00:00
root
21a1d993b8 docs: address CodeRabbit review feedback for encoder_block.md
- Expand 'How it works' with technical details (unicode_escape encoding, validation, edge cases)
- Add blank lines around tables (MD058)
- Add language tags to fenced code blocks (MD040)
- Replace 'Possible use case' with 3 structured use cases per docs guidelines
- Separate example into its own section
2026-01-29 12:43:45 +00:00
Bently
e0862e8086 fix: add docs and fix error handling in TextEncoderBlock
- Add encoder_block.md documentation
- Remove error handling that yields undeclared output field
- Match pattern used by TextDecoderBlock
2026-01-28 11:05:03 +00:00
Bently
b1259e0bdd docs(blocks): Add docstrings and error handling to TextEncoderBlock
- Add module, class, and method docstrings for 80%+ coverage
- Add try/except error handling per CodeRabbit review
- Use inherited error field from BlockSchemaOutput
2026-01-27 15:58:19 +00:00
Bently
5244bd94fc feat(blocks): Implement Text Encode block (fixes #11111) 2026-01-27 15:48:10 +00:00
5 changed files with 204 additions and 111 deletions

View File

@@ -1419,7 +1419,6 @@ async def _stream_chat_chunks(
return
except Exception as e:
last_error = e
if _is_retryable_error(e) and retry_count < MAX_RETRIES:
retry_count += 1
# Calculate delay with exponential backoff
@@ -1435,24 +1434,12 @@ async def _stream_chat_chunks(
continue # Retry the stream
else:
# Non-retryable error or max retries exceeded
_log_api_error(
error=e,
session_id=session.session_id if session else None,
message_count=len(messages) if messages else None,
model=model,
retry_count=retry_count,
logger.error(
f"Error in stream (not retrying): {e!s}",
exc_info=True,
)
error_code = None
error_text = str(e)
error_details = _extract_api_error_details(e)
if error_details.get("response_body"):
body = error_details["response_body"]
if isinstance(body, dict) and body.get("error", {}).get(
"message"
):
error_text = body["error"]["message"]
if _is_region_blocked_error(e):
error_code = "MODEL_NOT_AVAILABLE_REGION"
error_text = (
@@ -1469,12 +1456,9 @@ async def _stream_chat_chunks(
# If we exit the retry loop without returning, it means we exhausted retries
if last_error:
_log_api_error(
error=last_error,
session_id=session.session_id if session else None,
message_count=len(messages) if messages else None,
model=model,
retry_count=MAX_RETRIES,
logger.error(
f"Max retries ({MAX_RETRIES}) exceeded. Last error: {last_error!s}",
exc_info=True,
)
yield StreamError(errorText=f"Max retries exceeded: {last_error!s}")
yield StreamFinish()
@@ -1895,7 +1879,6 @@ async def _generate_llm_continuation(
break # Success, exit retry loop
except Exception as e:
last_error = e
if _is_retryable_error(e) and retry_count < MAX_RETRIES:
retry_count += 1
delay = min(
@@ -1909,23 +1892,17 @@ async def _generate_llm_continuation(
await asyncio.sleep(delay)
continue
else:
# Non-retryable error - log details and exit gracefully
_log_api_error(
error=e,
session_id=session_id,
message_count=len(messages) if messages else None,
model=config.model,
retry_count=retry_count,
# Non-retryable error - log and exit gracefully
logger.error(
f"Non-retryable error in LLM continuation: {e!s}",
exc_info=True,
)
return
if last_error:
_log_api_error(
error=last_error,
session_id=session_id,
message_count=len(messages) if messages else None,
model=config.model,
retry_count=MAX_RETRIES,
logger.error(
f"Max retries ({MAX_RETRIES}) exceeded for LLM continuation. "
f"Last error: {last_error!s}"
)
return
@@ -1963,78 +1940,3 @@ async def _generate_llm_continuation(
except Exception as e:
logger.error(f"Failed to generate LLM continuation: {e}", exc_info=True)
def _log_api_error(
error: Exception,
session_id: str | None = None,
message_count: int | None = None,
model: str | None = None,
retry_count: int = 0,
) -> None:
"""Log detailed API error information for debugging."""
details = _extract_api_error_details(error)
details["session_id"] = session_id
details["message_count"] = message_count
details["model"] = model
details["retry_count"] = retry_count
if isinstance(error, RateLimitError):
logger.warning(f"Rate limit error: {details}")
elif isinstance(error, APIConnectionError):
logger.warning(f"API connection error: {details}")
elif isinstance(error, APIStatusError) and error.status_code >= 500:
logger.error(f"API server error (5xx): {details}")
else:
logger.error(f"API error: {details}")
def _extract_api_error_details(error: Exception) -> dict[str, Any]:
"""Extract detailed information from OpenAI/OpenRouter API errors."""
error_msg = str(error)
details: dict[str, Any] = {
"error_type": type(error).__name__,
"error_message": error_msg[:500] + "..." if len(error_msg) > 500 else error_msg,
}
if hasattr(error, "code"):
details["code"] = error.code
if hasattr(error, "param"):
details["param"] = error.param
if isinstance(error, APIStatusError):
details["status_code"] = error.status_code
details["request_id"] = getattr(error, "request_id", None)
if hasattr(error, "body") and error.body:
details["response_body"] = _sanitize_error_body(error.body)
if hasattr(error, "response") and error.response:
headers = error.response.headers
details["openrouter_provider"] = headers.get("x-openrouter-provider")
details["openrouter_model"] = headers.get("x-openrouter-model")
details["retry_after"] = headers.get("retry-after")
details["rate_limit_remaining"] = headers.get("x-ratelimit-remaining")
return details
def _sanitize_error_body(body: Any, max_length: int = 2000) -> dict[str, Any] | None:
"""Extract only safe fields from error response body to avoid logging sensitive data."""
if not isinstance(body, dict):
return None
safe_fields = ("message", "type", "code", "param", "error")
sanitized: dict[str, Any] = {}
for field in safe_fields:
if field in body:
value = body[field]
if field == "error" and isinstance(value, dict):
sanitized[field] = _sanitize_error_body(value, max_length)
elif isinstance(value, str) and len(value) > max_length:
sanitized[field] = value[:max_length] + "...[truncated]"
else:
sanitized[field] = value
return sanitized if sanitized else None

View File

@@ -0,0 +1,77 @@
"""Text encoding block for converting special characters to escape sequences."""
import codecs
from backend.data.block import (
Block,
BlockCategory,
BlockOutput,
BlockSchemaInput,
BlockSchemaOutput,
)
from backend.data.model import SchemaField
class TextEncoderBlock(Block):
"""
Encodes a string by converting special characters into escape sequences.
This block is the inverse of TextDecoderBlock. It takes text containing
special characters (like newlines, tabs, etc.) and converts them into
their escape sequence representations (e.g., newline becomes \\n).
"""
class Input(BlockSchemaInput):
"""Input schema for TextEncoderBlock."""
text: str = SchemaField(
description="A string containing special characters to be encoded",
placeholder="Your text with newlines and quotes to encode",
)
class Output(BlockSchemaOutput):
"""Output schema for TextEncoderBlock."""
encoded_text: str = SchemaField(
description="The encoded text with special characters converted to escape sequences"
)
error: str = SchemaField(description="Error message if encoding fails")
def __init__(self):
super().__init__(
id="5185f32e-4b65-4ecf-8fbb-873f003f09d6",
description="Encodes a string by converting special characters into escape sequences",
categories={BlockCategory.TEXT},
input_schema=TextEncoderBlock.Input,
output_schema=TextEncoderBlock.Output,
test_input={
"text": """Hello
World!
This is a "quoted" string."""
},
test_output=[
(
"encoded_text",
"""Hello\\nWorld!\\nThis is a "quoted" string.""",
)
],
)
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
"""
Encode the input text by converting special characters to escape sequences.
Args:
input_data: The input containing the text to encode.
**kwargs: Additional keyword arguments (unused).
Yields:
The encoded text with escape sequences, or an error message if encoding fails.
"""
try:
encoded_text = codecs.encode(input_data.text, "unicode_escape").decode(
"utf-8"
)
yield "encoded_text", encoded_text
except Exception as e:
yield "error", f"Encoding error: {str(e)}"

View File

@@ -0,0 +1,77 @@
import pytest
from backend.blocks.encoder_block import TextEncoderBlock
@pytest.mark.asyncio
async def test_text_encoder_basic():
"""Test basic encoding of newlines and special characters."""
block = TextEncoderBlock()
result = []
async for output in block.run(TextEncoderBlock.Input(text="Hello\nWorld")):
result.append(output)
assert len(result) == 1
assert result[0][0] == "encoded_text"
assert result[0][1] == "Hello\\nWorld"
@pytest.mark.asyncio
async def test_text_encoder_multiple_escapes():
"""Test encoding of multiple escape sequences."""
block = TextEncoderBlock()
result = []
async for output in block.run(
TextEncoderBlock.Input(text="Line1\nLine2\tTabbed\rCarriage")
):
result.append(output)
assert len(result) == 1
assert result[0][0] == "encoded_text"
assert "\\n" in result[0][1]
assert "\\t" in result[0][1]
assert "\\r" in result[0][1]
@pytest.mark.asyncio
async def test_text_encoder_unicode():
"""Test that unicode characters are handled correctly."""
block = TextEncoderBlock()
result = []
async for output in block.run(TextEncoderBlock.Input(text="Hello 世界\n")):
result.append(output)
assert len(result) == 1
assert result[0][0] == "encoded_text"
# Unicode characters should be escaped as \uXXXX sequences
assert "\\n" in result[0][1]
@pytest.mark.asyncio
async def test_text_encoder_empty_string():
"""Test encoding of an empty string."""
block = TextEncoderBlock()
result = []
async for output in block.run(TextEncoderBlock.Input(text="")):
result.append(output)
assert len(result) == 1
assert result[0][0] == "encoded_text"
assert result[0][1] == ""
@pytest.mark.asyncio
async def test_text_encoder_error_handling():
"""Test that encoding errors are handled gracefully."""
from unittest.mock import patch
block = TextEncoderBlock()
result = []
with patch("codecs.encode", side_effect=Exception("Mocked encoding error")):
async for output in block.run(TextEncoderBlock.Input(text="test")):
result.append(output)
assert len(result) == 1
assert result[0][0] == "error"
assert "Mocked encoding error" in result[0][1]

View File

@@ -193,6 +193,7 @@ Below is a comprehensive list of all available blocks, categorized by their prim
| [Get Current Time](block-integrations/text.md#get-current-time) | This block outputs the current time |
| [Match Text Pattern](block-integrations/text.md#match-text-pattern) | Matches text against a regex pattern and forwards data to positive or negative output based on the match |
| [Text Decoder](block-integrations/text.md#text-decoder) | Decodes a string containing escape sequences into actual text |
| [Text Encoder](block-integrations/text.md#text-encoder) | Encodes a string by converting special characters into escape sequences |
| [Text Replace](block-integrations/text.md#text-replace) | This block is used to replace a text with a new text |
| [Text Split](block-integrations/text.md#text-split) | This block is used to split a text into a list of strings |
| [Word Character Count](block-integrations/text.md#word-character-count) | Counts the number of words and characters in a given text |

View File

@@ -380,6 +380,42 @@ This is useful when working with data from APIs or files where escape sequences
---
## Text Encoder
### What it is
Encodes a string by converting special characters into escape sequences
### How it works
<!-- MANUAL: how_it_works -->
The Text Encoder takes the input string and applies Python's `unicode_escape` encoding (equivalent to `codecs.encode(text, "unicode_escape").decode("utf-8")`) to transform special characters like newlines, tabs, and backslashes into their escaped forms.
The block relies on the input schema to ensure the value is a string; non-string inputs are rejected by validation, and any encoding failures surface as block errors. Non-ASCII characters are emitted as `\uXXXX` sequences, which is useful for ASCII-only payloads.
<!-- END MANUAL -->
### Inputs
| Input | Description | Type | Required |
|-------|-------------|------|----------|
| text | A string containing special characters to be encoded | str | Yes |
### Outputs
| Output | Description | Type |
|--------|-------------|------|
| error | Error message if encoding fails | str |
| encoded_text | The encoded text with special characters converted to escape sequences | str |
### Possible use case
<!-- MANUAL: use_case -->
**JSON Payload Preparation**: Encode multiline or quoted text before embedding it in JSON string fields to ensure proper escaping.
**Config/ENV Generation**: Convert template text into escaped strings for `.env` or YAML values that require special character handling.
**Snapshot Fixtures**: Produce stable escaped strings for golden files or API tests where consistent text representation is needed.
<!-- END MANUAL -->
---
## Text Replace
### What it is