fix(service): prevent retry of HTTP 4xx client errors

## Problem
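The service client retry logic was incorrectly retrying HTTP 4xx client errors
(404, 403, 401, etc.). These almost always indicate a problem with the request
itself that retrying will not resolve, so each retry only adds latency and log
noise. Note that this change treats every 4xx status as non-retryable,
including 429 (Too Many Requests).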

## Solution
- **Added HTTPClientError and HTTPServerError exceptions** to categorize HTTP errors
- **Modified retry exclusions** to include HTTPClientError in the exclude_exceptions list
- **Enhanced error handling** to wrap 4xx errors in HTTPClientError (non-retryable) and 5xx errors in HTTPServerError (retryable)
- **Preserved mapped exceptions**: when the server returns a properly formatted RemoteCallError with a mapped exception type (ValueError, etc.), that original exception is re-raised regardless of HTTP status, so whether it is retried depends on the exception type rather than on the status code

## Changes Made
- **New Exception Classes**: Added HTTPClientError and HTTPServerError with status code tracking
- **Improved Error Categorization**: HTTP errors are now properly categorized by status code:
  - 4xx → HTTPClientError (excluded from retries)
  - 5xx → HTTPServerError (can be retried)
  - Mapped exceptions (ValueError, etc.) → Original exception type preserved
- **Clean Logic Flow**: Simplified exception handling logic to check for mapped exceptions first, then fall back to HTTP status categorization
- **Comprehensive Tests**: Added TestHTTPErrorRetryBehavior with coverage for various status codes and exception mapping

## Benefits
- **No more wasted retry attempts** on permanent client errors (404, 403, etc.)
- **Faster error handling** for client errors since they fail immediately
- **Preserved compatibility** with existing exception mapping system
- **Better resource utilization** by avoiding unnecessary retry delays
- **Cleaner logs** with fewer spurious retry warnings

## Files Modified
- `backend/util/service.py`: Core retry logic and error handling improvements
- `backend/util/service_test.py`: Comprehensive test coverage for HTTP error retry behavior

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Zamil Majdy
2025-08-11 11:52:51 +07:00
parent 004011726d
commit 10a402a766
2 changed files with 168 additions and 5 deletions

View File

@@ -111,6 +111,22 @@ class UnhealthyServiceError(ValueError):
return self.message
class HTTPClientError(Exception):
    """Exception for HTTP client errors (4xx status codes) that should not be retried.

    ``args`` is kept equal to the constructor arguments
    ``(status_code, message)`` so that the exception can be rebuilt with
    ``HTTPClientError(*exc.args)``. That is the form used when a class looked
    up in EXCEPTION_MAPPING is re-raised from a remote error, and it is also
    what pickling and copying rely on. Storing only the formatted string in
    ``args`` would make such a rebuild fail with ``TypeError``.

    Attributes:
        status_code: HTTP status code of the failed response.
        message: Error detail, without the ``"HTTP <code>: "`` prefix.
    """

    def __init__(self, status_code: int, message: str):
        # Forward both values so Exception stores them verbatim in self.args.
        super().__init__(status_code, message)
        self.status_code = status_code
        self.message = message

    def __str__(self) -> str:
        # Same text the exception carried when it was built from a single
        # pre-formatted string.
        return f"HTTP {self.status_code}: {self.message}"
class HTTPServerError(Exception):
    """Exception for HTTP server errors (5xx status codes) that can be retried.

    ``args`` is kept equal to the constructor arguments
    ``(status_code, message)`` so that the exception can be rebuilt with
    ``HTTPServerError(*exc.args)``. That is the form used when a class looked
    up in EXCEPTION_MAPPING is re-raised from a remote error, and it is also
    what pickling and copying rely on. Storing only the formatted string in
    ``args`` would make such a rebuild fail with ``TypeError``.

    Attributes:
        status_code: HTTP status code of the failed response.
        message: Error detail, without the ``"HTTP <code>: "`` prefix.
    """

    def __init__(self, status_code: int, message: str):
        # Forward both values so Exception stores them verbatim in self.args.
        super().__init__(status_code, message)
        self.status_code = status_code
        self.message = message

    def __str__(self) -> str:
        # Same text the exception carried when it was built from a single
        # pre-formatted string.
        return f"HTTP {self.status_code}: {self.message}"
EXCEPTION_MAPPING = {
e.__name__: e
for e in [
@@ -119,6 +135,8 @@ EXCEPTION_MAPPING = {
TimeoutError,
ConnectionError,
UnhealthyServiceError,
HTTPClientError,
HTTPServerError,
*[
ErrorType
for _, ErrorType in inspect.getmembers(exceptions)
@@ -313,6 +331,7 @@ def get_service_client(
AttributeError, # Missing attributes
asyncio.CancelledError, # Task was cancelled
concurrent.futures.CancelledError, # Future was cancelled
HTTPClientError, # HTTP 4xx client errors - don't retry
),
)(fn)
@@ -390,11 +409,31 @@ def get_service_client(
self._connection_failure_count = 0
return response.json()
except httpx.HTTPStatusError as e:
error = RemoteCallError.model_validate(e.response.json())
# DEBUG HELP: if you made a custom exception, make sure you override self.args to be how to make your exception
raise EXCEPTION_MAPPING.get(error.type, Exception)(
*(error.args or [str(e)])
)
status_code = e.response.status_code
# Try to parse the error response as RemoteCallError for mapped exceptions
error_response = None
try:
error_response = RemoteCallError.model_validate(e.response.json())
except Exception:
pass
# If we successfully parsed a mapped exception type, re-raise it
if error_response and error_response.type in EXCEPTION_MAPPING:
exception_class = EXCEPTION_MAPPING[error_response.type]
args = error_response.args or [str(e)]
raise exception_class(*args)
# Otherwise categorize by HTTP status code
if 400 <= status_code < 500:
# Client errors (4xx) - wrap to prevent retries
raise HTTPClientError(status_code, str(e))
elif 500 <= status_code < 600:
# Server errors (5xx) - wrap but allow retries
raise HTTPServerError(status_code, str(e))
else:
# Other status codes (1xx, 2xx, 3xx) - re-raise original error
raise e
@_maybe_retry
def _call_method_sync(self, method_name: str, **kwargs: Any) -> Any:

View File

@@ -8,6 +8,8 @@ import pytest
from backend.util.service import (
AppService,
AppServiceClient,
HTTPClientError,
HTTPServerError,
endpoint_to_async,
expose,
get_service_client,
@@ -366,3 +368,125 @@ def test_service_no_retry_when_disabled(server):
# This should fail immediately without retry
with pytest.raises(RuntimeError, match="Intended error for testing"):
client.always_failing_add(5, 3)
class TestHTTPErrorRetryBehavior:
    """Check how failed HTTP responses are translated into exceptions.

    4xx responses are expected to surface as HTTPClientError, 5xx responses as
    HTTPServerError, and responses carrying a mapped exception type as that
    original exception.
    """

    # These tests call the client's private response handler directly, hence
    # the "type: ignore" on that call.

    @staticmethod
    def _failing_response(status_code: int, payload: dict, reason: str) -> Mock:
        """Build a mock response whose raise_for_status() raises HTTPStatusError."""
        response = Mock()
        response.status_code = status_code
        response.json.return_value = payload
        response.raise_for_status.side_effect = httpx.HTTPStatusError(
            reason, request=Mock(), response=response
        )
        return response

    @staticmethod
    def _handle(response: Mock):
        """Feed *response* to a freshly created client's response handler."""
        client = get_service_client(ServiceTestClient)
        return client._handle_call_method_response(  # type: ignore[attr-defined]
            response=response, method_name="test_method"
        )

    def test_http_client_error_not_retried(self):
        """A 404 response is raised as HTTPClientError carrying the status code."""
        response = self._failing_response(
            404, {"message": "Not found"}, "404 Not Found"
        )

        with pytest.raises(HTTPClientError) as caught:
            self._handle(response)

        assert caught.value.status_code == 404
        assert "404" in str(caught.value)

    def test_http_server_error_can_be_retried(self):
        """A 500 response is raised as HTTPServerError carrying the status code."""
        response = self._failing_response(
            500, {"message": "Internal server error"}, "500 Internal Server Error"
        )

        with pytest.raises(HTTPServerError) as caught:
            self._handle(response)

        assert caught.value.status_code == 500
        assert "500" in str(caught.value)

    def test_mapped_exception_preserves_original_type(self):
        """A payload naming a mapped type is re-raised as that type, even on a 400."""
        remote_error = {
            "type": "ValueError",
            "args": ["Invalid parameter value"],
        }
        response = self._failing_response(400, remote_error, "400 Bad Request")

        with pytest.raises(ValueError) as caught:
            self._handle(response)

        assert "Invalid parameter value" in str(caught.value)

    def test_client_error_status_codes_coverage(self):
        """Each of a range of 4xx codes is raised as HTTPClientError."""
        for code in (400, 401, 403, 404, 405, 409, 422, 429):
            response = self._failing_response(
                code, {"message": f"Error {code}"}, f"{code} Error"
            )

            with pytest.raises(HTTPClientError) as caught:
                self._handle(response)

            assert caught.value.status_code == code

    def test_server_error_status_codes_coverage(self):
        """Each of a range of 5xx codes is raised as HTTPServerError."""
        for code in (500, 501, 502, 503, 504, 505):
            response = self._failing_response(
                code, {"message": f"Error {code}"}, f"{code} Error"
            )

            with pytest.raises(HTTPServerError) as caught:
                self._handle(response)

            assert caught.value.status_code == code