fix(backend): retry transient HTTP errors during polling, validate agent_json responses

This commit is contained in:
Zamil Majdy
2026-02-25 15:44:11 +07:00
parent 0bc098acb1
commit 9b3e25d98e
2 changed files with 111 additions and 3 deletions

View File

@@ -191,6 +191,21 @@ async def _submit_and_poll(
return _create_error_response(
"Agent Generator job not found or expired", "job_not_found"
)
status_code = e.response.status_code
if status_code in {429, 503, 504, 408}:
consecutive_errors += 1
logger.warning(
f"Transient HTTP {status_code} polling job {job_id} "
f"({consecutive_errors}/{MAX_CONSECUTIVE_POLL_ERRORS}): {e}"
)
if consecutive_errors >= MAX_CONSECUTIVE_POLL_ERRORS:
error_type, error_msg = _classify_http_error(e)
logger.error(
f"Giving up on job {job_id} after "
f"{MAX_CONSECUTIVE_POLL_ERRORS} consecutive poll errors: {error_msg}"
)
return _create_error_response(error_msg, error_type)
continue
error_type, error_msg = _classify_http_error(e)
logger.error(f"Poll error for job {job_id}: {error_msg}")
return _create_error_response(error_msg, error_type)
@@ -321,7 +336,12 @@ async def generate_agent_external(
return result
# The job result contains {"agent_json": {...}, "success": True, ...}
return result.get("agent_json")
agent_json = result.get("agent_json")
if not isinstance(agent_json, dict):
return _create_error_response(
"Agent Generator returned no agent_json in result", "invalid_response"
)
return agent_json
async def generate_agent_patch_external(
@@ -362,7 +382,12 @@ async def generate_agent_patch_external(
"questions": result.get("questions", []),
}
return result.get("agent_json")
agent_json = result.get("agent_json")
if not isinstance(agent_json, dict):
return _create_error_response(
"Agent Generator returned no agent_json in result", "invalid_response"
)
return agent_json
async def customize_template_external(
@@ -407,7 +432,12 @@ async def customize_template_external(
"questions": result.get("questions", []),
}
return result.get("agent_json")
agent_json = result.get("agent_json")
if not isinstance(agent_json, dict):
return _create_error_response(
"Agent Generator returned no agent_json in result", "invalid_response"
)
return agent_json
# ---------------------------------------------------------------------------

View File

@@ -215,6 +215,68 @@ class TestSubmitAndPoll:
assert result["type"] == "error"
assert result["error_type"] == "job_not_found"
@pytest.mark.asyncio
async def test_poll_retries_on_transient_http_status(self):
    """A 429 raised by the first poll is retried and the next poll's result is returned."""
    # Submission succeeds and hands back the job id.
    submit_resp = MagicMock(raise_for_status=MagicMock())
    submit_resp.json.return_value = {"job_id": "job-transient"}

    # First poll: HTTPStatusError carrying a 429 response.
    throttled_response = MagicMock(status_code=429)
    throttled_error = httpx.HTTPStatusError(
        "Too Many Requests", request=MagicMock(), response=throttled_response
    )

    # Second poll: the job reports completion with a result payload.
    ok_poll_resp = MagicMock(raise_for_status=MagicMock())
    ok_poll_resp.json.return_value = {
        "job_id": "job-transient",
        "status": "completed",
        "result": {"data": "recovered"},
    }

    mock_client = AsyncMock()
    mock_client.post.return_value = submit_resp
    mock_client.get.side_effect = [throttled_error, ok_poll_resp]

    # asyncio.sleep is replaced with an AsyncMock for the duration of the call.
    client_patch = patch.object(service, "_get_client", return_value=mock_client)
    sleep_patch = patch("asyncio.sleep", new_callable=AsyncMock)
    with client_patch, sleep_patch:
        result = await service._submit_and_poll("/api/test", {})

    # One failed poll plus one successful poll.
    assert result == {"data": "recovered"}
    assert mock_client.get.call_count == 2
@pytest.mark.asyncio
async def test_poll_does_not_retry_non_transient_http_status(self):
    """A 500 raised while polling yields an error result after a single GET."""
    submit_resp = MagicMock(raise_for_status=MagicMock())
    submit_resp.json.return_value = {"job_id": "job-500"}

    # Every poll attempt raises the same HTTPStatusError with a 500 response.
    server_error_response = MagicMock(status_code=500)
    server_error = httpx.HTTPStatusError(
        "Internal Server Error",
        request=MagicMock(),
        response=server_error_response,
    )

    mock_client = AsyncMock()
    mock_client.post.return_value = submit_resp
    mock_client.get.side_effect = server_error

    client_patch = patch.object(service, "_get_client", return_value=mock_client)
    sleep_patch = patch("asyncio.sleep", new_callable=AsyncMock)
    with client_patch, sleep_patch:
        result = await service._submit_and_poll("/api/test", {})

    assert result["type"] == "error"
    assert result["error_type"] == "http_error"
    # Exactly one GET: the failure was not retried.
    assert mock_client.get.call_count == 1
@pytest.mark.asyncio
async def test_poll_timeout(self):
"""Test that polling times out after MAX_POLL_TIME_SECONDS."""
@@ -457,6 +519,22 @@ class TestGenerateAgentExternal:
assert result.get("type") == "error"
assert result.get("error_type") == "connection_error"
@pytest.mark.asyncio
async def test_generate_agent_missing_agent_json(self):
    """A job result without an ``agent_json`` key produces an ``invalid_response`` error."""
    dummy_mode_off = patch.object(service, "_is_dummy_mode", return_value=False)
    poll_patch = patch.object(service, "_submit_and_poll", new_callable=AsyncMock)

    with dummy_mode_off, poll_patch as mock_poll:
        # The polled result has no "agent_json" entry at all.
        mock_poll.return_value = {"success": True}
        result = await service.generate_agent_external({"steps": ["Step 1"]})

    assert result is not None
    assert result.get("type") == "error"
    assert result.get("error_type") == "invalid_response"
class TestGenerateAgentPatchExternal:
"""Test generate_agent_patch_external function."""