Compare commits

...

10 Commits

3 changed files with 36 additions and 5 deletions

View File

@@ -149,9 +149,8 @@ export const chatSlice = createSlice({
} else if (observationID === "run_ipython") {
// For IPython, we consider it successful if there's no error message
const ipythonObs = observation.payload as IPythonObservation;
causeMessage.success = !ipythonObs.message
.toLowerCase()
.includes("error");
// Success is the inverse of the observation's error flag (no scanning of the message text)
causeMessage.success = !ipythonObs.error;
}
if (observationID === "run" || observationID === "run_ipython") {

View File

@@ -40,7 +40,13 @@ class IPythonRunCellObservation(Observation):
@property
def error(self) -> bool:
    """Report whether the cell output looks like a failure.

    IPython cells do not return exit codes, so the output text held in
    ``self.content`` is scanned for a small set of case-sensitive markers
    instead.

    Returns:
        True if any marker occurs anywhere in ``self.content``, otherwise
        False.
    """
    # Check for common error indicators in IPython output
    error_indicators = (
        'ERROR:',
        'Error:',
        'Exception:',
    )
    return any(indicator in self.content for indicator in error_indicators)
@property
def message(self) -> str:
@@ -48,7 +54,7 @@ class IPythonRunCellObservation(Observation):
@property
def success(self) -> bool:
    """Report whether the cell ran without a detected error.

    Returns:
        The logical negation of ``self.error``.
    """
    return not self.error
# String form: the literal '**IPythonRunCellObservation**' header line, a newline, then self.content.
def __str__(self) -> str:
return f'**IPythonRunCellObservation**\n{self.content}'

View File

@@ -40,6 +40,32 @@ def serialization_deserialization(
# Additional tests for various observation subclasses can be included here
def test_ipython_error_detection():
    """Verify error/success reporting of IPythonRunCellObservation.

    For each output string that contains an error marker, both the
    observation's ``error`` property and the ``success`` entry of its
    ``event_to_dict`` form must report a failure. A plain output string must
    be reported as a success.
    """
    from openhands.events.observation import IPythonRunCellObservation

    # Outputs that must be flagged as errors
    failing_outputs = (
        'ERROR: Something went wrong',
        'Error: Invalid syntax',
        'Exception: Division by zero',
    )
    for failing_output in failing_outputs:
        failed_obs = IPythonRunCellObservation(
            content=failing_output, code='print("test")'
        )
        failed_dict = event_to_dict(failed_obs)
        failure_note = f'Failed to detect error in: {failing_output}'
        assert failed_dict['success'] is False, failure_note
        assert failed_obs.error is True, failure_note

    # Output without any marker must count as a success
    clean_obs = IPythonRunCellObservation(
        content='Hello World!', code='print("Hello World!")'
    )
    clean_dict = event_to_dict(clean_obs)
    assert clean_dict['success'] is True, 'Failed to detect success'
    assert clean_obs.error is False, 'Failed to detect success'
def test_success_field_serialization():
# Test success=True
obs = CmdOutputObservation(