fix(classic): handle parallel tool calls in action history

When prompts encourage parallel tool execution and the LLM makes multiple tool calls simultaneously, the Anthropic API requires a tool_result message for EACH tool_use. Previously, we only created one tool result for the first tool call, causing "tool_use ids were found without tool_result blocks" errors.

This fix:
- Adds _make_result_messages() to create results for ALL tool calls
- Maps tool names to their outputs from parallel execution results
- Handles errors per-tool from the _errors list
- Falls back gracefully when results are missing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-28 00:18:25 -05:00 · 2026-01-20 23:18:15 -06:00
parent 326554d89a
commit a67d475a69
1 changed files with 100 additions and 4 deletions
--- a/classic/forge/forge/components/action_history/action_history.py
+++ b/classic/forge/forge/components/action_history/action_history.py
@@ -66,9 +66,15 @@ class ActionHistoryComponent(
                messages.insert(0, episode.action.raw_message)
                tokens += self.count_tokens(str(messages[0]))  # HACK
                if episode.result:
-                    result_message = self._make_result_message(episode, episode.result)
-                    messages.insert(1, result_message)
-                    tokens += self.count_tokens(str(result_message))  # HACK
+                    # Create result messages for ALL tool calls
+                    # (required by Anthropic API)
+                    result_messages = self._make_result_messages(
+                        episode, episode.result
+                    )
+                    # Insert right after the action message, preserving order
+                    for j, result_message in enumerate(result_messages):
+                        messages.insert(1 + j, result_message)
+                        tokens += self.count_tokens(str(result_message))  # HACK
                continue
            elif episode.summary is None:
                step_content = indent(episode.format(), 2).strip()
@@ -130,7 +136,97 @@ class ActionHistoryComponent(
            )

    @staticmethod
-    def _make_result_message(episode: Episode, result: ActionResult) -> ChatMessage:
+    def _make_result_messages(
+        episode: Episode, result: ActionResult
+    ) -> list[ChatMessage]:
+        """Create result messages for all tool calls in an episode.
+
+        When multiple tools are called in parallel, we need to create a
+        ToolResultMessage for EACH tool_call to satisfy API requirements
+        (both Anthropic and OpenAI require tool_use to be followed by tool_result).
+
+        Args:
+            episode: The episode containing the action and its raw message
+            result: The result of executing the action(s); it is not mutated
+
+        Returns:
+            List of ChatMessage objects (ToolResultMessage or user message)
+        """
+        tool_calls = (
+            episode.action.raw_message.tool_calls
+            if episode.action.raw_message.tool_calls
+            else []
+        )
+
+        # Single tool call or no tool calls - use simple logic
+        if len(tool_calls) <= 1:
+            return [ActionHistoryComponent._make_single_result_message(episode, result)]
+
+        # Multiple tool calls - create a result for each
+        messages: list[ChatMessage] = []
+
+        # Work on a shallow copy: popping "_errors" must not alter result.outputs
+        outputs_dict: dict = {}
+        errors_list: list[str] = []
+        if result.status == "success" and isinstance(result.outputs, dict):
+            outputs_dict = dict(result.outputs)
+            errors_list = outputs_dict.pop("_errors", [])
+        elif result.status == "error":
+            # All tools failed - create error results for all
+            for tool_call in tool_calls:
+                messages.append(
+                    ToolResultMessage(
+                        content=f"{result.reason}\n\n{result.error or ''}".strip(),
+                        is_error=True,
+                        tool_call_id=tool_call.id,
+                    )
+                )
+            return messages
+
+        # Create result message for each tool call
+        for tool_call in tool_calls:
+            tool_name = tool_call.function.name
+            tool_id = tool_call.id
+
+            # Check if this tool's result is in the outputs
+            if tool_name in outputs_dict:
+                output = outputs_dict[tool_name]
+                messages.append(
+                    ToolResultMessage(
+                        content=str(output),
+                        tool_call_id=tool_id,
+                    )
+                )
+            else:
+                # Check if there's an error for this tool
+                error_msg = next(
+                    (e for e in errors_list if e.startswith(f"{tool_name}:")), None
+                )
+                if error_msg:
+                    messages.append(
+                        ToolResultMessage(
+                            content=error_msg,
+                            is_error=True,
+                            tool_call_id=tool_id,
+                        )
+                    )
+                else:
+                    # Fallback - tool not found in results
+                    messages.append(
+                        ToolResultMessage(
+                            content="No result returned",
+                            is_error=True,
+                            tool_call_id=tool_id,
+                        )
+                    )
+
+        return messages
+
+    @staticmethod
+    def _make_single_result_message(
+        episode: Episode, result: ActionResult
+    ) -> ChatMessage:
+        """Create a result message for a single tool call."""
        if result.status == "success":
            return (
                ToolResultMessage(