feat(agent, forge): Markdown-formatted history -> message history (#7228)

- Implement message-based history in `ActionHistoryComponent`
- Make non-summarized message count configurable (`ActionHistoryComponent.full_message_count`)
- Run `ActionHistoryComponent` after `SystemComponent` so that history messages are last in the prompt
- Omit final instruction message if prompt already contains assistant messages
- Filter `raw_message` from `ActionProposal.schema()`

---------

Co-authored-by: Krzysztof Czerwinski <kpczerwinski@gmail.com>
2026-04-08 03:00:28 -04:00 · 2024-07-02 03:47:55 +02:00
parent 2fa4fd23af
commit 97e4cceb94
6 changed files with 118 additions and 18 deletions
--- a/autogpt/autogpt/agents/agent.py
+++ b/autogpt/autogpt/agents/agent.py
@@ -111,14 +111,18 @@ class Agent(BaseAgent[OneShotAgentActionProposal], Configurable[AgentSettings]):

        # Components
        self.system = SystemComponent()
-        self.history = ActionHistoryComponent(
-            settings.history,
-            lambda x: self.llm_provider.count_tokens(x, self.llm.name),
-            llm_provider,
-            ActionHistoryConfiguration(
-                model_name=app_config.fast_llm, max_tokens=self.send_token_limit
-            ),
-        ).run_after(WatchdogComponent)
+        self.history = (
+            ActionHistoryComponent(
+                settings.history,
+                lambda x: self.llm_provider.count_tokens(x, self.llm.name),
+                llm_provider,
+                ActionHistoryConfiguration(
+                    model_name=app_config.fast_llm, max_tokens=self.send_token_limit
+                ),
+            )
+            .run_after(WatchdogComponent)
+            .run_after(SystemComponent)
+        )
        if not app_config.noninteractive_mode:
            self.user_interaction = UserInteractionComponent()
        self.file_manager = FileManagerComponent(file_storage, settings)
--- a/autogpt/autogpt/agents/prompt_strategies/one_shot.py
+++ b/autogpt/autogpt/agents/prompt_strategies/one_shot.py
@@ -275,4 +275,5 @@ class OneShotAgentPromptStrategy(PromptStrategy):
            assistant_reply_dict["use_tool"] = response.tool_calls[0].function

        parsed_response = OneShotAgentActionProposal.parse_obj(assistant_reply_dict)
+        parsed_response.raw_message = response.copy()
        return parsed_response
--- a/docs/content/forge/components/built-in-components.md
+++ b/docs/content/forge/components/built-in-components.md
@@ -81,6 +81,7 @@ Keeps track of agent's actions and their outcomes. Provides their summary to the
 | `model_name`           | Name of the llm model used to compress the history      | `ModelName` | `"gpt-3.5-turbo"`  |
 | `max_tokens`           | Maximum number of tokens to use for the history summary | `int`       | `1024`             |
 | `spacy_language_model` | Language model used for summary chunking using spacy    | `str`       | `"en_core_web_sm"` |
+| `full_message_count`   | Number of latest cycles kept unsummarized in the prompt | `int`       | `4`                |

 **MessageProvider**

--- a/forge/forge/agent/forge_agent.py
+++ b/forge/forge/agent/forge_agent.py
@@ -24,7 +24,7 @@ from forge.config.ai_profile import AIProfile
 from forge.file_storage.base import FileStorage
 from forge.llm.prompting.schema import ChatPrompt
 from forge.llm.prompting.utils import dump_prompt
-from forge.llm.providers.schema import AssistantFunctionCall
+from forge.llm.providers.schema import AssistantChatMessage, AssistantFunctionCall
 from forge.llm.providers.utils import function_specs_from_commands
 from forge.models.action import (
    ActionErrorResult,
@@ -178,6 +178,9 @@ class ForgeAgent(ProtocolAgent, BaseAgent):
            use_tool=AssistantFunctionCall(
                name="finish", arguments={"reason": "Unimplemented logic"}
            ),
+            raw_message=AssistantChatMessage(
+                content="finish(reason='Unimplemented logic')"
+            ),
        )

        self.config.cycle_count += 1
--- a/forge/forge/components/action_history/action_history.py
+++ b/forge/forge/components/action_history/action_history.py
@@ -10,6 +10,7 @@ from forge.llm.prompting.utils import indent
 from forge.llm.providers import ChatMessage, MultiProvider
 from forge.llm.providers.multi import ModelName
 from forge.llm.providers.openai import OpenAIModelName
+from forge.llm.providers.schema import ToolResultMessage

 from .model import ActionResult, AnyProposal, Episode, EpisodicActionHistory

@@ -21,6 +22,8 @@ class ActionHistoryConfiguration(BaseModel):
    """Maximum number of tokens to use up with generated history messages"""
    spacy_language_model: str = "en_core_web_sm"
    """Language model used for summary chunking using spacy"""
+    full_message_count: int = 4
+    """Number of latest episodes (action + result) to include as full messages"""


 class ActionHistoryComponent(
@@ -46,12 +49,47 @@ class ActionHistoryComponent(
        self.llm_provider = llm_provider

    def get_messages(self) -> Iterator[ChatMessage]:
-        if progress := self._compile_progress(
-            self.event_history.episodes,
-            self.config.max_tokens,
-            self.count_tokens,
-        ):
-            yield ChatMessage.system(f"## Progress on your Task so far\n\n{progress}")
+        messages: list[ChatMessage] = []
+        step_summaries: list[str] = []
+        tokens: int = 0
+        n_episodes = len(self.event_history.episodes)
+
+        # Include a summary for all except a few latest steps
+        for i, episode in enumerate(reversed(self.event_history.episodes)):
+            # Use raw messages for a few latest steps, summary or format for older steps
+            if i < self.config.full_message_count:
+                messages.insert(0, episode.action.raw_message)
+                tokens += self.count_tokens(str(messages[0]))  # HACK
+                if episode.result:
+                    result_message = self._make_result_message(episode, episode.result)
+                    messages.insert(1, result_message)
+                    tokens += self.count_tokens(str(result_message))  # HACK
+                continue
+            elif episode.summary is None:
+                step_content = indent(episode.format(), 2).strip()
+            else:
+                step_content = episode.summary
+
+            step = f"* Step {n_episodes - i}: {step_content}"
+
+            if self.config.max_tokens and self.count_tokens:
+                step_tokens = self.count_tokens(step)
+                if tokens + step_tokens > self.config.max_tokens:
+                    break
+                tokens += step_tokens
+
+            step_summaries.insert(0, step)
+
+        if step_summaries:
+            step_summaries_fmt = "\n\n".join(step_summaries)
+            yield ChatMessage.system(
+                f"## Progress on your Task so far\n"
+                "Here is a summary of the steps that you have executed so far, "
+                "use this as your consideration for determining the next action!\n"
+                f"{step_summaries_fmt}"
+            )
+
+        yield from messages

    def after_parse(self, result: AnyProposal) -> None:
        self.event_history.register_action(result)
@@ -62,6 +100,41 @@ class ActionHistoryComponent(
            self.llm_provider, self.config.model_name, self.config.spacy_language_model
        )

+    @staticmethod
+    def _make_result_message(episode: Episode, result: ActionResult) -> ChatMessage:
+        if result.status == "success":
+            return (
+                ToolResultMessage(
+                    content=str(result.outputs),
+                    tool_call_id=episode.action.raw_message.tool_calls[0].id,
+                )
+                if episode.action.raw_message.tool_calls
+                else ChatMessage.user(
+                    f"{episode.action.use_tool.name} returned: "
+                    + (
+                        f"```\n{result.outputs}\n```"
+                        if "\n" in str(result.outputs)
+                        else f"`{result.outputs}`"
+                    )
+                )
+            )
+        elif result.status == "error":
+            return (
+                ToolResultMessage(
+                    content=f"{result.reason}\n\n{result.error or ''}".strip(),
+                    is_error=True,
+                    tool_call_id=episode.action.raw_message.tool_calls[0].id,
+                )
+                if episode.action.raw_message.tool_calls
+                else ChatMessage.user(
+                    f"{episode.action.use_tool.name} raised an error: ```\n"
+                    f"{result.reason}\n"
+                    "```"
+                )
+            )
+        else:
+            return ChatMessage.user(result.feedback)
+
    def _compile_progress(
        self,
        episode_history: list[Episode[AnyProposal]],
@@ -76,8 +149,8 @@ class ActionHistoryComponent(
        n_episodes = len(episode_history)

        for i, episode in enumerate(reversed(episode_history)):
-            # Use full format for the latest 4 steps, summary or format for older steps
-            if i < 4 or episode.summary is None:
+            # Use full format for a few latest steps, summary or format for older steps
+            if i < self.config.full_message_count or episode.summary is None:
                step_content = indent(episode.format(), 2).strip()
            else:
                step_content = episode.summary
--- a/forge/forge/models/action.py
+++ b/forge/forge/models/action.py
@@ -3,8 +3,9 @@ from __future__ import annotations
 from typing import Any, Literal, Optional, TypeVar

 from pydantic import BaseModel
+from pydantic.schema import default_ref_template

-from forge.llm.providers.schema import AssistantFunctionCall
+from forge.llm.providers.schema import AssistantChatMessage, AssistantFunctionCall

 from .utils import ModelWithSummary

@@ -13,6 +14,23 @@ class ActionProposal(BaseModel):
    thoughts: str | ModelWithSummary
    use_tool: AssistantFunctionCall

+    raw_message: AssistantChatMessage = None  # type: ignore
+    """
+    The message from which the action proposal was parsed. To be set by the parser.
+    """
+
+    @classmethod
+    def schema(
+        cls, by_alias: bool = True, ref_template: str = default_ref_template, **kwargs
+    ):
+        """
+        The schema for this ActionProposal model, excluding the 'raw_message' property.
+        """
+        schema = super().schema(by_alias=by_alias, ref_template=ref_template, **kwargs)
+        if "raw_message" in schema["properties"]:  # must check because schema is cached
+            del schema["properties"]["raw_message"]
+        return schema
+

 AnyProposal = TypeVar("AnyProposal", bound=ActionProposal)