Add task tracking tool for long-horizon tasks (#10166)

Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
2026-01-09 14:57:59 -05:00 · 2025-08-16 20:05:59 +07:00
parent 0ec6ed20cb
commit fe486ad1f1
32 changed files with 1017 additions and 39 deletions
--- a/frontend/tests/components/features/chat/task-tracking-observation-content.test.tsx
+++ b/frontend/tests/components/features/chat/task-tracking-observation-content.test.tsx
@@ -0,0 +1,135 @@
+import { render, screen } from "@testing-library/react";
+import { describe, it, expect, vi } from "vitest";
+import { TaskTrackingObservationContent } from "#/components/features/chat/task-tracking-observation-content";
+import { TaskTrackingObservation } from "#/types/core/observations";
+
+// Mock the translation hook
+vi.mock("react-i18next", () => ({
+  useTranslation: () => ({
+    t: (key: string) => {
+      const translations: Record<string, string> = {
+        "TASK_TRACKING_OBSERVATION$TASK_LIST": "Task List",
+        "TASK_TRACKING_OBSERVATION$TASK_ID": "ID",
+        "TASK_TRACKING_OBSERVATION$TASK_NOTES": "Notes",
+        "TASK_TRACKING_OBSERVATION$RESULT": "Result",
+      };
+      return translations[key] || key;
+    },
+  }),
+}));
+
+describe("TaskTrackingObservationContent", () => {
+  const mockEvent: TaskTrackingObservation = {
+    id: 123,
+    timestamp: "2024-01-01T00:00:00Z",
+    source: "agent",
+    observation: "task_tracking",
+    content: "Task tracking operation completed successfully",
+    cause: 122,
+    message: "Task tracking operation completed successfully",
+    extras: {
+      command: "plan",
+      task_list: [
+        {
+          id: "task-1",
+          title: "Implement feature A",
+          status: "todo",
+          notes: "This is a test task",
+        },
+        {
+          id: "task-2",
+          title: "Fix bug B",
+          status: "in_progress",
+        },
+        {
+          id: "task-3",
+          title: "Deploy to production",
+          status: "done",
+          notes: "Completed successfully",
+        },
+      ],
+    },
+  };
+
+  it("does not render command section", () => {
+    render(<TaskTrackingObservationContent event={mockEvent} />);
+
+    expect(screen.queryByText("Command")).not.toBeInTheDocument();
+    expect(screen.queryByText("plan")).not.toBeInTheDocument();
+  });
+
+  it("renders task list when command is 'plan' and tasks exist", () => {
+    render(<TaskTrackingObservationContent event={mockEvent} />);
+
+    expect(screen.getByText("Task List (3 items)")).toBeInTheDocument();
+    expect(screen.getByText("Implement feature A")).toBeInTheDocument();
+    expect(screen.getByText("Fix bug B")).toBeInTheDocument();
+    expect(screen.getByText("Deploy to production")).toBeInTheDocument();
+  });
+
+  it("displays correct status icons and badges", () => {
+    render(<TaskTrackingObservationContent event={mockEvent} />);
+
+    // Check for status text (the icons are emojis)
+    expect(screen.getByText("todo")).toBeInTheDocument();
+    expect(screen.getByText("in progress")).toBeInTheDocument();
+    expect(screen.getByText("done")).toBeInTheDocument();
+  });
+
+  it("displays task IDs and notes", () => {
+    render(<TaskTrackingObservationContent event={mockEvent} />);
+
+    expect(screen.getByText("ID: task-1")).toBeInTheDocument();
+    expect(screen.getByText("ID: task-2")).toBeInTheDocument();
+    expect(screen.getByText("ID: task-3")).toBeInTheDocument();
+
+    expect(screen.getByText("Notes: This is a test task")).toBeInTheDocument();
+    expect(screen.getByText("Notes: Completed successfully")).toBeInTheDocument();
+  });
+
+  it("renders result section when content exists", () => {
+    render(<TaskTrackingObservationContent event={mockEvent} />);
+
+    expect(screen.getByText("Result")).toBeInTheDocument();
+    expect(screen.getByText("Task tracking operation completed successfully")).toBeInTheDocument();
+  });
+
+  it("does not render task list when command is not 'plan'", () => {
+    const eventWithoutPlan = {
+      ...mockEvent,
+      extras: {
+        ...mockEvent.extras,
+        command: "view",
+      },
+    };
+
+    render(<TaskTrackingObservationContent event={eventWithoutPlan} />);
+    // Substring (regex) match: the heading renders as "Task List (N items)".
+    expect(screen.queryByText(/Task List/)).not.toBeInTheDocument();
+  });
+
+  it("does not render task list when task list is empty", () => {
+    const eventWithEmptyTasks = {
+      ...mockEvent,
+      extras: {
+        ...mockEvent.extras,
+        task_list: [],
+      },
+    };
+
+    render(<TaskTrackingObservationContent event={eventWithEmptyTasks} />);
+    // Substring (regex) match: the heading renders as "Task List (N items)".
+    expect(screen.queryByText(/Task List/)).not.toBeInTheDocument();
+  });
+
+  it("does not render result section when content is empty", () => {
+    const eventWithoutContent = {
+      ...mockEvent,
+      content: "",
+    };
+
+    render(<TaskTrackingObservationContent event={eventWithoutContent} />);
+
+    expect(screen.queryByText("Result")).not.toBeInTheDocument();
+  });
+});
--- a/frontend/src/components/features/chat/event-content-helpers/get-action-content.ts
+++ b/frontend/src/components/features/chat/event-content-helpers/get-action-content.ts
@@ -9,6 +9,7 @@ import {
  ThinkAction,
  OpenHandsAction,
  FinishAction,
+  TaskTrackingAction,
 } from "#/types/core/actions";
 import { getDefaultEventContent, MAX_CONTENT_LENGTH } from "./shared";
 import i18n from "#/i18n";
@@ -79,6 +80,38 @@ const getThinkActionContent = (event: ThinkAction): string =>

 const getFinishActionContent = (event: FinishAction): string =>
  event.args.final_thought.trim();
+
+// Markdown summary of a task-tracking action: the command and, for "plan",
+// a numbered task list (or an explicit "Empty" marker when no tasks are given).
+const getTaskTrackingActionContent = (event: TaskTrackingAction): string => {
+  const { command, task_list: taskList } = event.args;
+  let content = `**Command:** \`${command}\``;
+
+  if (command === "plan" && taskList && taskList.length > 0) {
+    content += `\n\n**Task List (${taskList.length} ${taskList.length === 1 ? "item" : "items"}):**\n`;
+
+    taskList.forEach((task, index) => {
+      const statusIcon =
+        {
+          todo: "⏳",
+          in_progress: "🔄",
+          done: "✅",
+        }[task.status] || "❓";
+
+      // Global replace: turn every underscore in the status into a space.
+      content += `\n${index + 1}. ${statusIcon} **[${task.status.toUpperCase().replace(/_/g, " ")}]** ${task.title}`;
+      content += `\n   *ID: ${task.id}*`;
+      if (task.notes) {
+        content += `\n   *Notes: ${task.notes}*`;
+      }
+    });
+  } else if (command === "plan") {
+    content += "\n\n**Task List:** Empty";
+  }
+
+  return content;
+};
+
 const getNoContentActionContent = (): string => "";

 export const getActionContent = (event: OpenHandsAction): string => {
@@ -102,6 +135,8 @@ export const getActionContent = (event: OpenHandsAction): string => {
      return getThinkActionContent(event);
    case "finish":
      return getFinishActionContent(event);
+    case "task_tracking":
+      return getTaskTrackingActionContent(event);
    default:
      return getDefaultEventContent(event);
  }
--- a/frontend/src/components/features/chat/event-content-helpers/get-observation-content.ts
+++ b/frontend/src/components/features/chat/event-content-helpers/get-observation-content.ts
@@ -6,6 +6,7 @@ import {
  BrowseObservation,
  OpenHandsObservation,
  RecallObservation,
+  TaskTrackingObservation,
 } from "#/types/core/observations";
 import { getObservationResult } from "./get-observation-result";
 import { getDefaultEventContent, MAX_CONTENT_LENGTH } from "./shared";
@@ -102,6 +103,40 @@ const getRecallObservationContent = (event: RecallObservation): string => {
  return content;
 };

+// Markdown for a task-tracking observation: command, "plan" task list, result.
+const getTaskTrackingObservationContent = (
+  event: TaskTrackingObservation,
+): string => {
+  const { command, task_list: taskList } = event.extras;
+  let content = `**Command:** \`${command}\``;
+
+  if (command === "plan" && taskList.length > 0) {
+    content += `\n\n**Task List (${taskList.length} ${taskList.length === 1 ? "item" : "items"}):**\n`;
+    taskList.forEach((task, index) => {
+      const statusIcon =
+        {
+          todo: "⏳",
+          in_progress: "🔄",
+          done: "✅",
+        }[task.status] || "❓";
+      // Global replace: turn every underscore in the status into a space.
+      content += `\n${index + 1}. ${statusIcon} **[${task.status.toUpperCase().replace(/_/g, " ")}]** ${task.title}`;
+      content += `\n   *ID: ${task.id}*`;
+      if (task.notes) {
+        content += `\n   *Notes: ${task.notes}*`;
+      }
+    });
+  } else if (command === "plan") {
+    content += "\n\n**Task List:** Empty";
+  }
+
+  if (event.content && event.content.trim()) {
+    content += `\n\n**Result:** ${event.content.trim()}`;
+  }
+
+  return content;
+};
+
 export const getObservationContent = (event: OpenHandsObservation): string => {
  switch (event.observation) {
    case "read":
@@ -118,6 +153,8 @@ export const getObservationContent = (event: OpenHandsObservation): string => {
      return getBrowseObservationContent(event);
    case "recall":
      return getRecallObservationContent(event);
+    case "task_tracking":
+      return getTaskTrackingObservationContent(event);
    default:
      return getDefaultEventContent(event);
  }
--- a/frontend/src/components/features/chat/event-message.tsx
+++ b/frontend/src/components/features/chat/event-message.tsx
@@ -1,4 +1,5 @@
 import React from "react";
+import { useTranslation } from "react-i18next";
 import { ConfirmationButtons } from "#/components/shared/buttons/confirmation-buttons";
 import { OpenHandsAction } from "#/types/core/actions";
 import {
@@ -10,12 +11,14 @@ import {
  isFinishAction,
  isRejectObservation,
  isMcpObservation,
+  isTaskTrackingObservation,
 } from "#/types/core/guards";
 import { OpenHandsObservation } from "#/types/core/observations";
 import { ImageCarousel } from "../images/image-carousel";
 import { ChatMessage } from "./chat-message";
 import { ErrorMessage } from "./error-message";
 import { MCPObservationContent } from "./mcp-observation-content";
+import { TaskTrackingObservationContent } from "./task-tracking-observation-content";
 import { getObservationResult } from "./event-content-helpers/get-observation-result";
 import { getEventContent } from "./event-content-helpers/get-event-content";
 import { GenericEventMessage } from "./generic-event-message";
@@ -58,6 +61,7 @@ export function EventMessage({
  actions,
  isInLast10Actions,
 }: EventMessageProps) {
+  const { t } = useTranslation();
  const shouldShowConfirmationButtons =
    isLastMessage && event.source === "agent" && isAwaitingUserConfirmation;

@@ -209,6 +213,34 @@ export function EventMessage({
    );
  }

+  if (isTaskTrackingObservation(event)) {
+    const { command } = event.extras;
+    let title: React.ReactNode;
+    let initiallyExpanded = false;
+
+    // Determine title and expansion state based on command
+    if (command === "plan") {
+      title = t("OBSERVATION_MESSAGE$TASK_TRACKING_PLAN");
+      initiallyExpanded = true;
+    } else {
+      // command === "view"
+      title = t("OBSERVATION_MESSAGE$TASK_TRACKING_VIEW");
+      initiallyExpanded = false;
+    }
+
+    return (
+      <div>
+        <GenericEventMessage
+          title={title}
+          details={<TaskTrackingObservationContent event={event} />}
+          success={getObservationResult(event)}
+          initiallyExpanded={initiallyExpanded}
+        />
+        {shouldShowConfirmationButtons && <ConfirmationButtons />}
+      </div>
+    );
+  }
+
  return (
    <div>
      {isOpenHandsAction(event) && hasThoughtProperty(event.args) && (
--- a/frontend/src/components/features/chat/generic-event-message.tsx
+++ b/frontend/src/components/features/chat/generic-event-message.tsx
@@ -13,14 +13,16 @@ interface GenericEventMessageProps {
  title: React.ReactNode;
  details: string | React.ReactNode;
  success?: ObservationResultStatus;
+  initiallyExpanded?: boolean;
 }

 export function GenericEventMessage({
  title,
  details,
  success,
+  initiallyExpanded = false,
 }: GenericEventMessageProps) {
-  const [showDetails, setShowDetails] = React.useState(false);
+  const [showDetails, setShowDetails] = React.useState(initiallyExpanded);

  return (
    <div className="flex flex-col gap-2 border-l-2 pl-2 my-2 py-2 border-neutral-300 text-sm w-full">
--- a/frontend/src/components/features/chat/task-tracking-observation-content.tsx
+++ b/frontend/src/components/features/chat/task-tracking-observation-content.tsx
@@ -0,0 +1,110 @@
+import React from "react";
+import { useTranslation } from "react-i18next";
+import { TaskTrackingObservation } from "#/types/core/observations";
+
+interface TaskTrackingObservationContentProps {
+  event: TaskTrackingObservation;
+}
+
+/** Shows the task list (for "plan") and the result text of an observation. */
+export function TaskTrackingObservationContent({
+  event,
+}: TaskTrackingObservationContentProps) {
+  const { t } = useTranslation();
+  const { command, task_list: taskList } = event.extras;
+  const shouldShowTaskList = command === "plan" && taskList.length > 0;
+
+  const getStatusIcon = (status: string) => {
+    switch (status) {
+      case "todo":
+        return "⏳";
+      case "in_progress":
+        return "🔄";
+      case "done":
+        return "✅";
+      default:
+        return "❓";
+    }
+  };
+
+  const getStatusClassName = (status: string) => {
+    if (status === "done") {
+      return "bg-green-800 text-green-200";
+    }
+    if (status === "in_progress") {
+      return "bg-yellow-800 text-yellow-200";
+    }
+    return "bg-gray-700 text-gray-300";
+  };
+
+  return (
+    <div className="flex flex-col gap-4">
+      {/* Task List section - only show for 'plan' command */}
+      {shouldShowTaskList && (
+        <div className="flex flex-col gap-2">
+          <div className="flex items-center justify-between">
+            <h3 className="text-sm font-semibold text-gray-300">
+              {t("TASK_TRACKING_OBSERVATION$TASK_LIST")} ({taskList.length}{" "}
+              {taskList.length === 1 ? "item" : "items"})
+            </h3>
+          </div>
+          <div className="p-3 bg-gray-900 rounded-md overflow-auto text-gray-300 max-h-[400px] shadow-inner">
+            <div className="space-y-3">
+              {taskList.map((task, index) => (
+                <div key={task.id} className="border-l-2 border-gray-600 pl-3">
+                  <div className="flex items-start gap-2">
+                    <span className="text-lg">
+                      {getStatusIcon(task.status)}
+                    </span>
+                    <div className="flex-1">
+                      <div className="flex items-center gap-2 mb-1">
+                        <span className="text-sm text-gray-400">
+                          {index + 1}.
+                        </span>
+                        <span
+                          className={`text-xs px-2 py-1 rounded uppercase font-semibold ${getStatusClassName(
+                            task.status,
+                          )}`}
+                        >
+                          {task.status.replace(/_/g, " ")}
+                        </span>
+                      </div>
+                      <h4 className="font-medium text-white mb-1">
+                        {task.title}
+                      </h4>
+                      <p className="text-xs text-gray-400 mb-1">
+                        {t("TASK_TRACKING_OBSERVATION$TASK_ID")}: {task.id}
+                      </p>
+                      {task.notes && (
+                        <p className="text-sm text-gray-300 italic">
+                          {t("TASK_TRACKING_OBSERVATION$TASK_NOTES")}:{" "}
+                          {task.notes}
+                        </p>
+                      )}
+                    </div>
+                  </div>
+                </div>
+              ))}
+            </div>
+          </div>
+        </div>
+      )}
+
+      {/* Result message - only show if there's meaningful content */}
+      {event.content && event.content.trim() && (
+        <div className="flex flex-col gap-2">
+          <div className="flex items-center justify-between">
+            <h3 className="text-sm font-semibold text-gray-300">
+              {t("TASK_TRACKING_OBSERVATION$RESULT")}
+            </h3>
+          </div>
+          <div className="p-3 bg-gray-900 rounded-md overflow-auto text-gray-300 shadow-inner">
+            <pre className="whitespace-pre-wrap text-sm">
+              {event.content.trim()}
+            </pre>
+          </div>
+        </div>
+      )}
+    </div>
+  );
+}
--- a/frontend/src/i18n/declaration.ts
+++ b/frontend/src/i18n/declaration.ts
@@ -37,8 +37,14 @@ export enum I18nKey {
  EVENT$UNKNOWN_EVENT = "EVENT$UNKNOWN_EVENT",
  OBSERVATION$COMMAND_NO_OUTPUT = "OBSERVATION$COMMAND_NO_OUTPUT",
  OBSERVATION$MCP_NO_OUTPUT = "OBSERVATION$MCP_NO_OUTPUT",
+  OBSERVATION$TASK_TRACKING_NO_OUTPUT = "OBSERVATION$TASK_TRACKING_NO_OUTPUT",
  MCP_OBSERVATION$ARGUMENTS = "MCP_OBSERVATION$ARGUMENTS",
  MCP_OBSERVATION$OUTPUT = "MCP_OBSERVATION$OUTPUT",
+  TASK_TRACKING_OBSERVATION$TASK_LIST = "TASK_TRACKING_OBSERVATION$TASK_LIST",
+  TASK_TRACKING_OBSERVATION$OUTPUT = "TASK_TRACKING_OBSERVATION$OUTPUT",
+  TASK_TRACKING_OBSERVATION$TASK_ID = "TASK_TRACKING_OBSERVATION$TASK_ID",
+  TASK_TRACKING_OBSERVATION$TASK_NOTES = "TASK_TRACKING_OBSERVATION$TASK_NOTES",
+  TASK_TRACKING_OBSERVATION$RESULT = "TASK_TRACKING_OBSERVATION$RESULT",
  OBSERVATION$ERROR_PREFIX = "OBSERVATION$ERROR_PREFIX",
  TASK$ADDRESSING_TASK = "TASK$ADDRESSING_TASK",
  SECRETS$SECRET_VALUE_REQUIRED = "SECRETS$SECRET_VALUE_REQUIRED",
@@ -483,6 +489,7 @@ export enum I18nKey {
  ACTION_MESSAGE$THINK = "ACTION_MESSAGE$THINK",
  ACTION_MESSAGE$SYSTEM = "ACTION_MESSAGE$SYSTEM",
  ACTION_MESSAGE$CONDENSATION = "ACTION_MESSAGE$CONDENSATION",
+  ACTION_MESSAGE$TASK_TRACKING = "ACTION_MESSAGE$TASK_TRACKING",
  OBSERVATION_MESSAGE$RUN = "OBSERVATION_MESSAGE$RUN",
  OBSERVATION_MESSAGE$RUN_IPYTHON = "OBSERVATION_MESSAGE$RUN_IPYTHON",
  OBSERVATION_MESSAGE$READ = "OBSERVATION_MESSAGE$READ",
@@ -492,6 +499,8 @@ export enum I18nKey {
  OBSERVATION_MESSAGE$MCP = "OBSERVATION_MESSAGE$MCP",
  OBSERVATION_MESSAGE$RECALL = "OBSERVATION_MESSAGE$RECALL",
  OBSERVATION_MESSAGE$THINK = "OBSERVATION_MESSAGE$THINK",
+  OBSERVATION_MESSAGE$TASK_TRACKING_PLAN = "OBSERVATION_MESSAGE$TASK_TRACKING_PLAN",
+  OBSERVATION_MESSAGE$TASK_TRACKING_VIEW = "OBSERVATION_MESSAGE$TASK_TRACKING_VIEW",
  EXPANDABLE_MESSAGE$SHOW_DETAILS = "EXPANDABLE_MESSAGE$SHOW_DETAILS",
  EXPANDABLE_MESSAGE$HIDE_DETAILS = "EXPANDABLE_MESSAGE$HIDE_DETAILS",
  AI_SETTINGS$TITLE = "AI_SETTINGS$TITLE",
--- a/frontend/src/i18n/translation.json
+++ b/frontend/src/i18n/translation.json
@@ -591,6 +591,22 @@
        "de": "[MCP-Tool wurde ohne Ausgabe ausgeführt]",
        "uk": "[Інструмент MCP завершив виконання без виводу]"
    },
+    "OBSERVATION$TASK_TRACKING_NO_OUTPUT": {
+        "en": "[Task tracking completed with no output]",
+        "ja": "[タスクトラッキングは出力なしで完了しました]",
+        "zh-CN": "[任务跟踪完成，没有输出]",
+        "zh-TW": "[任務跟踪完成，沒有輸出]",
+        "ko-KR": "[작업 추적이 출력 없이 완료되었습니다]",
+        "no": "[Oppgavesporing fullført uten utdata]",
+        "it": "[Tracciamento attività completato senza output]",
+        "pt": "[Rastreamento de tarefas concluído sem saída]",
+        "es": "[Seguimiento de tareas completado sin salida]",
+        "ar": "[اكتمل تتبع المهام بدون مخرجات]",
+        "fr": "[Suivi des tâches terminé sans sortie]",
+        "tr": "[Görev takibi çıktı olmadan tamamlandı]",
+        "de": "[Aufgabenverfolgung ohne Ausgabe abgeschlossen]",
+        "uk": "[Відстеження завдань завершено без виводу]"
+    },
    "MCP_OBSERVATION$ARGUMENTS": {
        "en": "Arguments",
        "ja": "引数",
@@ -623,6 +639,86 @@
        "de": "Ausgabe",
        "uk": "Вивід"
    },
+    "TASK_TRACKING_OBSERVATION$TASK_LIST": {
+        "en": "Task List",
+        "ja": "タスクリスト",
+        "zh-CN": "任务列表",
+        "zh-TW": "任務列表",
+        "ko-KR": "작업 목록",
+        "no": "Oppgaveliste",
+        "it": "Elenco attività",
+        "pt": "Lista de tarefas",
+        "es": "Lista de tareas",
+        "ar": "قائمة المهام",
+        "fr": "Liste des tâches",
+        "tr": "Görev listesi",
+        "de": "Aufgabenliste",
+        "uk": "Список завдань"
+    },
+    "TASK_TRACKING_OBSERVATION$OUTPUT": {
+        "en": "Output",
+        "ja": "出力",
+        "zh-CN": "输出",
+        "zh-TW": "輸出",
+        "ko-KR": "출력",
+        "no": "Utdata",
+        "it": "Output",
+        "pt": "Saída",
+        "es": "Salida",
+        "ar": "المخرجات",
+        "fr": "Sortie",
+        "tr": "Çıktı",
+        "de": "Ausgabe",
+        "uk": "Вивід"
+    },
+    "TASK_TRACKING_OBSERVATION$TASK_ID": {
+        "en": "ID",
+        "ja": "ID",
+        "zh-CN": "ID",
+        "zh-TW": "ID",
+        "ko-KR": "ID",
+        "no": "ID",
+        "it": "ID",
+        "pt": "ID",
+        "es": "ID",
+        "ar": "المعرف",
+        "fr": "ID",
+        "tr": "ID",
+        "de": "ID",
+        "uk": "ID"
+    },
+    "TASK_TRACKING_OBSERVATION$TASK_NOTES": {
+        "en": "Notes",
+        "ja": "メモ",
+        "zh-CN": "备注",
+        "zh-TW": "備註",
+        "ko-KR": "메모",
+        "no": "Notater",
+        "it": "Note",
+        "pt": "Notas",
+        "es": "Notas",
+        "ar": "ملاحظات",
+        "fr": "Notes",
+        "tr": "Notlar",
+        "de": "Notizen",
+        "uk": "Примітки"
+    },
+    "TASK_TRACKING_OBSERVATION$RESULT": {
+        "en": "Result",
+        "ja": "結果",
+        "zh-CN": "结果",
+        "zh-TW": "結果",
+        "ko-KR": "결과",
+        "no": "Resultat",
+        "it": "Risultato",
+        "pt": "Resultado",
+        "es": "Resultado",
+        "ar": "النتيجة",
+        "fr": "Résultat",
+        "tr": "Sonuç",
+        "de": "Ergebnis",
+        "uk": "Результат"
+    },
    "OBSERVATION$ERROR_PREFIX": {
        "en": "error:",
        "ja": "エラー:",
@@ -7727,6 +7823,22 @@
        "tr": "Yoğunlaşma",
        "uk": "Конденсація"
    },
+    "ACTION_MESSAGE$TASK_TRACKING": {
+        "en": "Managing tasks",
+        "zh-CN": "管理任务",
+        "zh-TW": "管理任務",
+        "ko-KR": "작업 관리",
+        "ja": "タスク管理",
+        "no": "Administrerer oppgaver",
+        "ar": "إدارة المهام",
+        "de": "Aufgaben verwalten",
+        "fr": "Gestion des tâches",
+        "it": "Gestione delle attività",
+        "pt": "Gerenciando tarefas",
+        "es": "Gestionando tareas",
+        "tr": "Görevleri yönetiyor",
+        "uk": "Керування завданнями"
+    },
    "OBSERVATION_MESSAGE$RUN": {
        "en": "Ran <cmd>{{command}}</cmd>",
        "zh-CN": "运行 <cmd>{{command}}</cmd>",
@@ -7871,6 +7983,38 @@
        "de": "Gedanke",
        "uk": "Думка"
    },
+    "OBSERVATION_MESSAGE$TASK_TRACKING_PLAN": {
+        "en": "Agent updated the plan",
+        "zh-CN": "代理更新了计划",
+        "zh-TW": "代理更新了計劃",
+        "ko-KR": "에이전트가 계획을 업데이트했습니다",
+        "ja": "エージェントがプランを更新しました",
+        "no": "Agent oppdaterte planen",
+        "ar": "قام الوكيل بتحديث الخطة",
+        "de": "Agent hat den Plan aktualisiert",
+        "fr": "L'agent a mis à jour le plan",
+        "it": "L'agente ha aggiornato il piano",
+        "pt": "O agente atualizou o plano",
+        "es": "El agente actualizó el plan",
+        "tr": "Ajan planı güncelledi",
+        "uk": "Агент оновив план"
+    },
+    "OBSERVATION_MESSAGE$TASK_TRACKING_VIEW": {
+        "en": "Agent checked the current plan",
+        "zh-CN": "代理检查了当前计划",
+        "zh-TW": "代理檢查了當前計劃",
+        "ko-KR": "에이전트가 현재 계획을 확인했습니다",
+        "ja": "エージェントが現在のプランを確認しました",
+        "no": "Agent sjekket gjeldende plan",
+        "ar": "تحقق الوكيل من الخطة الحالية",
+        "de": "Agent hat den aktuellen Plan überprüft",
+        "fr": "L'agent a vérifié le plan actuel",
+        "it": "L'agente ha controllato il piano attuale",
+        "pt": "O agente verificou o plano atual",
+        "es": "El agente verificó el plan actual",
+        "tr": "Ajan mevcut planı kontrol etti",
+        "uk": "Агент перевірив поточний план"
+    },
    "EXPANDABLE_MESSAGE$SHOW_DETAILS": {
        "en": "Show details",
        "zh-CN": "显示详情",
--- a/frontend/src/services/observations.ts
+++ b/frontend/src/services/observations.ts
@@ -52,6 +52,7 @@ export function handleObservationMessage(message: ObservationMessage) {
    case ObservationType.RECALL:
    case ObservationType.ERROR:
    case ObservationType.MCP:
+    case ObservationType.TASK_TRACKING:
      break; // We don't display the default message for these observations
    default:
      break;
--- a/frontend/src/types/action-type.tsx
+++ b/frontend/src/types/action-type.tsx
@@ -44,6 +44,9 @@ enum ActionType {

  // Interact with the MCP server.
  MCP = "call_tool_mcp",
+
+  // Views or updates the task list for task management.
+  TASK_TRACKING = "task_tracking",
 }

 export default ActionType;
--- a/frontend/src/types/core/actions.ts
+++ b/frontend/src/types/core/actions.ts
@@ -162,6 +162,21 @@ export interface MCPAction extends OpenHandsActionEvent<"call_tool_mcp"> {
  };
 }

+export interface TaskTrackingAction
+  extends OpenHandsActionEvent<"task_tracking"> {
+  source: "agent";
+  args: {
+    command: string;
+    task_list: Array<{
+      id: string;
+      title: string;
+      status: "todo" | "in_progress" | "done";
+      notes?: string;
+    }>;
+    thought: string;
+  };
+}
+
 export type OpenHandsAction =
  | UserMessageAction
  | AssistantMessageAction
@@ -178,4 +193,5 @@ export type OpenHandsAction =
  | FileWriteAction
  | RejectAction
  | RecallAction
-  | MCPAction;
+  | MCPAction
+  | TaskTrackingAction;
--- a/frontend/src/types/core/base.ts
+++ b/frontend/src/types/core/base.ts
@@ -18,6 +18,7 @@ export type OpenHandsEventType =
  | "recall"
  | "mcp"
  | "call_tool_mcp"
+  | "task_tracking"
  | "user_rejected";

 export type OpenHandsSourceType = "agent" | "user" | "environment";
--- a/frontend/src/types/core/guards.ts
+++ b/frontend/src/types/core/guards.ts
@@ -6,6 +6,7 @@ import {
  SystemMessageAction,
  CommandAction,
  FinishAction,
+  TaskTrackingAction,
 } from "./actions";
 import {
  AgentStateChangeObservation,
@@ -13,6 +14,7 @@ import {
  ErrorObservation,
  MCPObservation,
  OpenHandsObservation,
+  TaskTrackingObservation,
 } from "./observations";
 import { StatusUpdate } from "./variances";

@@ -87,6 +89,16 @@ export const isMcpObservation = (
 ): event is MCPObservation =>
  isOpenHandsObservation(event) && event.observation === "mcp";

+export const isTaskTrackingAction = (
+  event: OpenHandsParsedEvent,
+): event is TaskTrackingAction =>
+  isOpenHandsAction(event) && event.action === "task_tracking";
+
+export const isTaskTrackingObservation = (
+  event: OpenHandsParsedEvent,
+): event is TaskTrackingObservation =>
+  isOpenHandsObservation(event) && event.observation === "task_tracking";
+
 export const isStatusUpdate = (event: unknown): event is StatusUpdate =>
  typeof event === "object" &&
  event !== null &&
--- a/frontend/src/types/core/observations.ts
+++ b/frontend/src/types/core/observations.ts
@@ -146,6 +146,20 @@ export interface UserRejectedObservation
  extras: Record<string, unknown>;
 }

+export interface TaskTrackingObservation
+  extends OpenHandsObservationEvent<"task_tracking"> {
+  source: "agent";
+  extras: {
+    command: string;
+    task_list: Array<{
+      id: string;
+      title: string;
+      status: "todo" | "in_progress" | "done";
+      notes?: string;
+    }>;
+  };
+}
+
 export type OpenHandsObservation =
  | AgentStateChangeObservation
  | AgentThinkObservation
@@ -160,4 +174,5 @@ export type OpenHandsObservation =
  | ErrorObservation
  | RecallObservation
  | MCPObservation
-  | UserRejectedObservation;
+  | UserRejectedObservation
+  | TaskTrackingObservation;
--- a/frontend/src/types/observation-type.tsx
+++ b/frontend/src/types/observation-type.tsx
@@ -40,6 +40,9 @@ enum ObservationType {

  // A no-op observation
  NULL = "null",
+
+  // Result of a task tracking operation
+  TASK_TRACKING = "task_tracking",
 }

 export default ObservationType;
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -21,6 +21,9 @@ from openhands.agenthub.codeact_agent.tools.llm_based_edit import LLMBasedFileEd
 from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
    create_str_replace_editor_tool,
 )
+from openhands.agenthub.codeact_agent.tools.task_tracker import (
+    create_task_tracker_tool,
+)
 from openhands.agenthub.codeact_agent.tools.think import ThinkTool
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
@@ -98,7 +101,7 @@ class CodeActAgent(Agent):
        if self._prompt_manager is None:
            self._prompt_manager = PromptManager(
                prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
-                system_prompt_filename=self.config.system_prompt_filename,
+                system_prompt_filename=self.config.resolved_system_prompt_filename,
            )

        return self._prompt_manager
@@ -136,6 +139,9 @@ class CodeActAgent(Agent):
                tools.append(BrowserTool)
        if self.config.enable_jupyter:
            tools.append(IPythonTool)
+        if self.config.enable_plan_mode:
+            # In plan mode, we use the task_tracker tool for task management
+            tools.append(create_task_tracker_tool(use_short_tool_desc))
        if self.config.enable_llm_editor:
            tools.append(LLMBasedFileEditTool)
        elif self.config.enable_editor:
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@@ -35,11 +35,13 @@ from openhands.events.action import (
    FileReadAction,
    IPythonRunCellAction,
    MessageAction,
+    TaskTrackingAction,
 )
 from openhands.events.action.agent import CondensationRequestAction
 from openhands.events.action.mcp import MCPAction
 from openhands.events.event import FileEditSource, FileReadSource
 from openhands.events.tool import ToolCallMetadata
+from openhands.llm.tool_names import TASK_TRACKER_TOOL_NAME


 def combine_thought(action: Action, thought: str) -> Action:
@@ -220,6 +222,24 @@ def response_to_actions(
                    )
                action = BrowseInteractiveAction(browser_actions=arguments['code'])

+            # ================================================
+            # TaskTrackingAction
+            # ================================================
+            elif tool_call.function.name == TASK_TRACKER_TOOL_NAME:
+                if 'command' not in arguments:
+                    raise FunctionCallValidationError(
+                        f'Missing required argument "command" in tool call {tool_call.function.name}'
+                    )
+                if arguments['command'] == 'plan' and 'task_list' not in arguments:
+                    raise FunctionCallValidationError(
+                        f'Missing required argument "task_list" for "plan" command in tool call {tool_call.function.name}'
+                    )
+
+                action = TaskTrackingAction(
+                    command=arguments['command'],
+                    task_list=arguments.get('task_list', []),
+                )
+
            # ================================================
            # MCPAction (MCP)
            # ================================================
--- a/openhands/agenthub/codeact_agent/prompts/system_prompt_long_horizon.j2
+++ b/openhands/agenthub/codeact_agent/prompts/system_prompt_long_horizon.j2
@@ -1,39 +1,40 @@
 {% include "system_prompt.j2" %}

 <TASK_MANAGEMENT>
-* For complex, long-horizon tasks, create a TODO.md file to track progress:
-  1. Start by creating a detailed plan in TODO.md with clear steps
-  2. Check TODO.md before each new action to maintain context and track progress
-  3. Update TODO.md as you complete steps or discover new requirements
-  4. Mark completed items with ✓ or [x] to maintain a clear record of progress
-  5. For each major step, add sub-tasks as needed to break down complex work
-  6. If you discover the plan needs significant changes, propose updates and confirm with the user before proceeding and update TODO.md
-  7. IMPORTANT: Do NOT add TODO.md to git commits or version control systems
-
-* Example TODO.md format:
-```markdown
-# Task: [Brief description of the overall task]
-
-## Plan
- [ ] Step 1: [Description]
-  - [ ] Sub-task 1.1
-  - [ ] Sub-task 1.2
- [ ] Step 2: [Description]
- [x] Step 3: [Description] (Completed)
-
-## Notes
- Important discovery: [Details about something you learned]
- Potential issue: [Description of a potential problem]
-```
-
-* When working on a task:
-  - Read the README to understand how the system works
-  - Create TODO.md with every major step unchecked
-  - Add TODO.md to .gitignore if it's not already ignored
-  - Until every item in TODO.md is checked:
-    a. Pick the next unchecked item and work on it
-    b. Run appropriate tests to verify your work
-    c. If issues arise, fix them until tests pass
-    d. Once complete, check off the item in TODO.md
-    e. Proceed to the next unchecked item
+* You have access to the `task_tracker` tool to help you organize and monitor development work. Use this tool REGULARLY to maintain task visibility and provide users with clear progress updates. This tool is ESSENTIAL for systematic planning and decomposing complex development work into manageable components. Failing to use this tool for planning may result in overlooked requirements - which is unacceptable.
+* It is crucial that you update task status to "done" immediately upon completion of each work item. Do not accumulate multiple finished tasks before updating their status.
+* For complex, multi-phase development work, use `task_tracker` to establish a comprehensive plan with well-defined steps:
+  1. Begin by decomposing the overall objective into primary phases using `task_tracker`
+  2. Include detailed work items as necessary to break complex activities into actionable units
+  3. Update tasks to "in_progress" status when commencing work on them
+  4. Update tasks to "done" status immediately after completing each item
+  5. For each primary phase, incorporate additional work items as you identify new requirements
+  6. If you determine the plan requires substantial modifications, suggest revisions and obtain user confirmation before proceeding
+* Example workflow for debugging and resolution:
+  ```
+  User: "Execute the test suite and resolve any validation failures"
+  Assistant: I'm going to use the task_tracker tool to organize the following work items:
+  - Execute the test suite
+  - Resolve any validation failures
+  I'm now going to run the test suite using the terminal.
+  [After running tests and discovering 8 validation failures]
+  I found 8 validation failures that need attention. I'm going to use the task_tracker tool to add 8 specific items to the task list.
+  [Updating first task to in_progress]
+  Let me begin addressing the first validation issue...
+  [After resolving first failure]
+  The first validation issue has been resolved, let me mark that task as done and proceed to the second item...
+  ```
+* Example workflow for component development:
+  ```
+  User: "Build a dashboard component that displays analytics data with interactive charts and filtering options"
+  Assistant: I'll help you create an analytics dashboard with interactive charts and filtering. Let me first use the task_tracker tool to organize this development work.
+  Adding the following tasks to the tracker:
+  1. Analyze existing analytics data structure and requirements
+  2. Design dashboard layout and component architecture
+  3. Implement data visualization charts with interactivity
+  4. Create filtering and search functionality
+  5. Integrate components and perform testing
+  Let me start by examining the current analytics data structure to understand what we're working with...
+  [Assistant proceeds with implementation step by step, updating tasks to in_progress and done as work progresses]
+  ```
 </TASK_MANAGEMENT>
--- a/openhands/agenthub/codeact_agent/tools/task_tracker.py
+++ b/openhands/agenthub/codeact_agent/tools/task_tracker.py
@@ -0,0 +1,203 @@
+from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
+
+from openhands.llm.tool_names import TASK_TRACKER_TOOL_NAME
+
+# Full-length description of the task tracker tool: usage guidelines, worked
+# scenarios, counter-examples and status/workflow rules. This is the text
+# create_task_tracker_tool() uses unless the short description is requested.
+_DETAILED_TASK_TRACKER_DESCRIPTION = """This tool provides structured task management capabilities for development workflows.
+It enables systematic tracking of work items, progress monitoring, and efficient
+organization of complex development activities.
+
+The tool maintains visibility into project status and helps communicate
+progress effectively to users.
+
+## Application Guidelines
+
+Utilize this tool in the following situations:
+
+1. Multi-phase development work - When projects involve multiple sequential or
+   parallel activities
+2. Complex implementation tasks - Work requiring systematic planning and
+   coordination across multiple components
+3. Explicit user request for task organization - When users specifically ask
+   for structured task management
+4. Multiple concurrent requirements - When users present several work items
+   that need coordination
+5. Project initiation - Capture and organize user requirements at project start
+6. Work commencement - Update task status to in_progress before beginning
+   implementation. Maintain focus by limiting active work to one task
+7. Task completion - Update status to done and identify any additional work
+   that emerged during implementation
+
+## Situations Where Tool Usage Is Unnecessary
+
+Avoid using this tool when:
+
+1. Single atomic tasks that require no decomposition
+2. Trivial operations where tracking adds no organizational value
+3. Simple activities completable in minimal steps
+4. Pure information exchange or discussion
+
+Note: For single straightforward tasks, proceed with direct implementation
+rather than creating tracking overhead.
+
+## Usage Scenarios
+
+**Scenario A: Feature Development with Validation**
+User request: "Build a user authentication system with login/logout functionality.
+Don't forget to include input validation and error handling!"
+
+Response approach: I'll implement a user authentication system with comprehensive
+validation. Let me organize this work systematically.
+
+*Task breakdown includes:*
+1. Design authentication API endpoints and data models
+2. Implement user login/logout controllers with session management
+3. Add client-side form validation and error display
+4. Create backend input sanitization and security checks
+5. Implement comprehensive error handling for edge cases
+6. Perform integration testing and fix any issues
+
+**Scenario B: Codebase Refactoring**
+User request: "I need to update all instances of the method 'fetchData' to
+'retrieveInformation' throughout the entire project"
+
+Response approach: Let me first analyze your codebase to identify all locations
+where 'fetchData' appears.
+
+*After code analysis*
+
+I've located 12 occurrences of 'fetchData' across 6 files in your project.
+I'll create a systematic plan to update these references.
+
+*Organized task list includes specific file-by-file updates*
+
+**Scenario C: Multi-feature Development**
+User request: "Set up a blog platform with these components: admin dashboard,
+article management, comment system, and user profiles."
+
+Response approach: I'll help build your blog platform. Let me break down these
+components into manageable implementation phases.
+
+*Creates structured plan with each feature decomposed into specific development tasks*
+
+## Counter-examples - Direct Implementation Preferred
+
+**Counter-example A: Simple inquiry**
+User request: "What's the syntax for a for loop in JavaScript?"
+
+Response approach: Here's how to write a for loop in JavaScript:
+
+```javascript
+for (let i = 0; i < array.length; i++) {
+    console.log(array[i]);
+}
+```
+
+*No task tracking needed - single informational response*
+
+**Counter-example B: Minor modification**
+User request: "Please add a docstring to the processPayment function explaining
+its parameters."
+
+Response approach: I'll add a docstring to explain the processPayment function.
+
+*Direct edit without task tracking for simple modification*
+
+## Status Management and Workflow
+
+1. **Status Values**: Track work using these states:
+   - todo: Not yet initiated
+   - in_progress: Currently active (maintain single focus)
+   - done: Successfully completed
+
+2. **Workflow Practices**:
+   - Update status dynamically as work progresses
+   - Mark completion immediately upon task finish
+   - Limit active work to ONE task at any given time
+   - Complete current activities before initiating new ones
+   - Remove obsolete tasks from tracking entirely
+
+3. **Completion Criteria**:
+   - Mark tasks as done only when fully achieved
+   - Keep status as in_progress if errors, blocks, or partial completion exist
+   - Create new tasks for discovered issues or dependencies
+   - Never mark done when:
+       - Test suites are failing
+       - Implementation remains incomplete
+       - Unresolved errors persist
+       - Required resources are unavailable
+
+4. **Task Organization**:
+   - Write precise, actionable descriptions
+   - Decompose complex work into manageable units
+   - Use descriptive, clear naming conventions
+
+When uncertain, favor using this tool. Proactive task management demonstrates
+systematic approach and ensures comprehensive requirement fulfillment.
+"""
+
+# Condensed description of the task tracker tool, used by
+# create_task_tracker_tool() when `use_short_description` is True.
+_SHORT_TASK_TRACKER_DESCRIPTION = """Provides structured task management for development workflows, enabling progress
+tracking and systematic organization of complex coding activities.
+
+* Apply to multi-phase projects (3+ distinct steps) or when managing multiple user requirements
+* Update status (todo/in_progress/done) dynamically throughout work
+* Maintain single active task focus at any time
+* Mark completion immediately upon task finish
+* Decompose complex work into manageable, actionable units
+"""
+
+
+def create_task_tracker_tool(
+    use_short_description: bool = False,
+) -> ChatCompletionToolParam:
+    description = (
+        _SHORT_TASK_TRACKER_DESCRIPTION
+        if use_short_description
+        else _DETAILED_TASK_TRACKER_DESCRIPTION
+    )
+    return ChatCompletionToolParam(
+        type='function',
+        function=ChatCompletionToolParamFunctionChunk(
+            name=TASK_TRACKER_TOOL_NAME,
+            description=description,
+            parameters={
+                'type': 'object',
+                'properties': {
+                    'command': {
+                        'type': 'string',
+                        'enum': ['view', 'plan'],
+                        'description': 'The command to execute. `view` shows the current task list. `plan` creates or updates the task list based on provided requirements and progress. Always `view` the current list before making changes.',
+                    },
+                    'task_list': {
+                        'type': 'array',
+                        'description': 'The full task list. Required parameter of `plan` command.',
+                        'items': {
+                            'type': 'object',
+                            'properties': {
+                                'id': {
+                                    'type': 'string',
+                                    'description': 'Unique task identifier',
+                                },
+                                'title': {
+                                    'type': 'string',
+                                    'description': 'Brief task description',
+                                },
+                                'status': {
+                                    'type': 'string',
+                                    'description': 'Current task status',
+                                    'enum': ['todo', 'in_progress', 'done'],
+                                },
+                                'notes': {
+                                    'type': 'string',
+                                    'description': 'Optional additional context or details',
+                                },
+                            },
+                            'required': ['title', 'status', 'id'],
+                            'additionalProperties': False,
+                        },
+                    },
+                },
+                'required': ['command'],
+                'additionalProperties': False,
+            },
+        ),
+    )
--- a/openhands/cli/tui.py
+++ b/openhands/cli/tui.py
@@ -41,6 +41,7 @@ from openhands.events.action import (
    CmdRunAction,
    MCPAction,
    MessageAction,
+    TaskTrackingAction,
 )
 from openhands.events.event import Event
 from openhands.events.observation import (
@@ -50,6 +51,7 @@ from openhands.events.observation import (
    FileEditObservation,
    FileReadObservation,
    MCPObservation,
+    TaskTrackingObservation,
 )
 from openhands.llm.metrics import Metrics
 from openhands.mcp.error_collector import mcp_error_collector
@@ -273,6 +275,8 @@ def display_event(event: Event, config: OpenHandsConfig) -> None:
                initialize_streaming_output()
        elif isinstance(event, MCPAction):
            display_mcp_action(event)
+        elif isinstance(event, TaskTrackingAction):
+            display_task_tracking_action(event)
        elif isinstance(event, Action):
            # For other actions, display thoughts normally
            if hasattr(event, 'thought') and event.thought:
@@ -293,6 +297,8 @@ def display_event(event: Event, config: OpenHandsConfig) -> None:
            display_file_read(event)
        elif isinstance(event, MCPObservation):
            display_mcp_observation(event)
+        elif isinstance(event, TaskTrackingObservation):
+            display_task_tracking_observation(event)
        elif isinstance(event, AgentStateChangedObservation):
            display_agent_state_change_message(event.agent_state)
        elif isinstance(event, ErrorObservation):
@@ -521,6 +527,74 @@ def display_mcp_observation(event: MCPObservation) -> None:
    print_container(container)


+def display_task_tracking_action(event: TaskTrackingAction) -> None:
+    """Display a TaskTracking action in the CLI."""
+    # Display thought first if present
+    if hasattr(event, 'thought') and event.thought:
+        display_message(event.thought)
+
+    # Format the command and task list for display
+    display_text = f'Command: {event.command}'
+
+    if event.command == 'plan':
+        if event.task_list:
+            display_text += f'\n\nTask List ({len(event.task_list)} items):'
+            for i, task in enumerate(event.task_list, 1):
+                status = task.get('status', 'unknown')
+                title = task.get('title', 'Untitled task')
+                task_id = task.get('id', f'task-{i}')
+                notes = task.get('notes', '')
+
+                # Add status indicator with color
+                status_indicator = {
+                    'todo': '⏳',
+                    'in_progress': '🔄',
+                    'done': '✅',
+                }.get(status, '❓')
+
+                display_text += f'\n  {i}. {status_indicator} [{status.upper()}] {title} (ID: {task_id})'
+                if notes:
+                    display_text += f'\n     Notes: {notes}'
+        else:
+            display_text += '\n\nTask List: Empty'
+
+    container = Frame(
+        TextArea(
+            text=display_text,
+            read_only=True,
+            style='ansigreen',
+            wrap_lines=True,
+        ),
+        title='Task Tracking Action',
+        style='ansigreen',
+    )
+    print_formatted_text('')
+    print_container(container)
+
+
+def display_task_tracking_observation(event: TaskTrackingObservation) -> None:
+    """Display a TaskTracking observation in the CLI."""
+    # Format the content and task list for display
+    content = (
+        event.content.strip() if event.content else 'Task tracking operation completed'
+    )
+
+    display_text = f'Result: {content}'
+
+    container = Frame(
+        TextArea(
+            text=display_text,
+            read_only=True,
+            style=COLOR_GREY,
+            wrap_lines=True,
+        ),
+        title='Task Tracking Result',
+        style=f'fg:{COLOR_GREY}',
+    )
+    print_formatted_text('')
+    print_container(container)
+
+
 def initialize_streaming_output():
    """Initialize the streaming output TextArea."""
    if not ENABLE_STREAMING:
--- a/openhands/core/config/agent_config.py
+++ b/openhands/core/config/agent_config.py
@@ -46,6 +46,8 @@ class AgentConfig(BaseModel):
    """Whether history should be truncated to continue the session when hitting LLM context length limit."""
    enable_som_visual_browsing: bool = Field(default=True)
    """Whether to enable SoM (Set of Marks) visual browsing."""
+    enable_plan_mode: bool = Field(default=True)
+    """Whether to enable plan mode, which uses the long horizon system message and adds the new tool - task_tracker - for planning, tracking and executing complex tasks."""
    condenser: CondenserConfig = Field(
        # The default condenser is set to the conversation window condenser -- if
        # we use NoOp and the conversation hits the LLM context length limit,
@@ -58,6 +60,17 @@ class AgentConfig(BaseModel):

    model_config = ConfigDict(extra='forbid')

+    @property
+    def resolved_system_prompt_filename(self) -> str:
+        """
+        Returns the appropriate system prompt filename based on the agent configuration.
+        When enable_plan_mode is True, automatically uses the long horizon system prompt
+        unless a custom system_prompt_filename was explicitly set (not the default).
+        """
+        if self.enable_plan_mode and self.system_prompt_filename == 'system_prompt.j2':
+            return 'system_prompt_long_horizon.j2'
+        return self.system_prompt_filename
+
    @classmethod
    def from_toml_section(cls, data: dict) -> dict[str, AgentConfig]:
        """Create a mapping of AgentConfig instances from a toml dictionary representing the [agent] section.
--- a/openhands/core/schema/action.py
+++ b/openhands/core/schema/action.py
@@ -94,3 +94,6 @@ class ActionType(str, Enum):

    CONDENSATION_REQUEST = 'condensation_request'
    """Request for condensation of a list of events."""
+
+    TASK_TRACKING = 'task_tracking'
+    """Views or updates the task list for task management."""
--- a/openhands/core/schema/observation.py
+++ b/openhands/core/schema/observation.py
@@ -55,3 +55,6 @@ class ObservationType(str, Enum):

    DOWNLOAD = 'download'
    """Result of downloading/opening a file via the browser"""
+
+    TASK_TRACKING = 'task_tracking'
+    """Result of a task tracking operation"""
--- a/openhands/events/action/init.py
+++ b/openhands/events/action/init.py
@@ -6,6 +6,7 @@ from openhands.events.action.agent import (
    AgentThinkAction,
    ChangeAgentStateAction,
    RecallAction,
+    TaskTrackingAction,
 )
 from openhands.events.action.browse import BrowseInteractiveAction, BrowseURLAction
 from openhands.events.action.commands import CmdRunAction, IPythonRunCellAction
@@ -38,4 +39,5 @@ __all__ = [
    'AgentThinkAction',
    'RecallAction',
    'MCPAction',
+    'TaskTrackingAction',
 ]
--- a/openhands/events/action/agent.py
+++ b/openhands/events/action/agent.py
@@ -201,3 +201,28 @@ class CondensationRequestAction(Action):
    @property
    def message(self) -> str:
        return 'Requesting a condensation of the conversation history.'
+
+
+@dataclass
+class TaskTrackingAction(Action):
+    """An action where the agent writes or updates a task list for task management.
+    Attributes:
+        task_list (list): The list of task items with their status and metadata.
+        thought (str): The agent's explanation of its actions.
+        action (str): The action type, namely ActionType.TASK_TRACKING.
+    """
+
+    command: str = 'view'
+    task_list: list[dict[str, Any]] = field(default_factory=list)
+    thought: str = ''
+    action: str = ActionType.TASK_TRACKING
+
+    @property
+    def message(self) -> str:
+        num_tasks = len(self.task_list)
+        if num_tasks == 0:
+            return 'Clearing the task list.'
+        elif num_tasks == 1:
+            return 'Managing 1 task item.'
+        else:
+            return f'Managing {num_tasks} task items.'
--- a/openhands/events/observation/init.py
+++ b/openhands/events/observation/init.py
@@ -26,6 +26,7 @@ from openhands.events.observation.mcp import MCPObservation
 from openhands.events.observation.observation import Observation
 from openhands.events.observation.reject import UserRejectObservation
 from openhands.events.observation.success import SuccessObservation
+from openhands.events.observation.task_tracking import TaskTrackingObservation

 __all__ = [
    'Observation',
@@ -48,4 +49,5 @@ __all__ = [
    'RecallType',
    'MCPObservation',
    'FileDownloadObservation',
+    'TaskTrackingObservation',
 ]
--- a/openhands/events/observation/task_tracking.py
+++ b/openhands/events/observation/task_tracking.py
@@ -0,0 +1,18 @@
+from dataclasses import dataclass, field
+from typing import Any
+
+from openhands.core.schema import ObservationType
+from openhands.events.observation.observation import Observation
+
+
+@dataclass
+class TaskTrackingObservation(Observation):
+    """This data class represents the result of a task tracking operation.
+
+    Attributes:
+        observation (str): The observation type, namely ObservationType.TASK_TRACKING.
+        command (str): The task tracking command this observation is the result of.
+        task_list (list): The task items attached to the result; empty by default.
+    """
+
+    observation: str = ObservationType.TASK_TRACKING
+    command: str = ''
+    task_list: list[dict[str, Any]] = field(default_factory=list)
+
+    @property
+    def message(self) -> str:
+        # The message is the observation's content, returned unchanged.
+        return self.content
--- a/openhands/events/serialization/action.py
+++ b/openhands/events/serialization/action.py
@@ -11,6 +11,7 @@ from openhands.events.action.agent import (
    CondensationAction,
    CondensationRequestAction,
    RecallAction,
+    TaskTrackingAction,
 )
 from openhands.events.action.browse import BrowseInteractiveAction, BrowseURLAction
 from openhands.events.action.commands import (
@@ -46,6 +47,7 @@ actions = (
    CondensationAction,
    CondensationRequestAction,
    MCPAction,
+    TaskTrackingAction,
 )

 ACTION_TYPE_TO_CLASS = {action_class.action: action_class for action_class in actions}  # type: ignore[attr-defined]
--- a/openhands/events/serialization/observation.py
+++ b/openhands/events/serialization/observation.py
@@ -30,6 +30,7 @@ from openhands.events.observation.mcp import MCPObservation
 from openhands.events.observation.observation import Observation
 from openhands.events.observation.reject import UserRejectObservation
 from openhands.events.observation.success import SuccessObservation
+from openhands.events.observation.task_tracking import TaskTrackingObservation

 observations = (
    NullObservation,
@@ -49,6 +50,7 @@ observations = (
    RecallObservation,
    MCPObservation,
    FileDownloadObservation,
+    TaskTrackingObservation,
 )

 OBSERVATION_TYPE_TO_CLASS = {
--- a/openhands/llm/tool_names.py
+++ b/openhands/llm/tool_names.py
@@ -5,3 +5,4 @@ STR_REPLACE_EDITOR_TOOL_NAME = 'str_replace_editor'
 BROWSER_TOOL_NAME = 'browser'
 FINISH_TOOL_NAME = 'finish'
 LLM_BASED_EDIT_TOOL_NAME = 'edit_file'
+TASK_TRACKER_TOOL_NAME = 'task_tracker'
--- a/openhands/memory/conversation_memory.py
+++ b/openhands/memory/conversation_memory.py
@@ -18,6 +18,7 @@ from openhands.events.action import (
    FileReadAction,
    IPythonRunCellAction,
    MessageAction,
+    TaskTrackingAction,
 )
 from openhands.events.action.mcp import MCPAction
 from openhands.events.action.message import SystemMessageAction
@@ -32,6 +33,7 @@ from openhands.events.observation import (
    FileEditObservation,
    FileReadObservation,
    IPythonRunCellObservation,
+    TaskTrackingObservation,
    UserRejectObservation,
 )
 from openhands.events.observation.agent import (
@@ -228,6 +230,7 @@ class ConversationMemory:
                BrowseInteractiveAction,
                BrowseURLAction,
                MCPAction,
+                TaskTrackingAction,
            ),
        ) or (isinstance(action, CmdRunAction) and action.source == 'agent'):
            tool_metadata = action.tool_call_metadata
@@ -487,6 +490,9 @@ class ConversationMemory:
        elif isinstance(obs, AgentThinkObservation):
            text = truncate_content(obs.content, max_message_chars)
            message = Message(role='user', content=[TextContent(text=text)])
+        elif isinstance(obs, TaskTrackingObservation):
+            text = truncate_content(obs.content, max_message_chars)
+            message = Message(role='user', content=[TextContent(text=text)])
        elif isinstance(obs, ErrorObservation):
            text = truncate_content(obs.content, max_message_chars)
            text += '\n[Error occurred in processing last action]'
--- a/openhands/runtime/base.py
+++ b/openhands/runtime/base.py
@@ -33,6 +33,7 @@ from openhands.events.action import (
    FileReadAction,
    FileWriteAction,
    IPythonRunCellAction,
+    TaskTrackingAction,
 )
 from openhands.events.action.mcp import MCPAction
 from openhands.events.event import Event
@@ -43,6 +44,7 @@ from openhands.events.observation import (
    FileReadObservation,
    NullObservation,
    Observation,
+    TaskTrackingObservation,
    UserRejectObservation,
 )
 from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS
@@ -869,6 +871,46 @@ fi
        if not action.runnable:
            if isinstance(action, AgentThinkAction):
                return AgentThinkObservation('Your thought has been logged.')
+            elif isinstance(action, TaskTrackingAction):
+                # If `command` is `plan`, write the serialized task list to the file TASKS.md under `.openhands/`
+                if action.command == 'plan':
+                    content = '# Task List\n\n'
+                    for i, task in enumerate(action.task_list, 1):
+                        status_icon = {
+                            'todo': '⏳',
+                            'in_progress': '🔄',
+                            'done': '✅',
+                        }.get(task.get('status', 'todo'), '⏳')
+                        content += f'{i}. {status_icon} {task.get("title", "")}\n{task.get("notes", "")}\n'
+                    write_obs = self.write(
+                        FileWriteAction(path='.openhands/TASKS.md', content=content)
+                    )
+                    if isinstance(write_obs, ErrorObservation):
+                        return ErrorObservation(
+                            f'Failed to write task list to .openhands/TASKS.md: {write_obs.content}'
+                        )
+
+                    return TaskTrackingObservation(
+                        content=f'Task list has been updated with {len(action.task_list)} items.',
+                        command=action.command,
+                        task_list=action.task_list,
+                    )
+                elif action.command == 'view':
+                    # If `command` is `view`, read the TASKS.md file and return its content
+                    read_obs = self.read(FileReadAction(path='.openhands/TASKS.md'))
+                    if isinstance(read_obs, FileReadObservation):
+                        return TaskTrackingObservation(
+                            content=read_obs.content,
+                            command=action.command,
+                            task_list=[],  # Empty for view command
+                        )
+                    else:
+                        return TaskTrackingObservation(  # Return observation if error occurs because file might not exist yet
+                            command=action.command,
+                            task_list=[],
+                            content=f'Failed to read the task list. Error: {read_obs.content}',
+                        )
+
            return NullObservation('')
        if (
            hasattr(action, 'confirmation_state')