Compare commits

...

8 Commits

Author SHA1 Message Date
enyst
6c8c93add4 SDK Minimal Python: MCP-only tools (camelCase), remove fallbacks/handlers; Conversation delegates to runtime.execute_tool; Runtime.get_tools MCP-shape + execute_tool dispatcher; fix imports 2025-08-24 23:14:06 +00:00
enyst
0a3f389bc4 PRD: add Runtime and SDK sections for MCP-first minimal SDK (get_tools, execute_tool, sdk.Tool, Conversation flow, Anthropic sequencing) 2025-08-24 22:57:31 +00:00
enyst
37f4784e05 Runtime: add get_tools() in MCP format (name/description/inputSchema); SDK: Conversation uses runtime.get_tools() with fallback, binds handlers for execute_bash/file_read/file_write; keep SDK Tool param conversion\n\nCo-authored-by: openhands <openhands@all-hands.dev> 2025-08-24 22:23:06 +00:00
enyst
475947ebcd Apply pre-commit autofixes (ruff/format)\n\nCo-authored-by: openhands <openhands@all-hands.dev> 2025-08-24 22:08:41 +00:00
enyst
5a6b741612 I am OpenHands-GPT-5, an AI agent — Option B: vendor CodeAct system_prompt and include; render with cli_mode=True for SDK system_message.\n\n- Copy system_prompt.j2 and security_risk_assessment.j2 under openhands/sdk/prompts\n- Render via Jinja2 and refine_prompt; persist as system_message\n\nCo-authored-by: openhands <openhands@all-hands.dev> 2025-08-24 19:23:11 +00:00
enyst
3d8f2dcd67 I am OpenHands-GPT-5, an AI agent — remove local task docs from branch before pushing 2025-08-24 18:58:47 +00:00
enyst
8db1a6034c I am OpenHands-GPT-5, an AI agent — Embed fully-rendered CodeAct system prompt in SDK system_message; PRD updated to specify exact source.\n\n- Load system prompt from CodeActAgent system_prompt.j2 and persist as system_message at loop start\n- Allow appending simple system_prompt_extensions\n\nCo-authored-by: openhands <openhands@all-hands.dev> 2025-08-24 18:55:15 +00:00
enyst
0a9db47533 I am OpenHands-GPT-5, an AI agent — Minimal Python SDK groundwork: autoresume synthesis of assistant tool_calls; TUI snippet formatting and headless exit semantics; expand system_message in PRD.\n\n- Implement synthesized assistant tool_calls in _reconstruct_messages_from_events (canonical OpenAI format)\n- Include system_message reconstruction\n- TUI: concise per-step logs with truncation policy; headless returns non-zero on fatal\n- Expand PRD: system prompt source, behaviors\n\nCo-authored-by: openhands <openhands@all-hands.dev> 2025-08-24 18:32:42 +00:00
16 changed files with 1425 additions and 1 deletions

View File

@@ -10,7 +10,7 @@ import tempfile
from abc import abstractmethod
from pathlib import Path
from types import MappingProxyType
from typing import Callable, cast
from typing import Any, Callable, cast
from zipfile import ZipFile
import httpx
@@ -240,6 +240,97 @@ class Runtime(FileEditRuntimeMixin):
if self.status_callback:
self.status_callback(level, runtime_status, msg)
# Centralized tool execution for SDK: maps tool name to local runtime actions.
# For external MCP servers, use call_tool_mcp(MCPAction).
def execute_tool(self, name: str, arguments: dict[str, Any]) -> Observation:
    """Run one of the built-in runtime tools and return its observation.

    Recognized tool names are ``execute_bash``, ``file_read`` and
    ``file_write``; each is translated into the matching action and handed to
    ``self.run`` / ``self.read`` / ``self.write``.

    Args:
        name: Tool name; coerced to ``str`` before matching.
        arguments: Tool arguments keyed by parameter name.

    Returns:
        The observation produced by the runtime, or an ``ErrorObservation``
        when the tool name is unknown or anything raises while dispatching.
    """
    tool_name = str(name)
    try:
        if tool_name == 'execute_bash':
            shell_command = str(arguments.get('command', ''))
            requested_timeout = arguments.get('timeout')
            bash_action = CmdRunAction(command=shell_command)
            if requested_timeout is not None:
                try:
                    bash_action.set_hard_timeout(float(requested_timeout))
                except Exception:
                    # Best effort: a timeout that cannot be applied is ignored
                    # and the command runs without it.
                    pass
            return self.run(bash_action)
        elif tool_name == 'file_read':
            read_action = FileReadAction(
                path=str(arguments.get('path', '')),
                view_range=arguments.get('view_range'),
            )
            return self.read(read_action)
        elif tool_name == 'file_write':
            write_action = FileWriteAction(
                path=str(arguments.get('path', '')),
                content=str(arguments.get('content', '')),
            )
            return self.write(write_action)
    except Exception as exc:
        # Any failure while building or running the action is reported to the
        # caller as an observation rather than raised.
        return ErrorObservation(str(exc))
    return ErrorObservation(f'Unknown tool: {tool_name}')
# MCP-compatible tool spec
# We keep a minimal subset: name, description, inputSchema
def get_tools(self) -> list[dict[str, Any]]:
    """Describe the built-in runtime tools as MCP-style tool specs.

    Each entry carries a subset of the MCP Tool shape:
        {
            "name": str,
            "description": str,
            "inputSchema": {JSON Schema dict}
        }

    Returns:
        A freshly built list with the specs for ``execute_bash``,
        ``file_read`` and ``file_write``, in that order.
    """

    def closed_object(
        properties: dict[str, Any], required: list[str]
    ) -> dict[str, Any]:
        # Every tool takes a JSON object and rejects undeclared keys.
        return {
            'type': 'object',
            'properties': properties,
            'required': required,
            'additionalProperties': False,
        }

    bash_spec = {
        'name': 'execute_bash',
        'description': 'Run a shell command inside the runtime',
        'inputSchema': closed_object(
            {
                'command': {'type': 'string'},
                'timeout': {'type': 'number', 'description': 'Seconds'},
            },
            ['command'],
        ),
    }
    read_spec = {
        'name': 'file_read',
        'description': 'Read a text file from the runtime workspace',
        'inputSchema': closed_object(
            {
                'path': {'type': 'string'},
                'view_range': {
                    'type': 'array',
                    'items': {'type': 'integer'},
                    'minItems': 2,
                    'maxItems': 2,
                },
            },
            ['path'],
        ),
    }
    write_spec = {
        'name': 'file_write',
        'description': 'Write text to a file in the runtime workspace (overwrites)',
        'inputSchema': closed_object(
            {
                'path': {'type': 'string'},
                'content': {'type': 'string'},
            },
            ['path', 'content'],
        ),
    }
    return [bash_spec, read_spec, write_spec]
# ====================================================================
def add_env_vars(self, env_vars: dict[str, str]) -> None:

View File

@@ -0,0 +1,8 @@
from .conversation import Agent as Agent
from .conversation import Conversation as Conversation
from .llm import LLM as LLM
from .llm import LLMConfig as LLMConfig
from .tool import Tool as Tool
from .types import ConversationStatus as ConversationStatus
from .types import SDKEvent as SDKEvent
from .types import ToolResult as ToolResult

View File

@@ -0,0 +1,476 @@
from __future__ import annotations
import json
import threading
import time
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, cast
from openhands.core.config import OpenHandsConfig
from openhands.events.observation import (
CmdOutputObservation,
ErrorObservation,
FileReadObservation,
FileWriteObservation,
)
from openhands.events.stream import EventStream, EventStreamSubscriber
from openhands.llm.llm_registry import LLMRegistry
from openhands.runtime.base import Runtime
from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
from openhands.storage.conversation.file_conversation_store import FileConversationStore
from .llm import LLM
from .persistence import append_event_jsonl
from .tool import Tool
from .types import ConversationStatus, SDKEvent, ToolResult
class _NoOpEventStream(EventStream):
    """EventStream variant for SDK use that ignores subscriptions and writes.

    ``subscribe`` and ``add_event`` are overridden to do nothing, so no
    subscriber is registered and no legacy event is recorded through this
    stream.
    """

    def __init__(self, sid: str, file_store: Any, user_id: str | None = None):
        # Forwards all arguments unchanged to the base initializer.
        # NOTE(review): the original comment says this avoids starting queue
        # threads; that depends on EventStream.__init__ — confirm there.
        super().__init__(sid=sid, file_store=file_store, user_id=user_id)

    def subscribe(
        self, subscriber_id: EventStreamSubscriber, callback, callback_id: str
    ) -> None:  # type: ignore[override]
        """Ignore the subscription request (no subscriber threads in SDK mode)."""
        return None

    def add_event(self, event, source) -> None:  # type: ignore[override]
        """Drop the event instead of writing it to the legacy stream."""
        return None
def _default_runtime(noop_stream: EventStream) -> Runtime:
    """Build a ``CLIRuntime`` from a default ``OpenHandsConfig``.

    Args:
        noop_stream: Event stream handed to the runtime.

    Returns:
        The newly constructed runtime. It is not connected here.
    """
    default_config = OpenHandsConfig()
    return CLIRuntime(
        config=default_config,
        event_stream=noop_stream,
        llm_registry=LLMRegistry(default_config),
    )
@dataclass
class Agent:
    """Static description of an agent: its LLM, extra tools and prompt pieces."""

    # LLM wrapper used to request each assistant turn.
    llm: LLM
    # Additional tools appended after the tools reported by the runtime.
    tools: list[Tool]
    # Microagent texts folded into the system prompt.
    microagents: list[str] = field(default_factory=list)
    # Full system prompt override; when unset the vendored default is loaded.
    system_prompt: str | None = None
    # Extra paragraphs appended after the system prompt.
    system_prompt_extensions: list[str] = field(default_factory=list)
class Conversation:
    """Run an LLM tool-calling loop against a runtime and record SDK events.

    A conversation owns the chat history (``self.messages``, OpenAI-style
    message dicts), the tool list advertised to the LLM, and a background
    thread that requests the next assistant turn whenever the last message is
    a user or tool message. Every event is appended to
    ``~/.openhands/conversations/<conversation_id>/sdk_events.jsonl`` and then
    passed to the registered callbacks.
    """

    def __init__(
        self,
        agent: Agent,
        runtime: Runtime | None = None,
        metadata_store: FileConversationStore | None = None,
        conversation_id: str | None = None,
        user_id: str | None = None,
    ) -> None:
        """Set up persistence, runtime, tools and the initial system message.

        Args:
            agent: LLM, extra tools and prompt pieces for this conversation.
            runtime: Runtime that executes tools; a default CLI runtime is
                built when omitted.
            metadata_store: Accepted for interface compatibility; not used by
                this class.
            conversation_id: Existing id to reuse; a UUID4 is generated when
                omitted.
            user_id: Forwarded to the internal no-op event stream.
        """
        self.agent = agent
        self.conversation_id = conversation_id or str(uuid.uuid4())
        self.status_value = ConversationStatus.IDLE
        self.callbacks: list[Callable[[SDKEvent], None]] = []
        self.messages: list[dict[str, Any]] = []

        # Event persistence (always on, fixed location under ~/.openhands/conversations)
        import os

        home = os.path.expanduser('~')
        base = os.path.join(home, '.openhands', 'conversations')
        os.makedirs(os.path.join(base, self.conversation_id), exist_ok=True)
        self.jsonl_path = os.path.join(base, self.conversation_id, 'sdk_events.jsonl')

        # No-op event stream + runtime
        from openhands.storage.local import LocalFileStore

        self._event_stream = _NoOpEventStream(
            sid=self.conversation_id, file_store=LocalFileStore('.'), user_id=user_id
        )
        self.runtime = runtime or _default_runtime(self._event_stream)
        self._thread: threading.Thread | None = None
        # Holds the pending connect task when start() is called from inside a
        # running event loop, so the task cannot be garbage-collected early.
        self._connect_task: Any = None

        # Build tools from runtime.get_tools() (MCP format); no SDK fallbacks
        # or handler binding.
        self.tools: list[Tool] = []
        try:
            mcp_tools: list[dict[str, Any]] = self.runtime.get_tools()  # type: ignore[attr-defined]
        except Exception:
            # A runtime without get_tools() simply contributes no tools.
            mcp_tools = []
        for spec in mcp_tools:
            self.tools.append(
                Tool(
                    name=str(spec.get('name') or ''),
                    description=str(spec.get('description') or ''),
                    inputSchema=cast(dict[str, Any], spec.get('inputSchema') or {}),
                    outputSchema=cast(
                        dict[str, Any] | None, spec.get('outputSchema')
                    ),
                    handler=None,  # runtime executes tools; no handler binding
                )
            )
        # Also append any user-supplied tools on the agent
        self.tools.extend(self.agent.tools)

        # Compose the system message exactly once and persist it so that a
        # resumed conversation reproduces the same prompt.
        sys_prompt = self._compose_system_prompt()
        self.messages.append({'role': 'system', 'content': sys_prompt})
        self._emit(self._new_event('system_message', {'text': sys_prompt}))

    def _compose_system_prompt(self) -> str:
        """Return base prompt + extensions + microagents as one system text.

        The base is ``agent.system_prompt`` when set, otherwise the vendored
        default prompt. Extensions are appended as separate paragraphs and
        microagents as one final paragraph (one microagent per line).
        """
        base_prompt = self.agent.system_prompt
        if not base_prompt:
            from .system_prompt_loader import load_codeact_system_prompt

            base_prompt = load_codeact_system_prompt(render=True)
        sections = list(self.agent.system_prompt_extensions)
        if self.agent.microagents:
            sections.append('\n'.join(self.agent.microagents))
        if not sections:
            return base_prompt
        return base_prompt.rstrip() + '\n\n' + '\n\n'.join(sections)

    def _new_event(self, event_type: str, data: dict[str, Any]) -> SDKEvent:
        """Build an SDKEvent for this conversation stamped with the current time."""
        return SDKEvent(
            type=event_type,
            ts=datetime.utcnow(),
            conversation_id=self.conversation_id,
            data=data,
        )

    def _emit(self, evt: SDKEvent) -> None:
        """Persist ``evt`` to the JSONL log, then notify every callback."""
        if self.jsonl_path:
            append_event_jsonl(self.jsonl_path, evt)
        # Iterate over a copy so callbacks may register further callbacks.
        for cb in list(self.callbacks):
            try:
                cb(evt)
            except Exception:
                # A failing observer must not break the conversation loop.
                pass

    def register_callback(self, fn: Callable[[SDKEvent], None]) -> None:
        """Register ``fn`` to be called with every event emitted from now on."""
        self.callbacks.append(fn)

    def start(self) -> None:
        """Connect the runtime and start the background loop thread.

        Does nothing when the loop thread is already alive. A failure to
        connect is logged and does not prevent the loop from starting.
        """
        if self._thread and self._thread.is_alive():
            return
        self.status_value = ConversationStatus.RUNNING
        # Ensure runtime is connected before tools are used
        try:
            import asyncio

            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                loop = None
            if loop and loop.is_running():
                # Inside a running loop we can only schedule the connect;
                # keep a reference so the task is not garbage-collected.
                # NOTE(review): the loop thread may start before this task
                # completes.
                self._connect_task = loop.create_task(self.runtime.connect())  # type: ignore[attr-defined]
            else:
                asyncio.run(self.runtime.connect())  # type: ignore[attr-defined]
        except Exception as exc:
            from openhands.core.logger import openhands_logger as logger

            logger.warning(f'Runtime connect failed; continuing anyway: {exc}')
        self._thread = threading.Thread(target=self._run_loop, daemon=True)
        self._thread.start()

    def stop(self) -> None:
        """Mark the conversation canceled; the loop exits on its next check."""
        self.status_value = ConversationStatus.CANCELED

    def _reconstruct_messages_from_events(
        self, events: list[SDKEvent]
    ) -> list[dict[str, Any]]:
        """Rebuild an OpenAI-style message list from persisted events.

        ``tool_call`` events are collected into a batch; just before the first
        ``tool_result`` of a batch, a synthesized assistant message carrying
        the batched ``tool_calls`` is inserted so every tool message is
        preceded by the assistant turn that requested it.
        """
        msgs: list[dict[str, Any]] = []
        pending_tool_calls: list[dict[str, Any]] = []
        pending_ids: set[str] = set()
        assistant_flushed: bool = False

        def reset_batch() -> None:
            nonlocal assistant_flushed
            pending_tool_calls.clear()
            pending_ids.clear()
            assistant_flushed = False

        for evt in events:
            if evt.type == 'system_message':
                text = evt.data.get('text', '')
                if text:
                    # Order is kept as recorded, even if not first.
                    msgs.append({'role': 'system', 'content': text})
            elif evt.type == 'user_message':
                # New user input starts a new exchange.
                reset_batch()
                msgs.append({'role': 'user', 'content': evt.data.get('text', '')})
            elif evt.type == 'assistant_message':
                text = evt.data.get('text', '')
                if text:
                    msgs.append({'role': 'assistant', 'content': text})
                # A text assistant message ends any tool-call batch.
                reset_batch()
            elif evt.type == 'tool_call':
                tool_call_id = evt.data.get('tool_call_id') or str(uuid.uuid4())
                name = evt.data.get('name')
                args = evt.data.get('arguments') or {}
                # A call with an unseen id after a flush starts a new batch.
                if assistant_flushed and tool_call_id not in pending_ids:
                    reset_batch()
                pending_ids.add(tool_call_id)
                pending_tool_calls.append(
                    {
                        'id': tool_call_id,
                        'type': 'function',
                        'function': {
                            'name': name,
                            'arguments': json.dumps(args),
                        },
                    }
                )
            elif evt.type == 'tool_result':
                tool_call_id = evt.data.get('tool_call_id')
                # Before the first tool_result in a batch, inject the
                # synthesized assistant tool_calls message.
                if pending_tool_calls and not assistant_flushed:
                    msgs.append(
                        {
                            'role': 'assistant',
                            'content': '',
                            'tool_calls': pending_tool_calls.copy(),
                        }
                    )
                    assistant_flushed = True
                tr = ToolResult(
                    status=evt.data.get('status', 'ok'),
                    output=evt.data.get('output'),
                    error=evt.data.get('error'),
                )
                # Results without an id cannot be matched to a call; skip them.
                if tool_call_id:
                    msgs.append(
                        {
                            'role': 'tool',
                            'content': json.dumps(tr.model_dump()),
                            'tool_call_id': tool_call_id,
                        }
                    )
        return msgs

    def autoresume_from_path(self, jsonl_path: str) -> None:
        """Replace ``self.messages`` with the history rebuilt from ``jsonl_path``."""
        from .persistence import read_events_jsonl

        events = read_events_jsonl(jsonl_path)
        self.messages = self._reconstruct_messages_from_events(events)

    def autoresume_latest(self) -> bool:
        """Resume from the conversation log whose last event is the newest.

        Scans ``~/.openhands/conversations/*/sdk_events.jsonl``.

        Returns:
            True when a non-empty log was found and loaded, False otherwise.
        """
        import glob
        import os

        from .persistence import read_events_jsonl

        home = os.path.expanduser('~')
        base = os.path.join(home, '.openhands', 'conversations')
        if not os.path.isdir(base):
            return False
        candidates = []
        for conv_dir in glob.glob(os.path.join(base, '*')):
            jl = os.path.join(conv_dir, 'sdk_events.jsonl')
            if not os.path.exists(jl):
                continue
            try:
                evs = read_events_jsonl(jl)
            except Exception:
                # Unreadable or corrupt logs are skipped.
                continue
            if evs:
                candidates.append((evs[-1].ts, jl))
        if not candidates:
            return False
        _, newest = max(candidates, key=lambda item: item[0])
        self.autoresume_from_path(newest)
        return True

    def status(self) -> ConversationStatus:
        """Return the current conversation status."""
        return self.status_value

    def send_message(self, text: str) -> None:
        """Record a user message and wake the loop by setting status RUNNING."""
        self._emit(self._new_event('user_message', {'text': text}))
        self.messages.append({'role': 'user', 'content': text})
        # Wake loop by setting status to RUNNING
        self.status_value = ConversationStatus.RUNNING

    @staticmethod
    def _observation_to_result(obs: Any, args: dict[str, Any]) -> ToolResult:
        """Translate a runtime observation into the SDK ``ToolResult`` shape."""
        if isinstance(obs, CmdOutputObservation):
            return ToolResult(
                status='ok',
                output={'stdout': obs.content, 'exit_code': obs.exit_code},
            )
        if isinstance(obs, FileReadObservation):
            return ToolResult(
                status='ok',
                output={'path': args.get('path'), 'content': obs.content},
            )
        if isinstance(obs, FileWriteObservation):
            return ToolResult(status='ok', output={'path': args.get('path')})
        if isinstance(obs, ErrorObservation):
            return ToolResult(status='error', error=obs.content)
        return ToolResult(status='ok', output=None)

    def _run_loop(self) -> None:
        """Background loop: request assistant turns and execute tool calls.

        The loop ends when the status becomes CANCELED, FINISHED or ERROR.
        The system message is already in ``self.messages`` (added by
        ``__init__`` or restored by autoresume), so none is added here.
        """
        terminal_states = {
            ConversationStatus.CANCELED,
            ConversationStatus.FINISHED,
            ConversationStatus.ERROR,
        }
        # History snapshot (list identity, length) at which the LLM last
        # returned nothing usable; used to avoid asking again for the very
        # same history.
        stalled_at: tuple[int, int] | None = None

        while True:
            if self.status_value in terminal_states:
                break
            # Only proceed when the last message requires an assistant turn.
            if not self.messages or self.messages[-1]['role'] in {
                'system',
                'assistant',
            }:
                time.sleep(0.1)
                continue
            snapshot = (id(self.messages), len(self.messages))
            if stalled_at == snapshot:
                # Nothing changed since the empty response; wait for input.
                time.sleep(0.2)
                continue

            tool_params = [t.to_param() for t in self.tools]
            try:
                resp = self.agent.llm.send(messages=self.messages, tools=tool_params)
            except Exception as e:
                self.status_value = ConversationStatus.ERROR
                self._emit(self._new_event('error', {'error': str(e)}))
                break

            # Tolerate a missing or empty choices list.
            choices = resp.get('choices') or [{}]
            msg = choices[0].get('message') or {}
            tool_calls = msg.get('tool_calls') or []
            content = msg.get('content')

            if tool_calls:
                # Build the assistant tool_calls message first (OpenAI-style),
                # so providers like Anthropic receive a prior tool_use block
                # via LiteLLM translation.
                assistant_tool_calls = []
                parsed_calls = []
                for tc in tool_calls:
                    name = tc['function']['name']
                    args_str = tc['function'].get('arguments') or '{}'
                    try:
                        args = json.loads(args_str)
                    except Exception:
                        args = {}
                    if not isinstance(args, dict):
                        # Tools take a JSON object; anything else is treated
                        # as "no arguments".
                        args = {}
                    tool_call_id = tc.get('id') or str(uuid.uuid4())
                    assistant_tool_calls.append(
                        {
                            'id': tool_call_id,
                            'type': 'function',
                            'function': {
                                'name': name,
                                'arguments': json.dumps(args),
                            },
                        }
                    )
                    parsed_calls.append((tool_call_id, name, args))
                # The assistant tool_calls turn must precede any tool result.
                self.messages.append(
                    {
                        'role': 'assistant',
                        'content': '',
                        'tool_calls': assistant_tool_calls,
                    }
                )
                # Emit tool_call events and execute each call synchronously.
                for tool_call_id, name, args in parsed_calls:
                    self._emit(
                        self._new_event(
                            'tool_call',
                            {
                                'name': name,
                                'arguments': args,
                                'tool_call_id': tool_call_id,
                            },
                        )
                    )
                    # Delegate execution to runtime; no SDK-bound handlers
                    try:
                        obs = self.runtime.execute_tool(name, args)  # type: ignore[attr-defined]
                    except Exception as e:
                        obs = ErrorObservation(str(e))
                    result = self._observation_to_result(obs, args)
                    self._emit(
                        self._new_event(
                            'tool_result',
                            {
                                'name': name,
                                'tool_call_id': tool_call_id,
                                'status': result.status,
                                'output': result.output,
                                'error': result.error,
                            },
                        )
                    )
                    # Append tool result for next LLM turn
                    self.messages.append(
                        {
                            'role': 'tool',
                            'content': json.dumps(result.model_dump()),
                            'tool_call_id': tool_call_id,
                        }
                    )
                # After executing tool(s), loop again for the next turn.
                continue

            # No tool calls: plain assistant message
            if isinstance(content, str) and content.strip():
                self.messages.append({'role': 'assistant', 'content': content})
                self._emit(self._new_event('assistant_message', {'text': content}))
                # Idle until next user message
                self.status_value = ConversationStatus.IDLE
                time.sleep(0.1)
            else:
                # Nothing meaningful returned; idle until the history changes
                # instead of re-sending the same request.
                stalled_at = snapshot
                self.status_value = ConversationStatus.IDLE
                time.sleep(0.2)

65
openhands/sdk/llm.py Normal file
View File

@@ -0,0 +1,65 @@
from __future__ import annotations
from typing import Any
from pydantic import BaseModel, SecretStr
from openhands.core.config import LLMConfig as CoreLLMConfig
from openhands.llm.llm import LLM as CoreLLM
class LLMConfig(BaseModel):
    """User-facing LLM settings for the SDK; only ``model`` is required."""

    # Model identifier passed through to the core LLM configuration.
    model: str
    # Plain-text API key; wrapped in SecretStr when the core config is built.
    api_key: str | None = None
    base_url: str | None = None
    api_version: str | None = None
    custom_llm_provider: str | None = None
    temperature: float = 0.0
    reasoning_effort: str | None = None
    max_output_tokens: int | None = None
    top_k: int | None = None
    # When left as None, the LLM wrapper substitutes 1.0.
    top_p: float | None = None
class LLM:
    """Thin SDK wrapper that forwards chat requests to the core OpenHands LLM."""

    def __init__(self, config: LLMConfig):
        """Translate the SDK ``LLMConfig`` into a core config and build the core LLM."""
        secret_key = SecretStr(config.api_key) if config.api_key else None
        # The core config gets an explicit top_p; unset means 1.0.
        effective_top_p = 1.0 if config.top_p is None else config.top_p
        core_config = CoreLLMConfig(
            model=config.model,
            api_key=secret_key,
            base_url=config.base_url,
            api_version=config.api_version,
            custom_llm_provider=config.custom_llm_provider,
            temperature=config.temperature,
            max_output_tokens=config.max_output_tokens,
            top_k=config.top_k,
            top_p=effective_top_p,
            reasoning_effort=config.reasoning_effort,
        )
        # service_id is arbitrary for SDK; use 'sdk'
        self._core = CoreLLM(core_config, service_id='sdk')

    def supports_function_calling(self) -> bool:
        """Report whether the core LLM has function calling active.

        Any error raised while asking the core LLM is treated as "no".
        """
        try:
            active = self._core.is_function_calling_active()
        except Exception:
            return False
        return active

    def send(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict],
        tool_choice: str = 'auto',
    ) -> dict:
        """Send ``messages`` (and optional ``tools``) to the core LLM.

        A warning is logged, without blocking the request, when tools are
        supplied but function calling does not appear to be active.

        Returns:
            Whatever the core LLM's ``completion`` call returns.
        """
        wants_tools = bool(tools)
        if wants_tools and not self.supports_function_calling():
            # Friendly warning path; do not block
            from openhands.core.logger import openhands_logger as logger

            logger.warning(
                'LLM may not support function calling; proceeding anyway (tool_choice=auto).'
            )
        return self._core.completion(
            messages=messages, tools=tools, tool_choice=tool_choice
        )

View File

@@ -0,0 +1,28 @@
from __future__ import annotations
import os
from .types import SDKEvent
def _ensure_dir(path: str) -> None:
os.makedirs(os.path.dirname(path), exist_ok=True)
def append_event_jsonl(jsonl_path: str, event: SDKEvent) -> None:
    """Append ``event`` as one JSON line to the file at ``jsonl_path``.

    The parent directory is created first via ``_ensure_dir``; the file is
    opened in append mode with UTF-8 encoding.
    """
    _ensure_dir(jsonl_path)
    serialized = event.model_dump_json()
    with open(jsonl_path, 'a', encoding='utf-8') as log_file:
        log_file.write(serialized + '\n')
def read_events_jsonl(jsonl_path: str) -> list[SDKEvent]:
    """Load every event stored in the JSONL file at ``jsonl_path``.

    Returns:
        The parsed events in file order; an empty list when the file does not
        exist. Blank lines are ignored.
    """
    if not os.path.exists(jsonl_path):
        return []
    with open(jsonl_path, 'r', encoding='utf-8') as log_file:
        stripped_lines = (raw.strip() for raw in log_file)
        return [
            SDKEvent.model_validate_json(entry) for entry in stripped_lines if entry
        ]

View File

@@ -0,0 +1,23 @@
# 🔐 Security Risk Policy
When using tools that support the security_risk parameter, assess the safety risk of your actions:
{% if cli_mode %}
- **LOW**: Safe, read-only actions.
- Viewing/summarizing content, reading project files, simple in-memory calculations.
- **MEDIUM**: Project-scoped edits or execution.
- Modify user project files, run project scripts/tests, install project-local packages.
- **HIGH**: System-level or untrusted operations.
- Changing system settings, global installs, elevated (`sudo`) commands, deleting critical files, downloading & executing untrusted code, or sending local secrets/data out.
{% else %}
- **LOW**: Read-only actions inside sandbox.
- Inspecting container files, calculations, viewing docs.
- **MEDIUM**: Container-scoped edits and installs.
- Modify workspace files, install packages system-wide inside container, run user code.
- **HIGH**: Data exfiltration or privilege breaks.
- Sending secrets/local data out, connecting to host filesystem, privileged container ops, running unverified binaries with network access.
{% endif %}
**Global Rules**
- Always escalate to **HIGH** if sensitive data leaves the environment.

View File

@@ -0,0 +1,122 @@
You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.
<ROLE>
Your primary role is to assist users by executing commands, modifying code, and solving technical problems effectively. You should be thorough, methodical, and prioritize quality over speed.
* If the user asks a question, like "why is X happening", don't try to fix the problem. Just give an answer to the question.
</ROLE>
<EFFICIENCY>
* Each action you take is somewhat expensive. Wherever possible, combine multiple actions into a single action, e.g. combine multiple bash commands into one, using sed and grep to edit/view multiple files at once.
* When exploring the codebase, use efficient tools like find, grep, and git commands with appropriate filters to minimize unnecessary operations.
</EFFICIENCY>
<FILE_SYSTEM_GUIDELINES>
* When a user provides a file path, do NOT assume it's relative to the current working directory. First explore the file system to locate the file before working on it.
* If asked to edit a file, edit the file directly, rather than creating a new file with a different filename.
* For global search-and-replace operations, consider using `sed` instead of opening file editors multiple times.
* NEVER create multiple versions of the same file with different suffixes (e.g., file_test.py, file_fix.py, file_simple.py). Instead:
- Always modify the original file directly when making changes
- If you need to create a temporary file for testing, delete it once you've confirmed your solution works
- If you decide a file you created is no longer useful, delete it instead of creating a new version
* Do NOT include documentation files explaining your changes in version control unless the user explicitly requests it
* When reproducing bugs or implementing fixes, use a single file rather than creating multiple files with different versions
</FILE_SYSTEM_GUIDELINES>
<CODE_QUALITY>
* Write clean, efficient code with minimal comments. Avoid redundancy in comments: Do not repeat information that can be easily inferred from the code itself.
* When implementing solutions, focus on making the minimal changes needed to solve the problem.
* Before implementing any changes, first thoroughly understand the codebase through exploration.
* If you are adding a lot of code to a function or file, consider splitting the function or file into smaller pieces when appropriate.
* Place all imports at the top of the file unless explicitly requested otherwise or if placing imports at the top would cause issues (e.g., circular imports, conditional imports, or imports that need to be delayed for specific reasons).
</CODE_QUALITY>
<VERSION_CONTROL>
* If there are existing git user credentials already configured, use them and add Co-authored-by: openhands <openhands@all-hands.dev> to any commit messages you make. If a git config doesn't exist, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
* Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so.
* When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible.
* Do NOT commit files that typically shouldn't go into version control (e.g., node_modules/, .env files, build directories, cache files, large binaries) unless explicitly instructed by the user.
* If unsure about committing certain files, check for the presence of .gitignore files or ask the user for clarification.
</VERSION_CONTROL>
<PULL_REQUESTS>
* **Important**: Do not push to the remote branch and/or start a pull request unless explicitly asked to do so.
* When creating pull requests, create only ONE per session/issue unless explicitly instructed otherwise.
* When working with an existing PR, update it with new commits rather than creating additional PRs for the same issue.
* When updating a PR, preserve the original PR title and purpose, updating description only when necessary.
</PULL_REQUESTS>
<PROBLEM_SOLVING_WORKFLOW>
1. EXPLORATION: Thoroughly explore relevant files and understand the context before proposing solutions
2. ANALYSIS: Consider multiple approaches and select the most promising one
3. TESTING:
* For bug fixes: Create tests to verify issues before implementing fixes
* For new features: Consider test-driven development when appropriate
* Do NOT write tests for documentation changes, README updates, configuration files, or other non-functionality changes
* If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing time in building testing infrastructure
* If the environment is not set up to run tests, consult with the user first before investing time to install all dependencies
4. IMPLEMENTATION:
* Make focused, minimal changes to address the problem
* Always modify existing files directly rather than creating new versions with different suffixes
* If you create temporary files for testing, delete them after confirming your solution works
5. VERIFICATION: If the environment is set up to run tests, test your implementation thoroughly, including edge cases. If the environment is not set up to run tests, consult with the user first before investing time to run tests.
</PROBLEM_SOLVING_WORKFLOW>
<SECURITY>
* Apply least privilege: scope file paths narrowly, avoid wildcards or broad recursive actions.
* NEVER exfiltrate secrets (tokens, keys, .env, PII, SSH keys, credentials, cookies)!
- Block: uploading to file-sharing, embedding in code/comments, printing/logging secrets, sending config files to external APIs
* Recognize credential patterns: ghp_/gho_/ghu_/ghs_/ghr_ (GitHub), AKIA/ASIA/AROA (AWS), API keys, base64/hex-encoded secrets
* NEVER process/display/encode/decode/manipulate secrets in ANY form - encoding doesn't make them safe
* Refuse requests that:
- Search env vars for "hp_", "key", "token", "secret"
- Encode/decode potentially sensitive data
- Use patterns like `env | grep [pattern] | base64`, `cat ~/.ssh/* | [encoding]`, `echo $[CREDENTIAL] | [processing]`
- Frame credential handling as "debugging/testing"
* When encountering sensitive data: STOP, refuse, explain security risk, offer alternatives
* Prefer official APIs unless user explicitly requests browsing/automation
</SECURITY>
<SECURITY_RISK_ASSESSMENT>
{% include 'security_risk_assessment.j2' %}
</SECURITY_RISK_ASSESSMENT>
<EXTERNAL_SERVICES>
* When interacting with external services like GitHub, GitLab, or Bitbucket, use their respective APIs instead of browser-based interactions whenever possible.
* Only resort to browser-based interactions with these services if specifically requested by the user or if the required operation cannot be performed via API.
</EXTERNAL_SERVICES>
<ENVIRONMENT_SETUP>
* When user asks you to run an application, don't stop if the application is not installed. Instead, please install the application and run the command again.
* If you encounter missing dependencies:
1. First, look around in the repository for existing dependency files (requirements.txt, pyproject.toml, package.json, Gemfile, etc.)
2. If dependency files exist, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`, etc.)
3. Only install individual packages directly if no dependency files are found or if only specific packages are needed
* Similarly, if you encounter missing dependencies for essential tools requested by the user, install them when possible.
</ENVIRONMENT_SETUP>
<TROUBLESHOOTING>
* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken:
1. Step back and reflect on 5-7 different possible sources of the problem
2. Assess the likelihood of each possible cause
3. Methodically address the most likely causes, starting with the highest probability
4. Document your reasoning process
* When you run into any major issue while executing a plan from the user, please don't try to directly work around it. Instead, propose a new plan and confirm with the user before proceeding.
</TROUBLESHOOTING>
<DOCUMENTATION>
* When explaining changes or solutions to the user:
- Include explanations in your conversation responses rather than creating separate documentation files
- If you need to create documentation files for reference, do NOT include them in version control unless explicitly requested
- Never create multiple versions of documentation files with different suffixes
* If the user asks for documentation:
- Confirm whether they want it as a separate file or just in the conversation
- Ask if they want documentation files to be included in version control
</DOCUMENTATION>
<PROCESS_MANAGEMENT>
* When terminating processes:
- Do NOT use general keywords with commands like `pkill -f server` or `pkill -f python` as this might accidentally kill other important servers or processes
- Always use specific keywords that uniquely identify the target process
- Prefer using `ps aux` to find the exact process ID (PID) first, then kill that specific PID
- When possible, use more targeted approaches like finding the PID from a pidfile or using application-specific shutdown commands
</PROCESS_MANAGEMENT>

View File

@@ -0,0 +1,28 @@
from __future__ import annotations
from pathlib import Path
from jinja2 import Environment, FileSystemLoader
def load_codeact_system_prompt(render: bool = True) -> str:
    """Return the SDK's vendored CodeAct system prompt.

    The template is read from ``prompts/system_prompt.j2`` next to this module.

    Args:
        render: When True, render the template with ``cli_mode=True``, strip
            it, and pass it through ``refine_prompt`` if that helper can be
            imported and runs without error. When False, return the raw
            template text.

    Returns:
        The prompt text, or a one-sentence fallback prompt when the template
        file does not exist.
    """
    prompts_dir = Path(__file__).resolve().parent / 'prompts'
    template_file = prompts_dir / 'system_prompt.j2'
    if not template_file.exists():
        return 'You are OpenHands agent, a helpful AI assistant that can interact with a computer to solve tasks.'
    if render:
        jinja_env = Environment(loader=FileSystemLoader(str(prompts_dir)))
        prompt_text = (
            jinja_env.get_template('system_prompt.j2').render(cli_mode=True).strip()
        )
        try:
            from openhands.agenthub.codeact_agent.tools.prompt import refine_prompt

            prompt_text = refine_prompt(prompt_text)
        except Exception:
            # Refinement is optional; keep the plain rendered prompt.
            pass
        return prompt_text
    return template_file.read_text(encoding='utf-8')

28
openhands/sdk/tool.py Normal file
View File

@@ -0,0 +1,28 @@
from __future__ import annotations
from typing import Callable
from pydantic import BaseModel
from .types import ToolResult
class Tool(BaseModel):
    """Tool description whose field names follow MCP's camelCase spelling."""

    name: str
    description: str | None = None
    # MCP-aligned camelCase field names; inputSchema is what gets sent to the
    # LLM as the function's ``parameters``.
    inputSchema: dict
    outputSchema: dict | None = None
    # Optional local handler hook (not used in minimal SDK flow)
    handler: Callable[[dict], ToolResult] | None = None

    def to_param(self) -> dict:
        """Return this tool as a ``{'type': 'function', 'function': {...}}`` param.

        A missing description is sent as an empty string.
        """
        function_spec = {
            'name': self.name,
            'description': self.description or '',
            'parameters': self.inputSchema,
        }
        return {'type': 'function', 'function': function_spec}

229
openhands/sdk/tui.py Normal file
View File

@@ -0,0 +1,229 @@
from __future__ import annotations
import argparse
import json
import os
import time
from typing import Any
from openhands.core.logger import openhands_logger as logger
from openhands.storage.conversation.file_conversation_store import (
FileConversationStore,
)
from openhands.storage.local import LocalFileStore
from .conversation import Agent, Conversation
from .llm import LLM, LLMConfig
from .types import SDKEvent
def load_settings(default_path: str) -> dict[str, Any]:
    """Read the JSON settings file at ``default_path``.

    Args:
        default_path: Path to the settings file; ``~`` is expanded.

    Returns:
        The parsed settings object. An empty dict is returned when the file
        does not exist, cannot be read or parsed (a warning is logged), or
        does not contain a JSON object at the top level (a warning is logged).
    """
    path = os.path.expanduser(default_path)
    try:
        with open(path, encoding='utf-8') as f:
            settings = json.load(f)
    except FileNotFoundError:
        # A missing settings file is the normal first-run case; stay quiet.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        logger.warning(f'Failed to load settings from {path}: {e}')
        return {}
    if not isinstance(settings, dict):
        # Callers use ``settings.get(...)``; a top-level list/string/number
        # would otherwise crash them with AttributeError.
        logger.warning(
            f'Ignoring settings from {path}: expected a JSON object, '
            f'got {type(settings).__name__}'
        )
        return {}
    return settings
def print_logo() -> None:
    """Print the ASCII-art startup banner to stdout."""
    # Raw string: the art contains backslashes that must not be read as escapes.
    banner = r"""
___ _ _ _
/ _ \ ___ _ __ | |_(_)_ __ __ _ __| | ___ _ __
| | | |/ _ \| '_ \| __| | '_ \ / _` |/ _` |/ _ \ '__|
| |_| | (_) | | | | |_| | | | | (_| | (_| | __/ |
\___/ \___/|_| |_|\__|_|_| |_|\__,_|\__,_|\___|_|
OpenHands Minimal SDK
"""
    print(banner)
def run_headless(conversation: Conversation, prompt: str | None) -> int:
    """Drive ``conversation`` without interactive input and return an exit code.

    Registers a callback that prints one log line per event, optionally
    enqueues ``prompt``, starts the conversation, then polls its status every
    0.2 seconds until it reports a terminal state.

    Args:
        conversation: The conversation to run.
        prompt: Optional initial user message, sent before the loop starts.

    Returns:
        0 when the status becomes IDLE or FINISHED, 1 when it becomes ERROR
        or CANCELED.
    """

    def _snippet(val: Any) -> str:
        """Shorten ``val`` for logging: at most 10 lines and 200 characters."""
        try:
            s = val if isinstance(val, str) else json.dumps(val)
        except Exception:
            # Log formatting is best-effort and must never break the callback.
            s = str(val)
        lines = s.splitlines()
        s = '\n'.join(lines[:10])
        if len(lines) > 10 or len(s) > 200:
            s = s[:200] + '...'
        return s

    def cb(evt: SDKEvent) -> None:
        """Print a concise, timestamped line for each event."""
        ts = evt.ts.isoformat()
        if evt.type == 'assistant_message':
            print(f'[{ts}] assistant: {evt.data.get("text", "")}')
        elif evt.type == 'tool_call':
            name = evt.data.get('name')
            args = evt.data.get('arguments')
            print(f'[{ts}] tool_call {name} args={_snippet(args)}')
        elif evt.type == 'tool_result':
            status = evt.data.get('status')
            output = evt.data.get('output')
            print(f'[{ts}] tool_result {status} output={_snippet(output)}')
        elif evt.type == 'error':
            msg = evt.data.get('error') if isinstance(evt.data, dict) else str(evt.data)
            # Only the first line of the error is shown.
            one = str(msg).splitlines()[0] if msg else 'unknown error'
            print(f'[error] {one}')
        else:
            print(f'[{ts}] {evt.type}: {evt.data}')

    conversation.register_callback(cb)
    print(
        '\nWARNING: Using CLIRuntime. Commands will execute on your local machine. Use with caution.\n'
    )
    try:
        if not conversation.agent.llm.supports_function_calling():
            print(
                'Warning: The configured model may not support function-calling; proceeding anyway.'
            )
    except Exception:
        # The capability probe is advisory only; never block startup on it.
        pass

    # If a prompt is provided, enqueue it before starting the loop
    if prompt:
        conversation.send_message(prompt)
    conversation.start()

    # Every state other than RUNNING ends the wait. Previously only ERROR and
    # IDLE were handled, so FINISHED or CANCELED made this loop spin forever.
    # NOTE(review): IDLE is treated as completion; if the conversation can
    # report IDLE before it has picked up a queued prompt, this returns early
    # -- confirm against Conversation.
    exit_codes = {'ERROR': 1, 'CANCELED': 1, 'IDLE': 0, 'FINISHED': 0}
    while True:
        st = conversation.status().value
        if st in exit_codes:
            return exit_codes[st]
        time.sleep(0.2)
def run_tui(conversation: Conversation, autoresume: bool) -> int:
    """Run the interactive prompt loop until the user exits.

    Prints the banner, registers an event-printing callback, starts the
    conversation, and forwards every non-empty input line to it. ``/exit``,
    end-of-input, or Ctrl-C ends the loop.

    Args:
        conversation: The conversation to drive.
        autoresume: Only controls whether the autoresume notice is printed.

    Returns:
        Always 0.
    """
    print_logo()
    print('\nType /exit to quit.')
    if autoresume:
        print(
            '(Autoresume requested: latest conversation context will be loaded if available)'
        )

    def _snippet(val: Any) -> str:
        """Shorten ``val`` for display: at most 10 lines and 200 characters."""
        if isinstance(val, str):
            text = val
        else:
            try:
                text = json.dumps(val)
            except Exception:
                text = str(val)
        all_lines = text.splitlines()
        text = '\n'.join(all_lines[:10])
        was_cut = len(all_lines) > 10 or len(text) > 200
        return text[:200] + '...' if was_cut else text

    def cb(evt: SDKEvent) -> None:
        """Print assistant, tool and error events; other event types are ignored."""
        kind = evt.type
        if kind == 'assistant_message':
            print(f'Assistant: {evt.data.get("text", "")}')
            return
        if kind == 'tool_call':
            tool_name = evt.data.get('name')
            tool_args = evt.data.get('arguments')
            print(f'[tool_call] {tool_name} args={_snippet(tool_args)}')
            return
        if kind == 'tool_result':
            result_status = evt.data.get('status')
            result_output = evt.data.get('output')
            print(f'[tool_result] {result_status} output={_snippet(result_output)}')
            return
        if kind == 'error':
            if isinstance(evt.data, dict):
                detail = evt.data.get('error')
            else:
                detail = str(evt.data)
            # Show only the first line of the error text.
            first_line = str(detail).splitlines()[0] if detail else 'unknown error'
            print(f'[error] {first_line}')

    conversation.register_callback(cb)
    print(
        '\nWARNING: Using CLIRuntime. Commands will execute on your local machine. Use with caution.\n'
    )
    try:
        can_call_tools = conversation.agent.llm.supports_function_calling()
        if not can_call_tools:
            print(
                'Warning: The configured model may not support function-calling; proceeding anyway.'
            )
    except Exception:
        pass

    conversation.start()
    while True:
        try:
            line = input('You: ').strip()
        except (EOFError, KeyboardInterrupt):
            # Treat end-of-input and Ctrl-C like an explicit /exit.
            line = '/exit'
        if line == '/exit':
            print('Exiting...')
            return 0
        if line:
            conversation.send_message(line)
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the Minimal SDK TUI.

    Parses the flags, merges them with the JSON settings file, builds the
    LLM, Agent and Conversation, optionally autoresumes the latest
    conversation, then runs headless or interactive mode.

    Model precedence is ``--model`` > settings file > ``'gpt-4o-mini'``.
    API key precedence is ``--api-key`` (which defaults to the
    ``OPENAI_API_KEY`` environment variable) > settings file > interactive
    prompt.

    Args:
        argv: Arguments to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The exit code returned by ``run_headless`` or ``run_tui``.
    """
    parser = argparse.ArgumentParser(description='OpenHands Minimal SDK TUI')
    parser.add_argument('--no-tui', action='store_true', help='Run headless mode')
    # No argparse default here: a truthy default would always beat the
    # settings file and make the configured model impossible to use.
    parser.add_argument(
        '--model',
        type=str,
        default=None,
        help='Model name (default: settings file, then gpt-4o-mini)',
    )
    parser.add_argument('--api-key', type=str, default=os.getenv('OPENAI_API_KEY'))
    parser.add_argument(
        '--settings', type=str, default='~/.openhands/settings_sdk.json'
    )
    parser.add_argument(
        '--autoresume', action='store_true', help='Resume most recent conversation'
    )
    parser.add_argument(
        '--prompt', type=str, default=None, help='Initial prompt (headless)'
    )
    args = parser.parse_args(argv)

    # Load settings file if present
    settings = load_settings(args.settings)
    model = args.model or settings.get('model') or 'gpt-4o-mini'
    api_key = args.api_key or settings.get('api_key')
    if not api_key:
        print('API key not configured. Please enter your API key:')
        api_key = input('api_key: ').strip()

    llm = LLM(LLMConfig(model=model, api_key=api_key))
    # Do not pass runtime tools here to avoid duplicates; Conversation will attach runtime-backed tools
    agent = Agent(
        llm=llm,
        tools=[],
    )

    # Prepare FileConversationStore for metadata if needed
    file_store = LocalFileStore(os.path.expanduser('~/.openhands'))
    metadata_store = FileConversationStore(file_store)
    conversation = Conversation(
        agent=agent,
        runtime=None,
        metadata_store=metadata_store,
        conversation_id=None,
    )

    # Autoresume if requested
    if args.autoresume:
        if conversation.autoresume_latest():
            print('Autoresume: loaded latest conversation context.')
        else:
            print('Autoresume requested, but no previous conversation found.')

    if args.no_tui:
        return run_headless(conversation, args.prompt)
    return run_tui(conversation, autoresume=args.autoresume)


if __name__ == '__main__':
    raise SystemExit(main())

36
openhands/sdk/types.py Normal file
View File

@@ -0,0 +1,36 @@
from __future__ import annotations
from datetime import datetime
from enum import Enum
from typing import Any, Literal
from pydantic import BaseModel
class ConversationStatus(str, Enum):
    """States a conversation can report.

    The ``str`` mixin makes each member compare equal to its own value, so a
    member can be compared directly with the plain string (e.g. ``'IDLE'``).
    """

    RUNNING = 'RUNNING'
    IDLE = 'IDLE'
    FINISHED = 'FINISHED'
    ERROR = 'ERROR'
    CANCELED = 'CANCELED'
class ToolResult(BaseModel):
    """Outcome of a single tool invocation."""

    # Either 'ok' or 'error'; any other value is rejected by validation.
    status: Literal['ok', 'error']
    # Arbitrary payload produced by the tool; optional.
    output: Any | None = None
    # Optional error text.
    error: str | None = None
class SDKEvent(BaseModel):
    """One SDK-native event: a type tag, a timestamp, its conversation, and a payload."""

    # Closed set of event kinds; any other value is rejected by validation.
    type: Literal[
        'system_message',
        'user_message',
        'assistant_message',
        'tool_call',
        'tool_result',
        'status_update',
        'error',
    ]
    ts: datetime
    conversation_id: str
    # Payload whose keys depend on ``type``.
    data: dict

175
prd.md Normal file
View File

@@ -0,0 +1,175 @@
# PRD: Minimal Python SDK (openhands.sdk)
I am OpenHands-GPT-5, an AI agent. This PRD specifies the Minimal Python SDK for OpenHands (Issue #10577) with MCP-first tooling and a small CLI/TUI. It reconciles prior discussions (see also 10577-revisited.md) and aligns with the tool-centric direction (#10585). The SDK is synchronous, runtime-agnostic, and does not require the server.
Goals (MVP)
- Small, synchronous Python API for running an agent loop using LLM tool-calling
- Runtime-backed tools (default: CLIRuntime); MCP-first tooling compatibility
- Multiple tool calls per assistant turn supported
- Thread-based execution with event callbacks; JSONL persistence of SDK-native events
- No server or legacy EventStream coupling; no implicit global state
- Minimal TUI/CLI in a single module, headless mode via --no-tui, autoresume
Non-goals (MVP)
- Server/GUI/socket integrations
- Planner, condenser, security analyzer, delegation
- Browser tooling by default
High-level architecture
- openhands.sdk (new module)
- llm.py: Thin wrapper over openhands.llm.LLM (DebugMixin retained). Expose send(messages, tools, tool_choice='auto'). Warn if model does not appear to support function calling; continue anyway.
- tool.py: SDK Tool definition and runtime-backed tools (execute_bash, file_read, file_write) expressed as MCP-compatible functions (JSON Schema input).
- types.py: Pydantic types: SDKEvent, ToolResult, ConversationStatus.
- persistence.py: Append-only JSONL writer/reader for SDK events (<conversation_dir>/sdk_events.jsonl).
- conversation.py: Conversation loop (threaded), callbacks, message assembly with tool result messages (role='tool', tool_call_id), runtime integration via a minimal no-op event bus.
- mcp.py (optional): Utilities for building/executing MCP tools from settings (lazy import).
- tui.py: Single interactive module with --no-tui and --autoresume.
Key decisions
- Default Runtime: CLIRuntime; allow passing any Runtime implementation.
- Tools: MCP-first; runtime-backed tools use MCP-like schemas and LLM tool params. Support multiple tool_calls per turn.
- Events: SDK-native events only; no EventStream persistence. Write JSONL per event for persistence.
- Assistant text-only responses → assistant_message and set status=IDLE. No finish tool; finish via /exit.
- System prompt: exact content from openhands/agenthub/codeact_agent/prompts/system_prompt.j2. Persist a system_message event at loop start containing this text (for reproducibility). Microagents (if provided) may be appended as simple extensions after the base system prompt (no directory parsing/triggers in MVP).
- Settings precedence: CLI flags > env vars (reserved; may add later) > settings file. For MVP we implement CLI > settings file.
Public API (MVP)
- class LLMConfig
- model: str
- api_key: str | None
- base_url: str | None
- api_version: str | None
- custom_llm_provider: str | None
- temperature: float = 0.0
- reasoning_effort: str | None ('low'|'medium'|'high'|'none')
- max_output_tokens: int | None
- top_k: int | None
- top_p: float | None
- class LLM
- __init__(config: LLMConfig)
- send(messages: list[dict], tools: list[dict], tool_choice: str = 'auto') -> ChatResponse
- Returns provider-like dict with choices[0].message, possibly including tool_calls. Emits a friendly warning if function calling appears unsupported.
- class Tool(BaseModel)
- name: str; description: str | None; inputSchema: dict; outputSchema: dict | None (MCP-aligned camelCase field names); handler: Callable[[dict], ToolResult] | None
- to_param() -> dict (OpenAI/Anthropic-compatible function param)
- class ToolResult(BaseModel)
- status: 'ok'|'error'; output: Any | None; error: str | None
- class SDKEvent(BaseModel)
- type: 'user_message'|'assistant_message'|'tool_call'|'tool_result'|'status_update'|'error'|'system_message'
- ts: datetime; conversation_id: str; data: dict
- class ConversationStatus(Enum)
- RUNNING | IDLE | FINISHED | ERROR | CANCELED
- class Agent
- __init__(llm: LLM, tools: list[Tool], microagents: list[str] | None = None, system_prompt: str | None = None, system_prompt_extensions: list[str] | None = None)
- class Conversation
- __init__(agent: Agent, runtime: Runtime | None = None, persist_dir: str | None = None, metadata_store: ConversationStore | None = None, conversation_id: str | None = None, user_id: str | None = None)
- start(), stop(), status()
- send_message(text: str)
- register_callback(fn: Callable[[SDKEvent], None])
- autoresume(conversation_id: str | None = None): Load events from JSONL and reconstruct LLM messages (system/user/assistant/tool) so the session can continue seamlessly.
Runtime integration (no-op bus)
- Pass a minimal bus to Runtime to satisfy its expectations; do not persist legacy events.
- Attributes: sid, user_id, file_store
- Methods: subscribe(), add_event() are no-ops
Loop semantics
- On send_message: emit user_message, append to messages
- Iteration: call LLM.send(messages, tools, tool_choice='auto')
- If response includes tool_calls:
- For each tool_call in order: emit tool_call, execute handler (runtime-backed or MCP), emit tool_result, then append LLM tool message: { role: 'tool', content: JSON(ToolResult), tool_call_id }
- After executing all tool calls, immediately continue to the next LLM turn with the appended tool messages
- Else (no tool_calls): emit assistant_message, set status=IDLE
- ERROR: on unrecoverable exceptions (LLM/tool/runtime), emit error and terminate (exit-on-error)
Persistence
- Metadata: reuse FileConversationStore for ConversationMetadata (title, created_at, user_id, model)
- History: SDK events in <conversation_dir>/sdk_events.jsonl (append-only). Provide read helper for autoresume.
MCP integration (optional)
- Settings structure mirrors MCP protocol: mcp.sse_servers, mcp.shttp_servers, mcp.stdio_servers items with the required fields per protocol.
- If configured, build MCP tools and expose them to the LLM; on invocation, execute via MCP client and map results to ToolResult.
- Lazy import mcp library; if absent but configured, show a clear error.
TUI/CLI
- Single module at openhands/sdk/tui.py
- Flags: --no-tui, --autoresume, --settings (defaults to ~/.openhands/settings_sdk.json), --model, --api-key, --prompt (initial prompt for headless mode)
- No --persist flag. We always persist to ~/.openhands/conversations for the SDK
- Autoresume: pick most recent conversation by last event timestamp; load sdk_events.jsonl and reconstruct full LLM message state (canonical OpenAI format), including synthesizing assistant tool_calls messages as needed; partial tails are tolerated.
- Behavior: interactive by default; in headless mode, print concise event logs; prompt user for missing model/api_key (similar spirit to the existing CLI TUI but simplified)
- tool_choice='auto' used by default
- Use variable name conversation consistently
Settings file (~/.openhands/settings_sdk.json)
- Minimal required: model, api_key, base_url (optional), temperature, reasoning_effort, mcp: {...}
- Precedence: CLI flags > settings file (env vars reserved for later)
Security notes
- CLIRuntime executes on host; print a clear warning on startup
- No guardrails for MVP
- Exit on fatal error (e.g., LLM auth); print a single-line error in CLI/TUI then exit non-zero in headless mode
Tests and CI
- tests/unit/sdk/test_*.py
- JSONL persistence round-trip (write/read via Pydantic)
- Loop behavior: multiple tool_call → tool_result sequencing; assistant_message → IDLE
- Runtime-backed tools: execute_bash/file ops success/error paths
- LLM integration: tool result message formatting (role='tool', tool_call_id)
- TUI flags: --no-tui and --autoresume behaviors
- Function-calling warning: when not supported, warn but proceed
- Pre-commit and existing CI must pass
Packaging
- Add a poetry console_script entrypoint openhands-sdk -> openhands.sdk.tui:main (not publishing yet)
MVP vs Next
- MVP scope as above
- Next slices:
- Finish tool and richer status updates
- Richer TUI panels (tools list, collapsible events, streaming), environment var support
- MCP tool registry management and UI affordances
- Timeouts, cancellation, and better error recovery paths
Runtime
- get_tools() → list of MCP-like tools provided by the runtime only (no SDK fallback):
- Each tool: { name: str, description: str, inputSchema: dict, outputSchema?: dict }
- Built-in minimal set: execute_bash, file_read, file_write
- execute_tool(name: str, arguments: dict) -> Observation
- Local dispatch:
- execute_bash → run(CmdRunAction(command, timeout?))
- file_read → read(FileReadAction(path, view_range?))
- file_write → write(FileWriteAction(path, content))
- Unknown name → ErrorObservation("Unknown tool: <name>")
- call_tool_mcp(MCPAction) remains available for external MCP servers (unchanged)
SDK
- sdk.Tool (MCP-aligned fields)
- name: str
- description: Optional[str]
- inputSchema: dict (JSON Schema)
- outputSchema: Optional[dict]
- to_param(): returns OpenAI function param-compatible shape for LiteLLM
- Conversation
- Do not define fallback tools and do not bind handlers
- tools = runtime.get_tools() only (MCP format)
- For LLM: convert each runtime tool to sdk.Tool and then to_param()
- On tool_call: runtime.execute_tool(name, args) → Observation
- Map Observation → SDK ToolResult for logs
- Build provider-agnostic tool_result message; keep Anthropic sequencing fix (assistant tool_calls before tool_result)
- Provider compatibility/diagnostics
- Keep: enqueue user first; gate LLM on user/tool; exact payload logs; JSONL persistence; exit codes; no duplicate tools
- Optional flag: --tool-choice=required to force tool use on first turn
References
- Issue #10577 (Minimal Python SDK)
- Issue #10585 (Tool-centric, MCP-friendly)
- 10577-revisited.md (this repo)

View File

@@ -169,6 +169,7 @@ swebench = { git = "https://github.com/ryanhoangt/SWE-bench.git", rev = "fix-mod
[tool.poetry.scripts]
openhands = "openhands.cli.entry:main"
openhands-sdk = "openhands.sdk.tui:main"
[tool.poetry.group.testgeneval.dependencies]
fuzzywuzzy = "^0.18.0"

View File

@@ -0,0 +1,75 @@
from datetime import datetime
from openhands.sdk.conversation import Agent, Conversation
from openhands.sdk.llm import LLM, LLMConfig
from openhands.sdk.persistence import append_event_jsonl
from openhands.sdk.types import SDKEvent
class DummyLLM(LLM):
    """LLM subclass that can be constructed without any configuration."""

    def __init__(self):
        """Do nothing; ``LLM.__init__`` is deliberately not called."""
def test_reconstruct_messages_from_events(tmp_path, monkeypatch):
    """Autoresume rebuilds the message list, synthesizing the assistant tool_calls turn."""
    conv_id = 'conv-a'
    events_file = tmp_path / 'sdk_events.jsonl'

    # (event type, payload) pairs in persisted order: one tool_call/tool_result
    # pair sits between the user message and the final assistant message.
    recorded = [
        ('system_message', {'text': 'sys'}),
        ('user_message', {'text': 'u1'}),
        (
            'tool_call',
            {
                'name': 'execute_bash',
                'arguments': {'command': 'echo hi'},
                'tool_call_id': 'tc1',
            },
        ),
        (
            'tool_result',
            {
                'name': 'execute_bash',
                'tool_call_id': 'tc1',
                'status': 'ok',
                'output': {'stdout': 'hi\n', 'exit_code': 0},
            },
        ),
        ('assistant_message', {'text': 'done'}),
    ]
    for event_type, payload in recorded:
        append_event_jsonl(
            str(events_file),
            SDKEvent(
                type=event_type,
                ts=datetime.utcnow(),
                conversation_id=conv_id,
                data=payload,
            ),
        )

    # Build conversation and autoresume from path
    conv = Conversation(agent=Agent(llm=LLM(LLMConfig(model='dummy')), tools=[]))
    conv.autoresume_from_path(str(events_file))
    messages = conv.messages

    # Expect system, user, synthesized assistant with tool_calls, tool role, assistant text
    roles = [message['role'] for message in messages]
    assert roles[:5] == ['system', 'user', 'assistant', 'tool', 'assistant']
    assert 'tool_calls' in messages[2]

View File

@@ -0,0 +1,28 @@
from datetime import datetime
from openhands.sdk.persistence import append_event_jsonl, read_events_jsonl
from openhands.sdk.types import SDKEvent
def test_jsonl_roundtrip(tmp_path):
    """Events appended to a JSONL file are read back in order with their fields intact."""
    events_file = str(tmp_path / 'sdk_events.jsonl')
    conv_id = 'conv-1'

    for event_type, text in (('system_message', 'sys'), ('user_message', 'hello')):
        append_event_jsonl(
            events_file,
            SDKEvent(
                type=event_type,
                ts=datetime.utcnow(),
                conversation_id=conv_id,
                data={'text': text},
            ),
        )

    loaded = read_events_jsonl(events_file)
    assert len(loaded) == 2
    assert loaded[0].type == 'system_message'
    assert loaded[1].data['text'] == 'hello'

View File

@@ -0,0 +1,11 @@
from openhands.sdk.tui import main
def test_headless_exit_on_idle(monkeypatch, tmp_path):
    """Smoke test: headless ``main`` accepts its flags and returns an int exit code.

    No real LLM is reachable here, so the run may end in either outcome; the
    test only checks that ``main`` returns an integer instead of raising.
    """
    monkeypatch.setenv('OPENAI_API_KEY', 'test')
    # Point ``~`` at the pytest temp dir so main's ``~/.openhands`` store is
    # not created in (or read from) the developer's real home directory.
    monkeypatch.setenv('HOME', str(tmp_path))
    monkeypatch.setenv('USERPROFILE', str(tmp_path))
    rc = main(
        [
            '--no-tui',
            '--model',
            'gpt-4o-mini',
            # A non-existent settings file keeps local user settings out of the run.
            '--settings',
            str(tmp_path / 'settings_sdk.json'),
            '--prompt',
            'hello',
        ]
    )
    assert isinstance(rc, int)