# Mirror of https://github.com/All-Hands-AI/OpenHands.git (synced 2026-01-09 14:57:59 -05:00)
from __future__ import annotations

import json
from datetime import datetime
from typing import Any

from integrations.solvability.models.difficulty_level import DifficultyLevel
from integrations.solvability.models.report import SolvabilityReport
from integrations.solvability.prompts import load_prompt
from pydantic import BaseModel, Field

from openhands.llm import LLM


class SolvabilitySummary(BaseModel):
    """Summary of the solvability analysis in human-readable format."""

    score: float
    """
    Solvability score indicating the likelihood of the issue being solvable.
    """

    summary: str
    """
    The executive summary content generated by the LLM.
    """

    actionable_feedback: str
    """
    Actionable feedback content generated by the LLM.
    """

    positive_feedback: str
    """
    Positive feedback content generated by the LLM, highlighting what is good about the issue.
    """

    prompt_tokens: int
    """
    Number of prompt tokens used in the API call to generate the summary.
    """

    completion_tokens: int
    """
    Number of completion tokens used in the API call to generate the summary.
    """
    response_latency: float
    """
    Response latency, in seconds, of the API call to generate the summary.
    """

    created_at: datetime = Field(default_factory=datetime.now)
    """
    Datetime when the summary was created.
    """

    @staticmethod
    def tool_description() -> dict[str, Any]:
        """Get the tool description for the LLM."""
        return {
            'type': 'function',
            'function': {
                'name': 'solvability_summary',
                'description': 'Generate a human-readable summary of the solvability analysis.',
                'parameters': {
                    'type': 'object',
                    'properties': {
                        'summary': {
                            'type': 'string',
                            'description': 'A high-level (at most two sentences) summary of the solvability report.',
                        },
                        'actionable_feedback': {
                            'type': 'string',
                            'description': (
                                'Bullet list of 1-3 pieces of actionable feedback on how the user can address the lowest scoring relevant features.'
                            ),
                        },
                        'positive_feedback': {
                            'type': 'string',
                            'description': (
                                'Bullet list of 1-3 pieces of positive feedback on the issue, highlighting what is good about it.'
                            ),
                        },
                    },
                    'required': ['summary', 'actionable_feedback'],
                },
            },
        }
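    # Note: the schema above marks only 'summary' and 'actionable_feedback' as required,
    # while the SolvabilitySummary model has no default for 'positive_feedback'; from_report
    # therefore appears to rely on the model filling in all three fields in practice.
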
    @staticmethod
    def tool_choice() -> dict[str, Any]:
        """Get the tool choice for the LLM."""
        return {
            'type': 'function',
            'function': {
                'name': 'solvability_summary',
            },
        }
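    # Supplying this fixed tool_choice forces the completion to call 'solvability_summary',
    # which is why from_report can read structured arguments from the tool call rather than
    # parsing free-form text.
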
    @staticmethod
    def system_message() -> dict[str, Any]:
        """Get the system message for the LLM."""
        return {
            'role': 'system',
            'content': load_prompt('summary_system_message'),
        }

    @staticmethod
    def user_message(report: SolvabilityReport) -> dict[str, Any]:
        """Get the user message for the LLM."""
        return {
            'role': 'user',
            'content': load_prompt(
                'summary_user_message',
                report=report.model_dump(),
                difficulty_level=DifficultyLevel.from_score(report.score).value[0],
            ),
        }

    @staticmethod
    def from_report(report: SolvabilityReport, llm: LLM) -> SolvabilitySummary:
        """Create a SolvabilitySummary from a SolvabilityReport."""
        import time

        start_time = time.time()
        response = llm.completion(
            messages=[
                SolvabilitySummary.system_message(),
                SolvabilitySummary.user_message(report),
            ],
            tools=[SolvabilitySummary.tool_description()],
            tool_choice=SolvabilitySummary.tool_choice(),
        )
        response_latency = time.time() - start_time

        # Grab the arguments from the forced function call
        arguments = json.loads(
            response.choices[0].message.tool_calls[0].function.arguments
        )

        return SolvabilitySummary(
            # The score is copied directly from the report
            score=report.score,
            # Performance and usage metrics are pulled from the response
            prompt_tokens=response.usage.prompt_tokens,
            completion_tokens=response.usage.completion_tokens,
            response_latency=response_latency,
            # Every other field should be taken from the forced function call
            **arguments,
        )

    def format_as_markdown(self) -> str:
        """Format the summary content as Markdown."""
        # Convert score to difficulty level enum
        difficulty_level = DifficultyLevel.from_score(self.score)

        # Create the main difficulty display
        result = f'{difficulty_level.format_display()}\n\n{self.summary}'

        # If the issue isn't rated easy, include the actionable feedback on its lowest scoring features
        if difficulty_level != DifficultyLevel.EASY:
            result += '\n\nYou can make the issue easier to resolve by addressing these concerns in the conversation:\n\n'
            result += self.actionable_feedback

        # If the difficulty isn't hard, add some positive feedback
        if difficulty_level != DifficultyLevel.HARD:
            result += '\n\nPositive feedback:\n\n'
            result += self.positive_feedback

        return result
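

# Illustrative usage sketch (assumes a configured `llm: LLM` and a `report: SolvabilityReport`
# produced elsewhere in the integration; neither is defined in this module):
#
#     summary = SolvabilitySummary.from_report(report, llm)
#     print(summary.format_as_markdown())
#
# The rendered Markdown leads with the difficulty level and executive summary, then appends
# actionable feedback unless the issue is rated EASY and positive feedback unless it is HARD.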