mirror of
https://github.com/Pythagora-io/gpt-pilot.git
synced 2026-01-10 13:37:55 -05:00
482 lines
19 KiB
Python
482 lines
19 KiB
Python
import asyncio
|
|
import re
|
|
from difflib import unified_diff
|
|
from enum import Enum
|
|
from typing import Optional, Union
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
from core.agents.base import BaseAgent
|
|
from core.agents.convo import AgentConvo
|
|
from core.agents.mixins import FileDiffMixin
|
|
from core.agents.response import AgentResponse, ResponseType
|
|
from core.config import CODE_MONKEY_AGENT_NAME, CODE_REVIEW_AGENT_NAME, DESCRIBE_FILES_AGENT_NAME
|
|
from core.config.actions import CM_UPDATE_FILES
|
|
from core.db.models import File
|
|
from core.llm.parser import JSONParser, OptionalCodeBlockParser
|
|
from core.log import get_logger
|
|
|
|
log = get_logger(__name__)
|
|
|
|
|
|
# Constant for indicating missing new line at the end of a file in a unified diff
|
|
NO_EOL = "\\ No newline at end of file"
|
|
|
|
# Regular expression pattern for matching hunk headers
|
|
PATCH_HEADER_PATTERN = re.compile(r"^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@")
|
|
|
|
# Maximum number of attempts to ask for review if it can't be parsed
|
|
MAX_REVIEW_RETRIES = 2
|
|
|
|
# Maximum number of code implementation attempts after which we accept the changes unconditionaly
|
|
MAX_CODING_ATTEMPTS = 3
|
|
|
|
|
|
class Decision(str, Enum):
    """Reviewer's verdict for a single diff hunk."""

    APPLY = "apply"  # keep this hunk in the final change
    IGNORE = "ignore"  # drop this hunk from the final change
    REWORK = "rework"  # send this hunk back for another implementation pass
|
|
|
|
|
|
class Hunk(BaseModel):
    """One reviewed hunk of the unified diff (part of the code-review LLM response schema)."""

    # NOTE: `number` is 1-based; consumers convert to 0-based indices into the hunk list.
    number: int = Field(description="Index of the hunk in the diff. Starts from 1.")
    reason: str = Field(description="Reason for applying or ignoring this hunk, or for asking for it to be reworked.")
    decision: Decision = Field(description="Whether to apply this hunk, rework, or ignore it.")
|
|
|
|
|
|
class ReviewChanges(BaseModel):
    """Schema for the code-review LLM response: one verdict per hunk plus free-form notes."""

    hunks: list[Hunk]
    review_notes: str = Field(description="Additional review notes (optional, can be empty).")
|
|
|
|
|
|
class FileDescription(BaseModel):
    """Schema for the file-describing LLM response: a summary plus project-local references."""

    summary: str = Field(
        description="Detailed description summarized what the file is about, and what the major classes, functions, elements or other functionality is implemented."
    )
    references: list[str] = Field(
        description="List of references the file imports or includes (only files local to the project), where each element specifies the project-relative path of the referenced file, including the file extension."
    )
|
|
|
|
|
|
class CodeMonkey(FileDiffMixin, BaseAgent):
|
|
agent_type = "code-monkey"
|
|
display_name = "Code Monkey"
|
|
|
|
async def run(self) -> AgentResponse:
|
|
if self.prev_response and self.prev_response.type == ResponseType.DESCRIBE_FILES:
|
|
return await self.describe_files()
|
|
else:
|
|
data = await self.implement_changes()
|
|
code_review_done = False
|
|
while not code_review_done:
|
|
review_response = await self.run_code_review(data)
|
|
if isinstance(review_response, AgentResponse):
|
|
return review_response
|
|
data = await self.implement_changes(review_response)
|
|
|
|
async def implement_changes(self, data: Optional[dict] = None) -> dict:
|
|
file_name = self.step["save_file"]["path"]
|
|
|
|
current_file = await self.state_manager.get_file_by_path(file_name)
|
|
file_content = current_file.content.content if current_file else ""
|
|
|
|
task = self.current_state.current_task
|
|
|
|
if data is not None:
|
|
attempt = data["attempt"] + 1
|
|
feedback = data["feedback"]
|
|
log.debug(f"Fixing file {file_name} after review feedback: {feedback} ({attempt}. attempt)")
|
|
await self.ui.send_file_status(file_name, "reworking", source=self.ui_source)
|
|
else:
|
|
log.debug(f"Implementing file {file_name}")
|
|
if data is None:
|
|
await self.ui.send_file_status(
|
|
file_name, "updating" if file_content else "creating", source=self.ui_source
|
|
)
|
|
else:
|
|
await self.ui.send_file_status(file_name, "reworking", source=self.ui_source)
|
|
self.next_state.action = CM_UPDATE_FILES
|
|
attempt = 1
|
|
feedback = None
|
|
|
|
iterations = self.current_state.iterations
|
|
user_feedback = None
|
|
user_feedback_qa = None
|
|
llm = self.get_llm(CODE_MONKEY_AGENT_NAME)
|
|
|
|
if iterations:
|
|
last_iteration = iterations[-1]
|
|
instructions = last_iteration.get("description")
|
|
user_feedback = last_iteration.get("user_feedback")
|
|
user_feedback_qa = last_iteration.get("user_feedback_qa")
|
|
else:
|
|
instructions = self.current_state.current_task["instructions"]
|
|
|
|
convo = AgentConvo(self).template(
|
|
"implement_changes",
|
|
file_name=file_name,
|
|
file_content=file_content,
|
|
instructions=instructions,
|
|
user_feedback=user_feedback,
|
|
user_feedback_qa=user_feedback_qa,
|
|
)
|
|
if feedback:
|
|
convo.assistant(f"```\n{data['new_content']}\n```\n").template(
|
|
"review_feedback",
|
|
content=data["approved_content"],
|
|
original_content=file_content,
|
|
rework_feedback=feedback,
|
|
)
|
|
|
|
response: str = await llm(convo, temperature=0, parser=OptionalCodeBlockParser())
|
|
# FIXME: provide a counter here so that we don't have an endless loop here
|
|
return {
|
|
"path": file_name,
|
|
"instructions": task["instructions"],
|
|
"old_content": file_content,
|
|
"new_content": response,
|
|
"attempt": attempt,
|
|
}
|
|
|
|
async def describe_files(self) -> AgentResponse:
|
|
tasks = []
|
|
to_describe = {
|
|
file.path: file.content.content for file in self.current_state.files if not file.meta.get("description")
|
|
}
|
|
|
|
for file in self.next_state.files:
|
|
content = to_describe.get(file.path)
|
|
if content is None:
|
|
continue
|
|
|
|
if content == "":
|
|
file.meta = {
|
|
**file.meta,
|
|
"description": "Empty file",
|
|
"references": [],
|
|
}
|
|
continue
|
|
|
|
tasks.append(self.describe_file(file, content))
|
|
|
|
await asyncio.gather(*tasks)
|
|
return AgentResponse.done(self)
|
|
|
|
async def describe_file(self, file: File, content: str):
|
|
"""
|
|
Describes a file by sending it to the LLM agent and then updating the file's metadata in the database.
|
|
"""
|
|
llm = self.get_llm(DESCRIBE_FILES_AGENT_NAME)
|
|
log.debug(f"Describing file {file.path}")
|
|
convo = (
|
|
AgentConvo(self)
|
|
.template(
|
|
"describe_file",
|
|
path=file.path,
|
|
content=content,
|
|
)
|
|
.require_schema(FileDescription)
|
|
)
|
|
llm_response: FileDescription = await llm(convo, parser=JSONParser(spec=FileDescription))
|
|
|
|
file.meta = {
|
|
**file.meta,
|
|
"description": llm_response.summary,
|
|
"references": llm_response.references,
|
|
}
|
|
|
|
# ------------------------------
|
|
# CODE REVIEW
|
|
# ------------------------------
|
|
|
|
async def run_code_review(self, data: Optional[dict]) -> Union[AgentResponse, dict]:
|
|
await self.ui.send_file_status(data["path"], "reviewing", source=self.ui_source)
|
|
if (
|
|
data is not None
|
|
and not data["old_content"]
|
|
or data["new_content"] == data["old_content"]
|
|
or data["attempt"] >= MAX_CODING_ATTEMPTS
|
|
):
|
|
# we always auto-accept new files and unchanged files, or if we've tried too many times
|
|
return await self.accept_changes(data["path"], data["old_content"], data["new_content"])
|
|
|
|
approved_content, feedback = await self.review_change(
|
|
data["path"],
|
|
data["instructions"],
|
|
data["old_content"],
|
|
data["new_content"],
|
|
)
|
|
if feedback:
|
|
return {
|
|
"new_content": data["new_content"],
|
|
"approved_content": approved_content,
|
|
"feedback": feedback,
|
|
"attempt": data["attempt"],
|
|
}
|
|
else:
|
|
return await self.accept_changes(data["path"], data["old_content"], approved_content)
|
|
|
|
async def accept_changes(self, file_path: str, old_content: str, new_content: str) -> AgentResponse:
|
|
await self.ui.send_file_status(file_path, "done", source=self.ui_source)
|
|
|
|
n_new_lines, n_del_lines = self.get_line_changes(old_content, new_content)
|
|
await self.ui.generate_diff(
|
|
file_path, old_content, new_content, n_new_lines, n_del_lines, source=self.ui_source
|
|
)
|
|
|
|
await self.state_manager.save_file(file_path, new_content)
|
|
self.step["save_file"]["content"] = new_content
|
|
self.next_state.complete_step("save_file")
|
|
|
|
input_required = self.state_manager.get_input_required(new_content, file_path)
|
|
if input_required:
|
|
return AgentResponse.input_required(
|
|
self,
|
|
[{"file": file_path, "line": line} for line in input_required],
|
|
)
|
|
else:
|
|
return AgentResponse.done(self)
|
|
|
|
def _get_task_convo(self) -> AgentConvo:
|
|
# FIXME: Current prompts reuse conversation from the developer so we have to resort to this
|
|
task = self.current_state.current_task
|
|
current_task_index = self.current_state.tasks.index(task)
|
|
|
|
related_api_endpoints = task.get("related_api_endpoints", [])
|
|
# TODO: Temp fix for old projects
|
|
if not (
|
|
related_api_endpoints
|
|
and len(related_api_endpoints) > 0
|
|
and all(isinstance(api, dict) and "endpoint" in api for api in related_api_endpoints)
|
|
):
|
|
related_api_endpoints = []
|
|
convo = AgentConvo(self).template(
|
|
"breakdown",
|
|
task=task,
|
|
iteration=None,
|
|
current_task_index=current_task_index,
|
|
related_api_endpoints=related_api_endpoints,
|
|
)
|
|
# TODO: We currently show last iteration to the code monkey; we might need to show the task
|
|
# breakdown and all the iterations instead? To think about when refactoring prompts
|
|
if self.current_state.iterations:
|
|
convo.assistant(self.current_state.iterations[-1]["description"])
|
|
else:
|
|
convo.assistant(self.current_state.current_task["instructions"])
|
|
return convo
|
|
|
|
    async def review_change(
        self, file_name: str, instructions: str, old_content: str, new_content: str
    ) -> tuple[str, Optional[str]]:
        """
        Review changes that were applied to the file.

        This asks the LLM to act as a PR reviewer and for each part (hunk) of the
        diff, decide if it should be applied (kept) or ignored (removed from the PR).

        :param file_name: name of the file being modified
        :param instructions: instructions for the reviewer
        :param old_content: old file content
        :param new_content: new file content (with proposed changes)
        :return: tuple with file content update with approved changes, and review feedback
            (feedback is None when no rework is requested)

        Diff hunk explanation: https://www.gnu.org/software/diffutils/manual/html_node/Hunks.html
        """

        hunks = self.get_diff_hunks(file_name, old_content, new_content)

        llm = self.get_llm(CODE_REVIEW_AGENT_NAME)
        convo = (
            self._get_task_convo()
            .template(
                "review_changes",
                instructions=instructions,
                file_name=file_name,
                old_content=old_content,
                hunks=hunks,
            )
            .require_schema(ReviewChanges)
        )
        llm_response: ReviewChanges = await llm(convo, temperature=0, parser=JSONParser(ReviewChanges))

        # Retry loop: the reviewer must produce exactly one verdict per hunk.
        # If the counts don't match, re-ask up to MAX_REVIEW_RETRIES times.
        for i in range(MAX_REVIEW_RETRIES):
            reasons = {}  # 0-based hunk index -> reviewer's stated reason
            ids_to_apply = set()
            ids_to_ignore = set()
            ids_to_rework = set()
            for hunk in llm_response.hunks:
                # Hunk numbers in the LLM response are 1-based; convert to 0-based.
                reasons[hunk.number - 1] = hunk.reason
                if hunk.decision == "apply":
                    ids_to_apply.add(hunk.number - 1)
                elif hunk.decision == "ignore":
                    ids_to_ignore.add(hunk.number - 1)
                elif hunk.decision == "rework":
                    ids_to_rework.add(hunk.number - 1)

            n_hunks = len(hunks)
            n_review_hunks = len(reasons)
            if n_review_hunks == n_hunks:
                # Every hunk was reviewed exactly once; proceed with the verdicts.
                break
            elif n_review_hunks < n_hunks:
                error = "Not all hunks have been reviewed. Please review all hunks and add 'apply', 'ignore' or 'rework' decision for each."
            elif n_review_hunks > n_hunks:
                error = f"Your review contains more hunks ({n_review_hunks}) than in the original diff ({n_hunks}). Note that one hunk may have multiple changed lines."

            # Max two retries; if the reviewer still hasn't reviewed all hunks, we'll just use the entire new content
            convo.assistant(llm_response.model_dump_json()).user(error)
            llm_response = await llm(convo, parser=JSONParser(ReviewChanges))
        else:
            # for/else: retries exhausted without a complete review — accept everything.
            return new_content, None

        hunks_to_apply = [h for i, h in enumerate(hunks) if i in ids_to_apply]
        diff_log = f"--- {file_name}\n+++ {file_name}\n" + "\n".join(hunks_to_apply)

        # Feedback message for the hunks the reviewer wants redone.
        hunks_to_rework = [(i, h) for i, h in enumerate(hunks) if i in ids_to_rework]
        review_log = (
            "\n\n".join([f"## Change\n```{hunk}```\nReviewer feedback:\n{reasons[i]}" for (i, hunk) in hunks_to_rework])
            + "\n\nReview notes:\n"
            + llm_response.review_notes
        )

        if len(hunks_to_apply) == len(hunks):
            # Everything approved: use the new content as-is.
            log.info(f"Applying entire change to {file_name}")
            return new_content, None

        elif len(hunks_to_apply) == 0:
            if hunks_to_rework:
                # Nothing approved, but some hunks need rework: keep the old content and ask again.
                log.info(f"Requesting rework for {len(hunks_to_rework)} changes to {file_name} (0 hunks to apply)")
                return old_content, review_log
            else:
                # If everything can be safely ignored, it's probably because the files already implement the changes
                # from previous tasks (which can happen often). Insisting on a change here is likely to cause problems.
                log.info(f"Rejecting entire change to {file_name} with reason: {llm_response.review_notes}")
                return old_content, None

        # Mixed verdict: patch only the approved hunks onto the old content.
        log.debug(f"Applying code change to {file_name}:\n{diff_log}")
        new_content = self.apply_diff(file_name, old_content, hunks_to_apply, new_content)
        if hunks_to_rework:
            log.info(f"Requesting further rework for {len(hunks_to_rework)} changes to {file_name}")
            return new_content, review_log
        else:
            return new_content, None
|
|
|
|
@staticmethod
|
|
def get_diff_hunks(file_name: str, old_content: str, new_content: str) -> list[str]:
|
|
"""
|
|
Get the diff between two files.
|
|
|
|
This uses Python difflib to produce an unified diff, then splits
|
|
it into hunks that will be separately reviewed by the reviewer.
|
|
|
|
:param file_name: name of the file being modified
|
|
:param old_content: old file content
|
|
:param new_content: new file content
|
|
:return: change hunks from the unified diff
|
|
"""
|
|
from_name = "old_" + file_name
|
|
to_name = "to_" + file_name
|
|
from_lines = old_content.splitlines(keepends=True)
|
|
to_lines = new_content.splitlines(keepends=True)
|
|
diff_gen = unified_diff(from_lines, to_lines, fromfile=from_name, tofile=to_name)
|
|
diff_txt = "".join(diff_gen)
|
|
|
|
hunks = re.split(r"\n@@", diff_txt, re.MULTILINE)
|
|
result = []
|
|
for i, h in enumerate(hunks):
|
|
# Skip the prologue (file names)
|
|
if i == 0:
|
|
continue
|
|
txt = h.splitlines()
|
|
txt[0] = "@@" + txt[0]
|
|
result.append("\n".join(txt))
|
|
return result
|
|
|
|
def apply_diff(self, file_name: str, old_content: str, hunks: list[str], fallback: str):
|
|
"""
|
|
Apply the diff to the original file content.
|
|
|
|
This uses the internal `_apply_patch` method to apply the
|
|
approved diff hunks to the original file content.
|
|
|
|
If patch apply fails, the fallback is the full new file content
|
|
with all the changes applied (as if the reviewer approved everythng).
|
|
|
|
:param file_name: name of the file being modified
|
|
:param old_content: old file content
|
|
:param hunks: change hunks from the unified diff
|
|
:param fallback: proposed new file content (with all the changes applied)
|
|
"""
|
|
diff = (
|
|
"\n".join(
|
|
[
|
|
f"--- {file_name}",
|
|
f"+++ {file_name}",
|
|
]
|
|
+ hunks
|
|
)
|
|
+ "\n"
|
|
)
|
|
try:
|
|
fixed_content = self._apply_patch(old_content, diff)
|
|
except Exception as e:
|
|
# This should never happen but if it does, just use the new version from
|
|
# the LLM and hope for the best
|
|
print(f"Error applying diff: {e}; hoping all changes are valid")
|
|
return fallback
|
|
|
|
return fixed_content
|
|
|
|
# Adapted from https://gist.github.com/noporpoise/16e731849eb1231e86d78f9dfeca3abc (Public Domain)
|
|
    @staticmethod
    def _apply_patch(original: str, patch: str, revert: bool = False) -> str:
        """
        Apply a patch to a string to recover a newer version of the string.

        :param original: The original string.
        :param patch: The patch to apply.
        :param revert: If True, treat the original string as the newer version and recover the older string.
        :return: The updated string after applying the patch.
        :raises Exception: if a hunk header can't be parsed or refers to an impossible line number.
        """
        original_lines = original.splitlines(True)
        patch_lines = patch.splitlines(True)

        updated_text = ""
        # NOTE: despite its name, `index_original` indexes into `patch_lines`;
        # `start_line` tracks how far into `original_lines` we've copied.
        index_original = start_line = 0

        # Choose which group of the regex to use based on the revert flag
        # (group 1/2 = old-file line/count, group 3/4 = new-file line/count).
        match_index, line_sign = (1, "+") if not revert else (3, "-")

        # Skip header lines of the patch
        while index_original < len(patch_lines) and patch_lines[index_original].startswith(("---", "+++")):
            index_original += 1

        # Process one hunk per iteration.
        while index_original < len(patch_lines):
            match = PATCH_HEADER_PATTERN.match(patch_lines[index_original])
            if not match:
                raise Exception("Bad patch -- regex mismatch [line " + str(index_original) + "]")

            # Convert the 1-based hunk start to a 0-based index; a count of "0"
            # (pure insertion) keeps the position as-is rather than backing up.
            line_number = int(match.group(match_index)) - 1 + (match.group(match_index + 1) == "0")

            # Hunks must be in order and within the original file.
            if start_line > line_number or line_number > len(original_lines):
                raise Exception("Bad patch -- bad line number [line " + str(index_original) + "]")

            # Copy the untouched lines between the previous hunk and this one.
            updated_text += "".join(original_lines[start_line:line_number])
            start_line = line_number
            index_original += 1

            # Consume the hunk body until the next "@@" header (or end of patch).
            while index_original < len(patch_lines) and patch_lines[index_original][0] != "@":
                # A following "\ No newline at end of file" marker means this line's
                # trailing newline must be stripped; skip the marker line itself.
                if index_original + 1 < len(patch_lines) and patch_lines[index_original + 1][0] == "\\":
                    line_content = patch_lines[index_original][:-1]
                    index_original += 2
                else:
                    line_content = patch_lines[index_original]
                    index_original += 1

                if line_content:
                    # Keep added ("+"/"-" depending on direction) and context (" ") lines;
                    # advance through the original for context and removed lines.
                    if line_content[0] == line_sign or line_content[0] == " ":
                        updated_text += line_content[1:]
                    start_line += line_content[0] != line_sign

        # Copy whatever remains of the original after the last hunk.
        updated_text += "".join(original_lines[start_line:])
        return updated_text
|