mirror of
https://github.com/Pythagora-io/gpt-pilot.git
synced 2026-01-08 12:53:50 -05:00
This is a complete rewrite of the GPT Pilot core, from the ground up, making the agentic architecture front and center, and also fixing some long-standing problems with the database architecture that weren't feasible to solve without breaking compatibility. As the database structure and config file syntax have changed, we have automatic imports for projects and current configs, see the README.md file for details. This also relicenses the project to FSL-1.1-MIT license.
329 lines
14 KiB
Python
329 lines
14 KiB
Python
import re
|
|
from difflib import unified_diff
|
|
from enum import Enum
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
from core.agents.base import BaseAgent
|
|
from core.agents.convo import AgentConvo
|
|
from core.agents.response import AgentResponse
|
|
from core.llm.parser import JSONParser
|
|
from core.log import get_logger
|
|
|
|
log = get_logger(__name__)
|
|
|
|
|
|
# Constant for indicating missing new line at the end of a file in a unified diff
|
|
NO_EOL = "\\ No newline at end of file"
|
|
|
|
# Regular expression pattern for matching hunk headers
|
|
PATCH_HEADER_PATTERN = re.compile(r"^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@")
|
|
|
|
# Maximum number of attempts to ask for review if it can't be parsed
|
|
MAX_REVIEW_RETRIES = 2
|
|
|
|
# Maximum number of code implementation attempts after which we accept the changes unconditionaly
|
|
MAX_CODING_ATTEMPTS = 3
|
|
|
|
|
|
class Decision(str, Enum):
|
|
APPLY = "apply"
|
|
IGNORE = "ignore"
|
|
REWORK = "rework"
|
|
|
|
|
|
class Hunk(BaseModel):
|
|
number: int = Field(description="Index of the hunk in the diff. Starts from 1.")
|
|
reason: str = Field(description="Reason for applying or ignoring this hunk, or for asking for it to be reworked.")
|
|
decision: Decision = Field(description="Whether to apply this hunk, rework, or ignore it.")
|
|
|
|
|
|
class ReviewChanges(BaseModel):
|
|
hunks: list[Hunk]
|
|
review_notes: str = Field(description="Additional review notes (optional, can be empty).")
|
|
|
|
|
|
class CodeReviewer(BaseAgent):
|
|
agent_type = "code-reviewer"
|
|
display_name = "Code Reviewer"
|
|
|
|
async def run(self) -> AgentResponse:
|
|
if (
|
|
not self.prev_response.data["old_content"]
|
|
or self.prev_response.data["new_content"] == self.prev_response.data["old_content"]
|
|
or self.prev_response.data["attempt"] >= MAX_CODING_ATTEMPTS
|
|
):
|
|
# we always auto-accept new files and unchanged files, or if we've tried too many times
|
|
return await self.accept_changes(self.prev_response.data["path"], self.prev_response.data["new_content"])
|
|
|
|
approved_content, feedback = await self.review_change(
|
|
self.prev_response.data["path"],
|
|
self.prev_response.data["instructions"],
|
|
self.prev_response.data["old_content"],
|
|
self.prev_response.data["new_content"],
|
|
)
|
|
if feedback:
|
|
return AgentResponse.code_review_feedback(
|
|
self,
|
|
new_content=self.prev_response.data["new_content"],
|
|
approved_content=approved_content,
|
|
feedback=feedback,
|
|
attempt=self.prev_response.data["attempt"],
|
|
)
|
|
else:
|
|
return await self.accept_changes(self.prev_response.data["path"], approved_content)
|
|
|
|
async def accept_changes(self, path: str, content: str) -> AgentResponse:
|
|
await self.state_manager.save_file(path, content)
|
|
self.next_state.complete_step()
|
|
|
|
input_required = self.state_manager.get_input_required(content)
|
|
if input_required:
|
|
return AgentResponse.input_required(
|
|
self,
|
|
[{"file": path, "line": line} for line in input_required],
|
|
)
|
|
else:
|
|
return AgentResponse.done(self)
|
|
|
|
def _get_task_convo(self) -> AgentConvo:
|
|
# FIXME: Current prompts reuse conversation from the developer so we have to resort to this
|
|
task = self.current_state.current_task
|
|
current_task_index = self.current_state.tasks.index(task)
|
|
|
|
convo = AgentConvo(self).template(
|
|
"breakdown",
|
|
task=task,
|
|
iteration=None,
|
|
current_task_index=current_task_index,
|
|
)
|
|
# TODO: We currently show last iteration to the code monkey; we might need to show the task
|
|
# breakdown and all the iterations instead? To think about when refactoring prompts
|
|
if self.current_state.iterations:
|
|
convo.assistant(self.current_state.iterations[-1]["description"])
|
|
else:
|
|
convo.assistant(self.current_state.current_task["instructions"])
|
|
return convo
|
|
|
|
async def review_change(
|
|
self, file_name: str, instructions: str, old_content: str, new_content: str
|
|
) -> tuple[str, str]:
|
|
"""
|
|
Review changes that were applied to the file.
|
|
|
|
This asks the LLM to act as a PR reviewer and for each part (hunk) of the
|
|
diff, decide if it should be applied (kept) or ignored (removed from the PR).
|
|
|
|
:param file_name: name of the file being modified
|
|
:param instructions: instructions for the reviewer
|
|
:param old_content: old file content
|
|
:param new_content: new file content (with proposed changes)
|
|
:return: tuple with file content update with approved changes, and review feedback
|
|
|
|
Diff hunk explanation: https://www.gnu.org/software/diffutils/manual/html_node/Hunks.html
|
|
"""
|
|
|
|
hunks = self.get_diff_hunks(file_name, old_content, new_content)
|
|
|
|
llm = self.get_llm()
|
|
convo = (
|
|
self._get_task_convo()
|
|
.template(
|
|
"review_changes",
|
|
instructions=instructions,
|
|
file_name=file_name,
|
|
old_content=old_content,
|
|
hunks=hunks,
|
|
)
|
|
.require_schema(ReviewChanges)
|
|
)
|
|
llm_response: ReviewChanges = await llm(convo, temperature=0, parser=JSONParser(ReviewChanges))
|
|
|
|
for i in range(MAX_REVIEW_RETRIES):
|
|
reasons = {}
|
|
ids_to_apply = set()
|
|
ids_to_ignore = set()
|
|
ids_to_rework = set()
|
|
for hunk in llm_response.hunks:
|
|
reasons[hunk.number - 1] = hunk.reason
|
|
if hunk.decision == "apply":
|
|
ids_to_apply.add(hunk.number - 1)
|
|
elif hunk.decision == "ignore":
|
|
ids_to_ignore.add(hunk.number - 1)
|
|
elif hunk.decision == "rework":
|
|
ids_to_rework.add(hunk.number - 1)
|
|
|
|
n_hunks = len(hunks)
|
|
n_review_hunks = len(reasons)
|
|
if n_review_hunks == n_hunks:
|
|
break
|
|
elif n_review_hunks < n_hunks:
|
|
error = "Not all hunks have been reviewed. Please review all hunks and add 'apply', 'ignore' or 'rework' decision for each."
|
|
elif n_review_hunks > n_hunks:
|
|
error = f"Your review contains more hunks ({n_review_hunks}) than in the original diff ({n_hunks}). Note that one hunk may have multiple changed lines."
|
|
|
|
# Max two retries; if the reviewer still hasn't reviewed all hunks, we'll just use the entire new content
|
|
convo.assistant(llm_response.model_dump_json()).user(error)
|
|
llm_response = await llm(convo, parser=JSONParser(ReviewChanges))
|
|
else:
|
|
return new_content, None
|
|
|
|
hunks_to_apply = [h for i, h in enumerate(hunks) if i in ids_to_apply]
|
|
diff_log = f"--- {file_name}\n+++ {file_name}\n" + "\n".join(hunks_to_apply)
|
|
|
|
hunks_to_rework = [(i, h) for i, h in enumerate(hunks) if i in ids_to_rework]
|
|
review_log = (
|
|
"\n\n".join([f"## Change\n```{hunk}```\nReviewer feedback:\n{reasons[i]}" for (i, hunk) in hunks_to_rework])
|
|
+ "\n\nReview notes:\n"
|
|
+ llm_response.review_notes
|
|
)
|
|
|
|
if len(hunks_to_apply) == len(hunks):
|
|
await self.send_message("Applying entire change")
|
|
log.info(f"Applying entire change to {file_name}")
|
|
return new_content, None
|
|
|
|
elif len(hunks_to_apply) == 0:
|
|
if hunks_to_rework:
|
|
await self.send_message(
|
|
f"Requesting rework for {len(hunks_to_rework)} changes with reason: {llm_response.review_notes}"
|
|
)
|
|
log.info(f"Requesting rework for {len(hunks_to_rework)} changes to {file_name} (0 hunks to apply)")
|
|
return old_content, review_log
|
|
else:
|
|
# If everything can be safely ignored, it's probably because the files already implement the changes
|
|
# from previous tasks (which can happen often). Insisting on a change here is likely to cause problems.
|
|
await self.send_message(f"Rejecting entire change with reason: {llm_response.review_notes}")
|
|
log.info(f"Rejecting entire change to {file_name} with reason: {llm_response.review_notes}")
|
|
return old_content, None
|
|
|
|
print("Applying code change:\n" + diff_log)
|
|
log.info(f"Applying code change to {file_name}:\n{diff_log}")
|
|
new_content = self.apply_diff(file_name, old_content, hunks_to_apply, new_content)
|
|
if hunks_to_rework:
|
|
print(f"Requesting rework for {len(hunks_to_rework)} changes with reason: {llm_response.review_notes}")
|
|
log.info(f"Requesting further rework for {len(hunks_to_rework)} changes to {file_name}")
|
|
return new_content, review_log
|
|
else:
|
|
return new_content, None
|
|
|
|
@staticmethod
|
|
def get_diff_hunks(file_name: str, old_content: str, new_content: str) -> list[str]:
|
|
"""
|
|
Get the diff between two files.
|
|
|
|
This uses Python difflib to produce an unified diff, then splits
|
|
it into hunks that will be separately reviewed by the reviewer.
|
|
|
|
:param file_name: name of the file being modified
|
|
:param old_content: old file content
|
|
:param new_content: new file content
|
|
:return: change hunks from the unified diff
|
|
"""
|
|
from_name = "old_" + file_name
|
|
to_name = "to_" + file_name
|
|
from_lines = old_content.splitlines(keepends=True)
|
|
to_lines = new_content.splitlines(keepends=True)
|
|
diff_gen = unified_diff(from_lines, to_lines, fromfile=from_name, tofile=to_name)
|
|
diff_txt = "".join(diff_gen)
|
|
|
|
hunks = re.split(r"\n@@", diff_txt, re.MULTILINE)
|
|
result = []
|
|
for i, h in enumerate(hunks):
|
|
# Skip the prologue (file names)
|
|
if i == 0:
|
|
continue
|
|
txt = h.splitlines()
|
|
txt[0] = "@@" + txt[0]
|
|
result.append("\n".join(txt))
|
|
return result
|
|
|
|
def apply_diff(self, file_name: str, old_content: str, hunks: list[str], fallback: str):
|
|
"""
|
|
Apply the diff to the original file content.
|
|
|
|
This uses the internal `_apply_patch` method to apply the
|
|
approved diff hunks to the original file content.
|
|
|
|
If patch apply fails, the fallback is the full new file content
|
|
with all the changes applied (as if the reviewer approved everythng).
|
|
|
|
:param file_name: name of the file being modified
|
|
:param old_content: old file content
|
|
:param hunks: change hunks from the unified diff
|
|
:param fallback: proposed new file content (with all the changes applied)
|
|
"""
|
|
diff = (
|
|
"\n".join(
|
|
[
|
|
f"--- {file_name}",
|
|
f"+++ {file_name}",
|
|
]
|
|
+ hunks
|
|
)
|
|
+ "\n"
|
|
)
|
|
try:
|
|
fixed_content = self._apply_patch(old_content, diff)
|
|
except Exception as e:
|
|
# This should never happen but if it does, just use the new version from
|
|
# the LLM and hope for the best
|
|
print(f"Error applying diff: {e}; hoping all changes are valid")
|
|
return fallback
|
|
|
|
return fixed_content
|
|
|
|
# Adapted from https://gist.github.com/noporpoise/16e731849eb1231e86d78f9dfeca3abc (Public Domain)
|
|
@staticmethod
|
|
def _apply_patch(original: str, patch: str, revert: bool = False):
|
|
"""
|
|
Apply a patch to a string to recover a newer version of the string.
|
|
|
|
:param original: The original string.
|
|
:param patch: The patch to apply.
|
|
:param revert: If True, treat the original string as the newer version and recover the older string.
|
|
:return: The updated string after applying the patch.
|
|
"""
|
|
original_lines = original.splitlines(True)
|
|
patch_lines = patch.splitlines(True)
|
|
|
|
updated_text = ""
|
|
index_original = start_line = 0
|
|
|
|
# Choose which group of the regex to use based on the revert flag
|
|
match_index, line_sign = (1, "+") if not revert else (3, "-")
|
|
|
|
# Skip header lines of the patch
|
|
while index_original < len(patch_lines) and patch_lines[index_original].startswith(("---", "+++")):
|
|
index_original += 1
|
|
|
|
while index_original < len(patch_lines):
|
|
match = PATCH_HEADER_PATTERN.match(patch_lines[index_original])
|
|
if not match:
|
|
raise Exception("Bad patch -- regex mismatch [line " + str(index_original) + "]")
|
|
|
|
line_number = int(match.group(match_index)) - 1 + (match.group(match_index + 1) == "0")
|
|
|
|
if start_line > line_number or line_number > len(original_lines):
|
|
raise Exception("Bad patch -- bad line number [line " + str(index_original) + "]")
|
|
|
|
updated_text += "".join(original_lines[start_line:line_number])
|
|
start_line = line_number
|
|
index_original += 1
|
|
|
|
while index_original < len(patch_lines) and patch_lines[index_original][0] != "@":
|
|
if index_original + 1 < len(patch_lines) and patch_lines[index_original + 1][0] == "\\":
|
|
line_content = patch_lines[index_original][:-1]
|
|
index_original += 2
|
|
else:
|
|
line_content = patch_lines[index_original]
|
|
index_original += 1
|
|
|
|
if line_content:
|
|
if line_content[0] == line_sign or line_content[0] == " ":
|
|
updated_text += line_content[1:]
|
|
start_line += line_content[0] != line_sign
|
|
|
|
updated_text += "".join(original_lines[start_line:])
|
|
return updated_text
|