Merge pull request #834 from Pythagora-io/development

Development
This commit is contained in:
LeonOstrez
2024-04-01 13:40:06 -07:00
committed by GitHub
9 changed files with 152 additions and 37 deletions

View File

@@ -17,6 +17,7 @@ MESSAGE_TYPE = {
'progress': 'progress', # Progress bar for extension only
'projectStats': 'projectStats', # Project stats for extension only
'keyExpired': 'keyExpired', # (Free trial) key expired message - for extension only
'inputPrefill': 'inputPrefill', # Prefill input field with text in extension
}
LOCAL_IGNORE_MESSAGE_TYPES = [
@@ -33,4 +34,5 @@ LOCAL_IGNORE_MESSAGE_TYPES = [
'progress',
'projectStats',
'keyExpired',
'inputPrefill',
]
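
For context, a minimal sketch of how these extension-only types are presumably filtered when gpt-pilot runs in a plain terminal; the helper name local_print and its logic are assumptions for illustration, not code from this PR.

# Assumed helper, for illustration only: messages whose type is in
# LOCAL_IGNORE_MESSAGE_TYPES (including the new 'inputPrefill') are dropped
# when no extension IPC client is attached.
def local_print(message, message_type=None):
    if message_type in LOCAL_IGNORE_MESSAGE_TYPES:
        return
    print(message)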

View File

@@ -254,6 +254,37 @@ def save_progress(app_id, step, data):
return progress
def edit_development_plan(app_id, update_data):
try:
dev_plan = DevelopmentPlanning.get(app=app_id)
except DevelopmentPlanning.DoesNotExist:
print(color_red(f"No development plan found for app {app_id}"), category='error')
return None
for key, value in update_data.items():
setattr(dev_plan, key, value)
dev_plan.save()
return dev_plan
def edit_feature_plan(app_id, update_data):
try:
dev_plan = (DevelopmentSteps.select()
.where((DevelopmentSteps.app == app_id) & (DevelopmentSteps.prompt_path.contains('feature_plan')))
.order_by(DevelopmentSteps.created_at.desc())
.get())
except DevelopmentSteps.DoesNotExist:
print(color_red(f"No feature plan found for app {app_id}"), category='error')
return None
for key, value in update_data.items():
setattr(dev_plan, key, value)
dev_plan.save()
return dev_plan
def get_app(app_id, error_if_not_found=True):
try:
app = App.get(App.id == app_id)
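
A hedged usage sketch of the new database helper (the app id and plan contents below are hypothetical): edit_development_plan returns the updated row, or None when no DevelopmentPlanning record exists for the app.

# Hypothetical values, mirroring how Developer.edit_task (further down in this PR)
# persists a plan after the user skips or edits a task.
updated_plan = [{'description': 'Set up the Express server'}]
dev_plan = edit_development_plan('my-app-id', {'development_plan': updated_plan})
if dev_plan is None:
    pass  # no development plan row exists for this app; nothing was saved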

View File

@@ -241,7 +241,13 @@ class AgentConvo:
if not self.agent.project.check_ipc():
print(color_yellow_bold(dev_step_msg), end='')
logger.info(dev_step_msg)
print(f"\n{content}\n", type='local')
try:
print(f"\n{content}\n", type='local')
except Exception: # noqa
# Workaround for Windows encoding crash: https://github.com/Pythagora-io/gpt-pilot/issues/509
safe_content = content.encode('ascii', 'ignore').decode('ascii')
print(f"\n{safe_content}\n", type='local')
logger.info(f"{print_msg}: {content}\n")
def to_context_prompt(self):
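
To illustrate the fallback above (the example string is made up): stripping non-ASCII characters sidesteps the Windows console encoding crash tracked in issue #509.

content = "Task finished ✅"  # example content with a character cp1252 cannot encode
safe_content = content.encode('ascii', 'ignore').decode('ascii')
# safe_content == "Task finished " and printing it no longer raises UnicodeEncodeError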

View File

@@ -364,8 +364,8 @@ class CodeMonkey(Agent):
"""
diff = "\n".join(
[
"--- " + file_name,
"+++ " + file_name,
f"--- {file_name}",
f"+++ {file_name}",
] + hunks
) + "\n"
try:
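
For reference, a small sketch of the unified-diff text that the f-string version builds (the file name and hunk lines are hypothetical):

file_name = "src/app.py"
hunks = ["@@ -1,1 +1,1 @@", "-print('old')", "+print('new')"]
diff = "\n".join([f"--- {file_name}", f"+++ {file_name}"] + hunks) + "\n"
# diff is:
# --- src/app.py
# +++ src/app.py
# @@ -1,1 +1,1 @@
# -print('old')
# +print('new')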

View File

@@ -16,7 +16,7 @@ from utils.style import (
color_white_bold
)
from helpers.exceptions import TokenLimitError
from const.code_execution import MAX_COMMAND_DEBUG_TRIES, MAX_QUESTIONS_FOR_BUG_REPORT
from const.code_execution import MAX_COMMAND_DEBUG_TRIES
from helpers.exceptions import TooDeepRecursionError
from helpers.Debugger import Debugger
from utils.questionary import styled_text
@@ -29,7 +29,7 @@ from utils.utils import should_execute_step, array_of_objects_to_string, generat
from helpers.cli import run_command_until_success, execute_command_and_check_cli_response
from const.function_calls import (EXECUTE_COMMANDS, GET_TEST_TYPE, IMPLEMENT_TASK, COMMAND_TO_RUN,
ALTERNATIVE_SOLUTIONS, GET_BUG_REPORT_MISSING_DATA)
from database.database import save_progress, get_progress_steps, update_app_status
from database.database import save_progress, edit_development_plan, edit_feature_plan, get_progress_steps, update_app_status
from utils.telemetry import telemetry
from prompts.prompts import ask_user
from utils.print import print_task_progress, print_step_progress
@@ -55,14 +55,17 @@ class Developer(Agent):
if not self.project.skip_steps:
logger.info("Starting to create the actual code...")
total_tasks = len(self.project.development_plan)
progress_thresholds = [50] # Percentages of progress when documentation is created
documented_thresholds = set()
for i, dev_task in enumerate(self.project.development_plan):
finished_tasks = []
while len(finished_tasks) < len(self.project.development_plan):
i = len(finished_tasks)
dev_task = self.project.development_plan[i]
num_of_tasks = len(self.project.development_plan)
# don't create documentation for features
if not self.project.finished:
current_progress_percent = round((i / total_tasks) * 100, 2)
current_progress_percent = round(((i + 1) / num_of_tasks) * 100, 2)
for threshold in progress_thresholds:
if current_progress_percent > threshold and threshold not in documented_thresholds:
@@ -76,6 +79,7 @@ class Developer(Agent):
self.project.cleanup_list('dev_steps_to_load', task['id'])
if len(self.project.tasks_to_load):
finished_tasks.append(dev_task)
continue
# if it is the last task to load, execute it to check if it's finished
else:
@@ -83,15 +87,18 @@ class Developer(Agent):
readme_dev_step = next((el for el in self.project.dev_steps_to_load if
'create_readme.prompt' in el.get('prompt_path', '')), None)
if len(self.project.development_plan) - 1 == i and readme_dev_step is not None:
if num_of_tasks - 1 == i and readme_dev_step is not None:
self.project.cleanup_list('dev_steps_to_load', readme_dev_step['id'])
finished_tasks.append(dev_task)
continue
self.project.current_task.start_new_task(dev_task['description'], i + 1)
print_task_progress(i+1, len(self.project.development_plan), dev_task['description'], task_source, 'in_progress')
self.implement_task(i, task_source, dev_task)
print_task_progress(i+1, len(self.project.development_plan), dev_task['description'], task_source, 'done')
telemetry.inc("num_tasks")
print_task_progress(i+1, num_of_tasks, dev_task['description'], task_source, 'in_progress')
task_finished = self.implement_task(i, task_source, dev_task)
if task_finished:
finished_tasks.append(dev_task)
telemetry.inc("num_tasks")
print_task_progress(i+1, num_of_tasks, dev_task['description'], task_source, 'done')
# DEVELOPMENT END
if not self.project.skip_steps:
@@ -123,6 +130,10 @@ class Developer(Agent):
:param task_source: The source of the task, one of: 'app', 'feature', 'debugger', 'iteration'.
:param development_task: The task to implement.
"""
should_execute_task = self.edit_task(task_source, development_task)
if not should_execute_task:
return False
print(color_green_bold(f'Implementing task #{i + 1}: ') + color_green(f' {development_task["description"]}\n'), category='pythagora')
print(f'Starting task #{i + 1} implementation...', type='verbose', category='agent:developer')
self.project.dot_pilot_gpt.chat_log_folder(i + 1)
@@ -242,6 +253,64 @@ class Developer(Agent):
logger.warning('Testing at end of task failed')
break
return True
def edit_task(self, task_source, task):
"""
Allow the user to edit a task before executing it.
:param task_source: The source of the task; editing is only offered for 'app' and 'feature' tasks.
:param task: The task to edit.
:return: True if the task should be executed, False if it should be skipped.
"""
if self.project.skip_steps or task_source not in ['app', 'feature']:
return True
execute_question = 'Do you want to execute this task?'
if self.project.check_ipc():
print(execute_question, category='pythagora')
else:
execute_question += ' [yes/edit task/skip task]'
print('')
print(task['description'])
print('yes/edit task/skip task', type='buttons-only')
response = ask_user(self.project, execute_question)
if response.lower() in NEGATIVE_ANSWERS + ['skip task']:
# remove task from development plan if it is being skipped
self.project.development_plan = [
element for element in self.project.development_plan
if element['description'] != task['description']
]
if task_source == 'app':
db_task_skip = edit_development_plan(self.project.args['app_id'], {'development_plan': self.project.development_plan})
else:
db_task_skip = edit_feature_plan(self.project.args['app_id'], {'llm_response': {'text': json.dumps({'plan': self.project.development_plan})}})
if db_task_skip:
print('Successfully skipped task.', category='pythagora')
return False
elif response.lower() == 'edit task':
edit_question = 'Write full edited description of the task here:'
if self.project.check_ipc():
print('continue/cancel', type='button')
print(edit_question, type='ipc')
print(task['description'], type='inputPrefill')
edited_task = ask_user(self.project, edit_question)
if edited_task.lower() in NEGATIVE_ANSWERS + ['', 'continue']:
return True
task['description'] = edited_task
if task_source == 'app':
db_task_edit = edit_development_plan(self.project.args['app_id'], {'development_plan': self.project.development_plan})
else:
db_task_edit = edit_feature_plan(self.project.args['app_id'], {'llm_response': {'text': json.dumps({'plan': self.project.development_plan})}})
if db_task_edit:
print('Successfully edited task.', category='pythagora')
return True
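
For clarity, a sketch of the payload shape that edit_feature_plan persists when a feature task is skipped or edited; the plan content is hypothetical.

import json
plan = [{'description': 'Add a logout endpoint'}]  # hypothetical remaining tasks
update_data = {'llm_response': {'text': json.dumps({'plan': plan})}}
# update_data['llm_response']['text'] == '{"plan": [{"description": "Add a logout endpoint"}]}'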
def step_delete_file(self, convo, step, i, test_after_code_changes):
"""
Delete a file from the project.
@@ -606,8 +675,7 @@ class Developer(Agent):
next_solution_to_try = None
iteration_count = self.project.last_iteration['prompt_data']['iteration_count'] if (self.project.last_iteration and 'iteration_count' in self.project.last_iteration['prompt_data']) else 0
while True:
self.user_feedback = llm_solutions[-1]['user_feedback'] if len(llm_solutions) > 0 else None
review_successful = self.project.skip_steps or self.review_task()
review_successful = self.project.skip_steps or self.review_task(llm_solutions)
if not review_successful and self.review_count < 3:
continue
iteration_count += 1
@@ -765,24 +833,29 @@ class Developer(Agent):
return user_feedback, questions_and_answers
def review_task(self):
def review_task(self, llm_solutions):
"""
Review all task changes and refactor big files.
:param llm_solutions: List of all user feedback and the LLM solutions to that feedback for the current task.
:return: bool - True if the task changes passed review, False if not
"""
print('Starting review of all changes made in this task...', type='verbose', category='agent:reviewer')
self.review_count += 1
review_result = self.review_code_changes()
review_result = self.review_code_changes(llm_solutions)
refactoring_done = self.refactor_code()
if refactoring_done or review_result['implementation_needed']:
review_result = self.review_code_changes()
review_result = self.review_code_changes(llm_solutions)
return review_result['success']
def review_code_changes(self):
def review_code_changes(self, llm_solutions):
"""
Review the code changes and ask for human intervention if needed
:param llm_solutions: List of all user feedback and the LLM solutions to that feedback for the current task.
:return: dict - {
'success': bool,
'implementation_needed': bool
@@ -791,11 +864,13 @@ class Developer(Agent):
review_convo = AgentConvo(self)
files = [
file_dict for file_dict in self.project.get_all_coded_files()
if any(file_dict['full_path'].endswith(modified_file.lstrip('.')) for modified_file in self.modified_files)
if any(os.path.normpath(file_dict['full_path']).endswith(os.path.normpath(modified_file.lstrip('.'))) for
modified_file in self.modified_files)
]
files_at_start_of_task = [
file_dict for file_dict in self.files_at_start_of_task
if any(file_dict['full_path'].endswith(modified_file.lstrip('.')) for modified_file in self.modified_files)
if any(os.path.normpath(file_dict['full_path']).endswith(os.path.normpath(modified_file.lstrip('.'))) for
modified_file in self.modified_files)
]
# TODO instead of sending files before and after maybe add nice way to show diff for multiple files
review = review_convo.send_message('development/review_task.prompt', {
@@ -804,7 +879,7 @@ class Developer(Agent):
"tasks": self.project.development_plan,
"current_task": self.project.current_task.data.get('task_description'),
"files": files,
"user_input": self.user_feedback,
"all_feedbacks": [solution["user_feedback"].replace("```", "") for solution in llm_solutions],
"modified_files": self.modified_files,
"files_at_start_of_task": files_at_start_of_task,
"previous_features": self.project.previous_features,

View File

@@ -32,8 +32,8 @@ def update_file(path: str, new_content: Union[str, bytes], project=None):
if project is not None: # project can be None only in tests
if not project.skip_steps:
print({"path": path, "line": None}, type='openFile')
if not project.check_ipc():
print(color_green(f"Updated file {path}"))
if not project.check_ipc():
print(color_green(f"Updated file {path}"))
def get_file_contents(

View File

@@ -14,7 +14,7 @@ I want you to create a list of steps that are needed to debug this issue.
Each step can be either:
* `command` - command to run (must be able to run on a {{ os }} machine, assume the current working directory is the project root folder)
* `code_change` - step will change the code and you need to thoroughly describe what needs to be implemented. I will implement the requested changes and let you know.
* `save_file` - step will create or update a file, and you need to thoroughly describe what needs to be implemented. I will implement the requested changes and let you know.
* `human_intervention` - if you need the human to do something, use this type of step and explain in detail what you want the human to do. NEVER use `human_intervention` for testing, as testing will be done separately by a dedicated QA after all the steps are done.
{{ execution_order }}
@@ -22,5 +22,3 @@ Each step can be either:
Also, make sure that at least the last step has `check_if_fixed` set to TRUE.
{{ file_size_limit }}
{# After this, you need to decide what to do next. You can rerun the command `{{ command }}` to check if the problem is fixed or run another command with `run_command` or change more code with `implement_code_changes`. #}
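
For illustration, a hypothetical example of the step list this prompt asks the LLM to produce; the field names below are assumptions based on the step types and the check_if_fixed requirement described above, not the project's actual function-call schema.

steps = [
    {"type": "command", "command": "npm test", "check_if_fixed": False},  # hypothetical
    {"type": "save_file", "description": "Guard against an undefined user in the auth middleware",
     "check_if_fixed": True},  # at least the last step checks whether the issue is fixed
]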

View File

@@ -24,27 +24,26 @@ Here are files that were modified during this task implementation:
---end_of_current_files---
{% endif -%}
{% if user_input -%}While working on this task, your colleague who is testing the app "{{ name }}" sent you some additional info. Here it is:
```
{{ user_input }}
{% if all_feedbacks -%}While working on this task, your colleague who is testing the app "{{ name }}" sent you some additional information on what doesn't work as intended or what should be added. Here are all the inputs he sent you:
```{% for feedback in all_feedbacks %}
{{ loop.index }}. {{ feedback }}
{% endfor %}
```
After this info, you tried to fix it. {% endif %}Files that were modified during implementation of the task are:
After you got each of these additional inputs, you tried to fix it as part of this task. {% endif %}Files that were modified during implementation of the task are:
```
{{ modified_files }}
```
Now I will show you how those files looked before this task implementation started. If a file is listed as changed but is not in this list, that means it was created during this task. Here are the files before implementation of this task:
{% if files_at_start_of_task|length > 0 %}---start_of_files_at_start_of_task---
---start_of_files_at_start_of_task---{% if files_at_start_of_task|length > 0 %}
{% for file in files_at_start_of_task %}
**{{ file.path }}/{{ file.name }}** ({{ file.lines_of_code }} lines of code):
```
{{ file.content }}
```
{% endfor %}
---end_of_files_at_start_of_task---
{% endif -%}
{% endfor %}{% endif -%}---end_of_files_at_start_of_task---
**IMPORTANT**
You have to review this task implementation. You are known to be very strict with your reviews and very good at noticing bugs, but you don't mind minor changes like refactoring, adding or removing logs and so on. You think carefully through all of the information given before drawing any conclusions.
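
A quick sketch of how the new feedback block renders, using jinja2 directly with two made-up feedback strings (gpt-pilot's own prompt loader may trim whitespace differently).

from jinja2 import Template

snippet = ("```{% for feedback in all_feedbacks %}\n"
           "{{ loop.index }}. {{ feedback }}\n"
           "{% endfor %}```")
rendered = Template(snippet).render(all_feedbacks=["Login button does nothing", "Add a logout link"])
# 'rendered' contains the numbered list "1. Login button does nothing" and
# "2. Add a logout link" wrapped in triple backticks.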

View File

@@ -264,7 +264,12 @@ def retry_on_exception(func):
print(color_red(f"Error calling LLM API: The request exceeded the maximum token limit (request size: {n_tokens}) tokens."))
trace_token_limit_error(n_tokens, args[0]['messages'], err_str)
raise TokenLimitError(n_tokens, MAX_GPT_MODEL_TOKENS)
if "rate_limit_exceeded" in err_str:
if "rate_limit_exceeded" in err_str or "rate_limit_error" in err_str:
# Retry the attempt if the current account's tier reaches the API limits
rate_limit_exceeded_sleep(e, err_str)
continue
if "overloaded_error" in err_str:
# Retry the attempt if the Anthropic servers are overloaded
rate_limit_exceeded_sleep(e, err_str)
continue
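
To summarize the retry condition (the exception text below is made up for illustration):

# Any of these substrings in the stringified exception now leads to
# rate_limit_exceeded_sleep() and another attempt instead of raising.
RETRYABLE_MARKERS = ("rate_limit_exceeded", "rate_limit_error", "overloaded_error")
err_str = "anthropic.InternalServerError: overloaded_error: try again later"  # made-up example
should_retry = any(marker in err_str for marker in RETRYABLE_MARKERS)         # True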
@@ -601,7 +606,6 @@ def load_data_to_json(string):
return json.loads(fix_json(string))
def stream_anthropic(messages, function_call_message, gpt_data, model_name="claude-3-sonnet-20240229"):
try:
import anthropic