merge main

This commit is contained in:
LeonOstrez
2024-06-10 12:28:39 +01:00
39 changed files with 682 additions and 55 deletions

View File

@@ -151,6 +151,15 @@ any adjustments if needed).
This will start two containers, one being a new image built by the `Dockerfile` and a Postgres database. The new image also has [ttyd](https://github.com/tsl0922/ttyd) installed so that you can easily interact with gpt-pilot. Node is also installed on the image and port 3000 is exposed.
### PostgreSQL support
GPT Pilot uses the built-in SQLite database by default. If you want to use the PostgreSQL database, you need to additionally install the `asyncpg` and `psycopg2` packages:
```bash
pip install asyncpg psycopg2
```
Then, you need to update the `config.json` file to set `db.url` to `postgresql+asyncpg://<user>:<password>@<db-host>/<db-name>`.
# 🧑‍💻️ CLI arguments

View File

@@ -133,6 +133,7 @@ class Developer(BaseAgent):
user_feedback=user_feedback,
user_feedback_qa=None,
next_solution_to_try=None,
docs=self.current_state.docs,
)
.assistant(description)
.template("parse_task")
@@ -168,17 +169,14 @@ class Developer(BaseAgent):
log.debug(f"Current state files: {len(self.current_state.files)}, relevant {self.current_state.relevant_files}")
# Check which files are relevant to the current task
if self.current_state.files and not self.current_state.relevant_files:
if self.current_state.files and self.current_state.relevant_files is None:
await self.get_relevant_files()
current_task_index = self.current_state.tasks.index(task)
llm = self.get_llm()
convo = AgentConvo(self).template(
"breakdown",
task=task,
iteration=None,
current_task_index=current_task_index,
"breakdown", task=task, iteration=None, current_task_index=current_task_index, docs=self.current_state.docs
)
response: str = await llm(convo)
@@ -302,7 +300,7 @@ class Developer(BaseAgent):
self.next_state.current_task["description"] = user_response.text
self.next_state.current_task["run_always"] = True
self.next_state.relevant_files = []
self.next_state.relevant_files = None
log.info(f"Task description updated to: {user_response.text}")
# Orchestrator will rerun us with the new task description
return False

View File

@@ -74,6 +74,16 @@ class ErrorHandler(BaseAgent):
if not cmd:
raise ValueError("No command provided in command error response details")
confirm = await self.ask_question(
"Can I debug why this command failed?",
buttons={"yes": "Yes", "no": "No"},
default="yes",
buttons_only=True,
)
if confirm.cancelled or confirm.button == "no":
log.info("Skipping command error debug (requested by user)")
return AgentResponse.done(self)
llm = self.get_llm()
convo = AgentConvo(self).template(
"debug",

View File

@@ -0,0 +1,160 @@
import asyncio
from urllib.parse import urljoin
import httpx
from pydantic import BaseModel
from core.agents.base import BaseAgent
from core.agents.convo import AgentConvo
from core.agents.response import AgentResponse
from core.config import EXTERNAL_DOCUMENTATION_API
from core.llm.parser import JSONParser
from core.log import get_logger
from core.telemetry import telemetry
log = get_logger(__name__)
class DocQueries(BaseModel):
    """LLM response schema: list of queries to run against a single docset."""

    queries: list[str]
class SelectedDocsets(BaseModel):
    """LLM response schema: keys of the docsets selected as relevant for the task."""

    docsets: list[str]
class ExternalDocumentation(BaseAgent):
    """Agent in charge of collecting and storing additional documentation.

    Docs are per task and are stored in the `docs` field of the project state
    (see `_store_docs()`). This agent ensures documentation is collected only
    once per task.

    Agent does 2 LLM interactions:
    1. Ask the LLM to select useful documentation from a predefined list.
    2. Ask the LLM to come up with a query to use to fetch the actual documentation snippets.

    Agent does 2 calls to our documentation API:
    1. Fetch all the available docsets. `docset` is a collection of documentation snippets
       for a single topic, eg. VueJS API Reference docs.
    2. Fetch the documentation snippets for given queries.
    """

    agent_type = "external-docs"
    display_name = "Documentation"

    async def run(self) -> AgentResponse:
        available_docsets = await self._get_available_docsets()
        selected_docsets = await self._select_docsets(available_docsets)
        await telemetry.trace_code_event("docsets_used", selected_docsets)

        if not selected_docsets:
            log.info("No documentation selected for this task.")
            # Store an empty list (instead of leaving None) so this task is
            # marked as "documentation collected" and we don't run again.
            await self._store_docs([], available_docsets)
            return AgentResponse.done(self)

        queries = await self._create_queries(selected_docsets)
        doc_snippets = await self._fetch_snippets(queries)
        await telemetry.trace_code_event("doc_snippets", {"num_stored": len(doc_snippets)})

        await self._store_docs(doc_snippets, available_docsets)
        return AgentResponse.done(self)

    async def _get_available_docsets(self) -> list[tuple]:
        """Fetch the list of available docsets from the documentation API.

        :return: List of (docset_key, description) pairs, or an empty list on any error.
        """
        url = urljoin(EXTERNAL_DOCUMENTATION_API, "docsets")
        # NOTE(review): this is a synchronous HTTP call inside an async method, so
        # it blocks the event loop while fetching — consider httpx.AsyncClient.
        try:
            # Use the client as a context manager so the connection pool is closed
            # (the original leaked it), and fail on non-2xx responses too.
            with httpx.Client(transport=httpx.HTTPTransport(retries=3)) as client:
                resp = client.get(url)
                resp.raise_for_status()
                docsets = resp.json()
        except httpx.HTTPError:
            # In case of any errors, we'll proceed without the documentation
            log.warning("Failed to fetch available docsets due to an error.", exc_info=True)
            return []

        log.debug(f"Fetched {len(docsets)} docsets.")
        return docsets

    async def _select_docsets(self, available_docsets: list[tuple]) -> dict[str, str]:
        """From a list of available docsets, select the relevant ones.

        :param available_docsets: List of (docset_key, description) pairs.
        :return: Mapping of docset_key -> description for the selected docsets.
        """
        if not available_docsets:
            return {}

        llm = self.get_llm()
        convo = (
            AgentConvo(self)
            .template(
                "select_docset",
                current_task=self.current_state.current_task,
                available_docsets=available_docsets,
            )
            .require_schema(SelectedDocsets)
        )
        await self.send_message("Determining if external documentation is needed for the next task...")
        llm_response: SelectedDocsets = await llm(convo, parser=JSONParser(spec=SelectedDocsets))
        available_docsets = dict(available_docsets)
        # Guard against hallucinated keys: only keep docsets we actually have.
        return {k: available_docsets[k] for k in llm_response.docsets if k in available_docsets}

    async def _create_queries(self, docsets: dict[str, str]) -> dict[str, list[str]]:
        """Return queries we have to make to the docs API.

        Key is the docset_key and value is the list of queries for that docset.

        :param docsets: Mapping of docset_key -> short description.
        :return: Mapping of docset_key -> list of query strings (empty queries are dropped).
        """
        queries = {}
        await self.send_message("Getting relevant documentation for the following topics:")
        for k, short_desc in docsets.items():
            llm = self.get_llm()
            convo = (
                AgentConvo(self)
                .template(
                    "create_docs_queries",
                    short_description=short_desc,
                    current_task=self.current_state.current_task,
                )
                .require_schema(DocQueries)
            )
            llm_response: DocQueries = await llm(convo, parser=JSONParser(spec=DocQueries))
            if llm_response.queries:
                queries[k] = llm_response.queries

        return queries

    async def _fetch_snippets(self, queries: dict[str, list[str]]) -> list[tuple]:
        """Query the docs API and fetch the documentation snippets.

        :param queries: Mapping of docset_key -> list of queries for that docset.
        :return: List of tuples (docset_key, snippets); empty if fetching failed.
        """
        url = urljoin(EXTERNAL_DOCUMENTATION_API, "query")
        snippets: list[tuple] = []
        async with httpx.AsyncClient(transport=httpx.AsyncHTTPTransport(retries=3)) as client:
            reqs = []
            ordered_keys = []
            for docset_key, qs in queries.items():
                reqs.append(client.get(url, params={"q": qs, "doc_key": docset_key, "num_results": 3}))
                ordered_keys.append(docset_key)
            try:
                results = await asyncio.gather(*reqs)
            except httpx.HTTPError:
                log.warning("Failed to fetch documentation snippets", exc_info=True)
                # Bug fix: the original fell through here and used the undefined
                # `results` variable, raising NameError. Proceed without docs instead.
                return snippets

            for k, res in zip(ordered_keys, results):
                snippets.append((k, res.json()))
        return snippets

    async def _store_docs(self, snippets: list[tuple], available_docsets: list[tuple]):
        """Store the snippets into current task data.

        Documentation snippets are stored as a list of dictionaries:
        {"key": docset-key, "desc": documentation-description, "snippets": list-of-snippets}

        :param snippets: List of (docset_key, snippets) tuples.
        :param available_docsets: List of (docset_key, description) pairs, used for descriptions.
        """
        docsets_dict = dict(available_docsets)
        docs = []
        for docset_key, snip in snippets:
            docs.append({"key": docset_key, "desc": docsets_dict[docset_key], "snippets": snip})

        self.next_state.docs = docs
        self.next_state.flag_tasks_as_modified()

86
core/agents/importer.py Normal file
View File

@@ -0,0 +1,86 @@
from uuid import uuid4
from core.agents.base import BaseAgent
from core.agents.convo import AgentConvo
from core.agents.response import AgentResponse, ResponseType
from core.db.models import Complexity
from core.llm.parser import JSONParser
from core.log import get_logger
from core.templates.example_project import EXAMPLE_PROJECT_DESCRIPTION
log = get_logger(__name__)
MAX_PROJECT_LINES = 10000
class Importer(BaseAgent):
    """Agent that imports an existing project and reverse-engineers its specification."""

    agent_type = "importer"
    # Typo fix: was "Project Analyist"
    display_name = "Project Analyst"

    async def run(self) -> AgentResponse:
        if self.prev_response and self.prev_response.type == ResponseType.IMPORT_PROJECT:
            # Called by SpecWriter to start the import process
            await self.start_import_process()
            return AgentResponse.describe_files(self)

        await self.analyze_project()
        return AgentResponse.done(self)

    async def start_import_process(self):
        """Ask the user to copy their project into the workspace, then import the files."""
        # TODO: Send a signal to the UI to copy the project files to workspace
        project_root = self.state_manager.get_full_project_root()
        await self.ui.import_project(project_root)
        await self.send_message(
            f"This is an experimental feature and is currently limited to projects with size up to {MAX_PROJECT_LINES} lines of code."
        )
        await self.ask_question(
            f"Please copy your project files to {project_root} and press Continue",
            allow_empty=False,
            buttons={
                "continue": "Continue",
            },
            buttons_only=True,
            default="continue",
        )

        imported_files, _ = await self.state_manager.import_files()
        imported_lines = sum(len(f.content.content.splitlines()) for f in imported_files)
        if imported_lines > MAX_PROJECT_LINES:
            # Bug fix: this string was missing the "f" prefix, so the user saw the
            # literal "{imported_lines}" instead of the actual line count.
            await self.send_message(
                f"WARNING: Your project ({imported_lines} LOC) is larger than supported and may cause issues in Pythagora."
            )
        await self.state_manager.commit()

    async def analyze_project(self):
        """Identify the project's entry-point files and reverse-engineer its spec."""
        llm = self.get_llm()

        # Bug fix: send_message() is a coroutine and was not awaited here (or below).
        await self.send_message("Inspecting most important project files ...")

        convo = AgentConvo(self).template("get_entrypoints")
        llm_response = await llm(convo, parser=JSONParser())
        # The "get_entrypoints" prompt asks for JSON like {"files": [...]}; the
        # original tested `f.path in llm_response`, which checks *dict keys* and
        # would never match a file path. Unwrap the list if we got the object.
        entrypoints = llm_response.get("files", []) if isinstance(llm_response, dict) else llm_response
        relevant_files = [f for f in self.current_state.files if f.path in entrypoints]

        await self.send_message("Analyzing project ...")

        convo = AgentConvo(self).template(
            "analyze_project", relevant_files=relevant_files, example_spec=EXAMPLE_PROJECT_DESCRIPTION
        )
        llm_response = await llm(convo)

        spec = self.current_state.specification.clone()
        spec.description = llm_response
        self.next_state.specification = spec
        # Record the import as a single already-completed epic with no tasks.
        self.next_state.epics = [
            {
                "id": uuid4().hex,
                "name": "Import project",
                "description": "Import an existing project into Pythagora",
                "tasks": [],
                "completed": True,
                "test_instructions": None,
                "source": "app",
                "summary": None,
                # NOTE(review): ">5 files == HARD" is a rough heuristic — confirm intent.
                "complexity": Complexity.HARD if len(self.current_state.files) > 5 else Complexity.SIMPLE,
            }
        ]

View File

@@ -7,7 +7,9 @@ from core.agents.code_reviewer import CodeReviewer
from core.agents.developer import Developer
from core.agents.error_handler import ErrorHandler
from core.agents.executor import Executor
from core.agents.external_docs import ExternalDocumentation
from core.agents.human_input import HumanInput
from core.agents.importer import Importer
from core.agents.problem_solver import ProblemSolver
from core.agents.response import AgentResponse, ResponseType
from core.agents.spec_writer import SpecWriter
@@ -175,10 +177,16 @@ class Orchestrator(BaseAgent):
return HumanInput(self.state_manager, self.ui, prev_response=prev_response)
if prev_response.type == ResponseType.TASK_REVIEW_FEEDBACK:
return Developer(self.state_manager, self.ui, prev_response=prev_response)
if prev_response.type == ResponseType.IMPORT_PROJECT:
return Importer(self.state_manager, self.ui, prev_response=prev_response)
if not state.specification.description:
# Ask the Spec Writer to refine and save the project specification
return SpecWriter(self.state_manager, self.ui)
if state.files:
# The project has been imported, but not analyzed yet
return Importer(self.state_manager, self.ui)
else:
# New project: ask the Spec Writer to refine and save the project specification
return SpecWriter(self.state_manager, self.ui)
elif not state.specification.architecture:
# Ask the Architect to design the project architecture and determine dependencies
return Architect(self.state_manager, self.ui, process_manager=self.process_manager)
@@ -189,10 +197,12 @@ class Orchestrator(BaseAgent):
):
# Ask the Tech Lead to break down the initial project or feature into tasks and apply project template
return TechLead(self.state_manager, self.ui, process_manager=self.process_manager)
elif not state.steps and not state.iterations:
# Ask the Developer to break down current task into actionable steps
return Developer(self.state_manager, self.ui)
if state.current_task and state.docs is None:
return ExternalDocumentation(self.state_manager, self.ui)
# Current task status must be checked before Developer is called because we might want
# to skip it instead of breaking it down
current_task_status = state.current_task.get("status") if state.current_task else None
if current_task_status:
# Status of the current task is set first time after the task was reviewed by user
@@ -207,6 +217,10 @@ class Orchestrator(BaseAgent):
# Task is fully done or skipped, call TaskCompleter to mark it as completed
return TaskCompleter(self.state_manager, self.ui)
if not state.steps and not state.iterations:
# Ask the Developer to break down current task into actionable steps
return Developer(self.state_manager, self.ui)
if state.current_step:
# Execute next step in the task
# TODO: this can be parallelized in the future

View File

@@ -39,6 +39,9 @@ class ResponseType(str, Enum):
TASK_REVIEW_FEEDBACK = "task-review-feedback"
"""Agent is providing feedback on the entire task."""
IMPORT_PROJECT = "import-project"
"""User wants to import an existing project."""
class AgentResponse:
type: ResponseType = ResponseType.DONE
@@ -130,3 +133,7 @@ class AgentResponse:
"feedback": feedback,
},
)
@staticmethod
def import_project(agent: "BaseAgent") -> "AgentResponse":
return AgentResponse(type=ResponseType.IMPORT_PROJECT, agent=agent)

View File

@@ -31,11 +31,15 @@ class SpecWriter(BaseAgent):
# FIXME: must be lowercase because VSCode doesn't recognize it otherwise. Needs a fix in the extension
"continue": "continue",
"example": "Start an example project",
"import": "Import an existing project",
},
)
if response.cancelled:
return AgentResponse.error(self, "No project description")
if response.button == "import":
return AgentResponse.import_project(self)
if response.button == "example":
await self.send_message("Starting example project with description:")
await self.send_message(EXAMPLE_PROJECT_DESCRIPTION)

View File

@@ -90,7 +90,7 @@ class TechLead(BaseAgent):
)
# Saving template files will fill this in and we want it clear for the
# first task.
self.next_state.relevant_files = []
self.next_state.relevant_files = None
return summary
async def ask_for_new_feature(self) -> AgentResponse:

View File

@@ -11,7 +11,7 @@ from core.llm.base import APIError, BaseLLMClient
from core.log import get_logger
from core.state.state_manager import StateManager
from core.telemetry import telemetry
from core.ui.base import UIBase, UIClosedError, pythagora_source
from core.ui.base import UIBase, UIClosedError, UserInput, pythagora_source
log = get_logger(__name__)
@@ -112,7 +112,15 @@ async def start_new_project(sm: StateManager, ui: UIBase) -> bool:
:param ui: User interface.
:return: True if the project was created successfully, False otherwise.
"""
user_input = await ui.ask_question("What is the project name?", allow_empty=False, source=pythagora_source)
try:
user_input = await ui.ask_question(
"What is the project name?",
allow_empty=False,
source=pythagora_source,
)
except (KeyboardInterrupt, UIClosedError):
user_input = UserInput(cancelled=True)
if user_input.cancelled:
return False

View File

@@ -1,6 +1,6 @@
from enum import Enum
from os.path import abspath, dirname, isdir, join
from typing import Literal, Optional, Union
from typing import Any, Literal, Optional, Union
from pydantic import BaseModel, ConfigDict, Field, field_validator
from typing_extensions import Annotated
@@ -18,6 +18,7 @@ DEFAULT_IGNORE_PATHS = [
"node_modules",
"package-lock.json",
"venv",
".venv",
"dist",
"build",
"target",
@@ -34,6 +35,9 @@ IGNORE_SIZE_THRESHOLD = 50000 # 50K+ files are ignored by default
DEFAULT_AGENT_NAME = "default"
DESCRIBE_FILES_AGENT_NAME = "CodeMonkey.describe_files"
# Endpoint for the external documentation
EXTERNAL_DOCUMENTATION_API = "http://docs-pythagora-io-439719575.us-east-1.elb.amazonaws.com"
class _StrictModel(BaseModel):
"""
@@ -54,6 +58,7 @@ class LLMProvider(str, Enum):
ANTHROPIC = "anthropic"
GROQ = "groq"
LM_STUDIO = "lm-studio"
AZURE = "azure"
class UIAdapter(str, Enum):
@@ -88,6 +93,10 @@ class ProviderConfig(_StrictModel):
description="Timeout (in seconds) for receiving a new chunk of data from the response stream",
ge=0.0,
)
extra: Optional[dict[str, Any]] = Field(
None,
description="Extra provider-specific configuration",
)
class AgentLLMConfig(_StrictModel):
@@ -139,6 +148,10 @@ class LLMConfig(_StrictModel):
description="Timeout (in seconds) for receiving a new chunk of data from the response stream",
ge=0.0,
)
extra: Optional[dict[str, Any]] = Field(
None,
description="Extra provider-specific configuration",
)
@classmethod
def from_provider_and_agent_configs(cls, provider: ProviderConfig, agent: AgentLLMConfig):
@@ -150,6 +163,7 @@ class LLMConfig(_StrictModel):
temperature=agent.temperature,
connect_timeout=provider.connect_timeout,
read_timeout=provider.read_timeout,
extra=provider.extra,
)
@@ -212,6 +226,12 @@ class DBConfig(_StrictModel):
def validate_url_scheme(cls, v: str) -> str:
if v.startswith("sqlite+aiosqlite://"):
return v
if v.startswith("postgresql+asyncpg://"):
try:
import asyncpg # noqa: F401
except ImportError:
raise ValueError("To use PostgreSQL database, please install `asyncpg` and `psycopg2` packages")
return v
raise ValueError(f"Unsupported database URL scheme in: {v}")

View File

@@ -0,0 +1,34 @@
"""Add docs column to project_states
Revision ID: b760f66138c0
Revises: f352dbe45751
Create Date: 2024-06-08 10:00:44.222099
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "b760f66138c0"
down_revision: Union[str, None] = "f352dbe45751"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the nullable JSON column `docs` to the `project_states` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    # batch_alter_table is used so the change also works on SQLite, which
    # doesn't support most in-place ALTER TABLE operations.
    with op.batch_alter_table("project_states", schema=None) as batch_op:
        batch_op.add_column(sa.Column("docs", sa.JSON(), nullable=True))
    # ### end Alembic commands ###
def downgrade() -> None:
    """Drop the `docs` column added by this revision's upgrade."""
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table("project_states", schema=None) as batch_op:
        batch_op.drop_column("docs")
    # ### end Alembic commands ###

View File

@@ -0,0 +1,34 @@
"""Make relevant_files nullable
Revision ID: f352dbe45751
Revises: 0a1bb637fa26
Create Date: 2024-06-04 15:07:40.175466
"""
from typing import Sequence, Union
from alembic import op
from sqlalchemy.dialects import sqlite
# revision identifiers, used by Alembic.
revision: str = "f352dbe45751"
down_revision: Union[str, None] = "0a1bb637fa26"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Make the `project_states.relevant_files` JSON column nullable."""
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table("project_states", schema=None) as batch_op:
        batch_op.alter_column("relevant_files", existing_type=sqlite.JSON(), nullable=True)
    # ### end Alembic commands ###
def downgrade() -> None:
    """Restore the NOT NULL constraint on `project_states.relevant_files`."""
    # ### commands auto generated by Alembic - please adjust! ###
    # NOTE(review): this will fail if any rows hold NULL relevant_files;
    # backfill those rows (e.g. with an empty list) before downgrading.
    with op.batch_alter_table("project_states", schema=None) as batch_op:
        batch_op.alter_column("relevant_files", existing_type=sqlite.JSON(), nullable=False)
    # ### end Alembic commands ###

View File

@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Optional, Union
from unicodedata import normalize
from uuid import UUID, uuid4
from sqlalchemy import delete, inspect, select
from sqlalchemy import and_, delete, inspect, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Mapped, mapped_column, relationship, selectinload
from sqlalchemy.sql import func
@@ -79,7 +79,7 @@ class Project(Base):
from core.db.models import Branch, ProjectState
latest_state_query = (
select(ProjectState.branch_id, func.max(ProjectState.id).label("max_id"))
select(ProjectState.branch_id, func.max(ProjectState.step_index).label("max_index"))
.group_by(ProjectState.branch_id)
.subquery()
)
@@ -88,7 +88,13 @@ class Project(Base):
select(Project, Branch, ProjectState)
.join(Branch, Project.branches)
.join(ProjectState, Branch.states)
.join(latest_state_query, ProjectState.id == latest_state_query.columns.max_id)
.join(
latest_state_query,
and_(
ProjectState.branch_id == latest_state_query.columns.branch_id,
ProjectState.step_index == latest_state_query.columns.max_index,
),
)
.options(selectinload(Project.branches), selectinload(Branch.states))
.order_by(Project.name, Branch.name)
)

View File

@@ -51,8 +51,9 @@ class ProjectState(Base):
tasks: Mapped[list[dict]] = mapped_column(default=list)
steps: Mapped[list[dict]] = mapped_column(default=list)
iterations: Mapped[list[dict]] = mapped_column(default=list)
relevant_files: Mapped[list[str]] = mapped_column(default=list)
relevant_files: Mapped[Optional[list[str]]] = mapped_column(default=None)
modified_files: Mapped[dict] = mapped_column(default=dict)
docs: Mapped[Optional[list[dict]]] = mapped_column(default=None)
run_command: Mapped[Optional[str]] = mapped_column()
action: Mapped[Optional[str]] = mapped_column()
@@ -167,7 +168,10 @@ class ProjectState(Base):
:return: List of tuples with file path and content.
"""
all_files = set(self.relevant_files + list(self.modified_files.keys()))
relevant_files = self.relevant_files or []
modified_files = self.modified_files or {}
all_files = set(relevant_files + list(modified_files.keys()))
return [file for file in self.files if file.path in all_files]
@staticmethod
@@ -219,6 +223,7 @@ class ProjectState(Base):
files=[],
relevant_files=deepcopy(self.relevant_files),
modified_files=deepcopy(self.modified_files),
docs=deepcopy(self.docs),
run_command=self.run_command,
)
@@ -254,8 +259,9 @@ class ProjectState(Base):
self.set_current_task_status(TaskStatus.DONE)
self.steps = []
self.iterations = []
self.relevant_files = []
self.relevant_files = None
self.modified_files = {}
self.docs = None
flag_modified(self, "tasks")
if not self.unfinished_tasks and self.unfinished_epics:
@@ -362,6 +368,8 @@ class ProjectState(Base):
if path not in self.modified_files and not external:
self.modified_files[path] = original_content
self.relevant_files = self.relevant_files or []
if path not in self.relevant_files:
self.relevant_files.append(path)

29
core/llm/azure_client.py Normal file
View File

@@ -0,0 +1,29 @@
from httpx import Timeout
from openai import AsyncAzureOpenAI
from core.config import LLMProvider
from core.llm.openai_client import OpenAIClient
from core.log import get_logger
log = get_logger(__name__)
class AzureClient(OpenAIClient):
    """OpenAI-compatible LLM client for Azure OpenAI deployments."""

    provider = LLMProvider.AZURE
    # Disable stream_options for Azure; the base OpenAIClient only sends the
    # parameter when this is truthy (presumably Azure rejects it — confirm).
    stream_options = None

    def _init_client(self):
        # Bug fix: `extra` is Optional in the provider config (defaults to None),
        # so `self.config.extra.get(...)` raised AttributeError when unset.
        extra = self.config.extra or {}
        azure_deployment = extra.get("azure_deployment")
        api_version = extra.get("api_version")
        self.client = AsyncAzureOpenAI(
            api_key=self.config.api_key,
            azure_endpoint=self.config.base_url,
            azure_deployment=azure_deployment,
            api_version=api_version,
            timeout=Timeout(
                # Overall timeout must cover the slower of the two phases.
                max(self.config.connect_timeout, self.config.read_timeout),
                connect=self.config.connect_timeout,
                read=self.config.read_timeout,
            ),
        )

View File

@@ -316,6 +316,7 @@ class BaseLLMClient:
:return: Client class for the specified provider.
"""
from .anthropic_client import AnthropicClient
from .azure_client import AzureClient
from .groq_client import GroqClient
from .openai_client import OpenAIClient
@@ -325,6 +326,8 @@ class BaseLLMClient:
return AnthropicClient
elif provider == LLMProvider.GROQ:
return GroqClient
elif provider == LLMProvider.AZURE:
return AzureClient
else:
raise ValueError(f"Unsupported LLM provider: {provider.value}")

View File

@@ -17,6 +17,7 @@ tokenizer = tiktoken.get_encoding("cl100k_base")
class OpenAIClient(BaseLLMClient):
provider = LLMProvider.OPENAI
stream_options = {"include_usage": True}
def _init_client(self):
self.client = AsyncOpenAI(
@@ -40,10 +41,10 @@ class OpenAIClient(BaseLLMClient):
"messages": convo.messages,
"temperature": self.config.temperature if temperature is None else temperature,
"stream": True,
"stream_options": {
"include_usage": True,
},
}
if self.stream_options:
completion_kwargs["stream_options"] = self.stream_options
if json_mode:
completion_kwargs["response_format"] = {"type": "json_object"}

View File

@@ -14,7 +14,7 @@ Output the result in a JSON format with the following structure, as in this exam
Example:
{
"summary": "Describe in detail the functionality being defind o implemented in this file. Be as detailed as possible",
"summary": "Describe in detail the functionality being defined or implemented in this file. Be as detailed as possible",
"references": [
"some/file.py",
"some/other/file.js"

View File

@@ -19,6 +19,8 @@ You are currently working on task #{{ current_task_index + 1 }} with the followi
Now, tell me all the code that needs to be written to implement ONLY this task and have it fully working and all commands that need to be run to implement this task.
{% include "partials/doc_snippets.prompt" %}
**IMPORTANT**
{%- if state.epics|length == 1 %}
Remember, I created an empty folder where I will start writing files that you tell me and that are needed for this app.

View File

@@ -0,0 +1,14 @@
{% include "partials/project_details.prompt" %}
Here is the next task that needs to be implemented:
```
{{ current_task.description }}
```
Here is the list of the libraries, frameworks and APIs for which we have documentation available. The documentation is given in a sequence of pairs, one pair per line. First item in the pair is the documentation key. Second item is the short description of what that documentation contains.
Here's an example for React API documentation:
"react-api-ref", "React API Reference documentation"
We have additional documentation from "{{ short_description }}" that might be useful for completing this task.
Now, give me a summary of what specifically from the {{ short_description }} you think would be useful for completing this task. Please provide only the topics of interest, no additional text. Only return the topics relevant to the actual implementation, NOT the topics related to library installation and setup, environment setup, database setup and similar. Return the topics in JSON format, as a list of strings, WITHOUT any additional formatting such as backticks, bullets and similar. Return a maximum of 3 topics you think would be most useful.

View File

@@ -0,0 +1,15 @@
{% include "partials/project_details.prompt" %}
Here is the next task that needs to be implemented:
{{ current_task.description }}
Here is the list of the libraries, frameworks and APIs for which we have documentation available. The documentation is given in a sequence of pairs, one pair per line. First item in the pair is the documentation key. Second item is the short description of what that documentation contains.
Here's an example for React API documentation:
"react-api-ref", "React API Reference documentation"
Here is the list of available documentations:
{% for docset in available_docsets %}
{{ docset[0], docset[1] }}
{% endfor %}
Now, give me the list of the additional documentation that you would like to use to complete the task listed above. Return only the documentation that is absolutely required for the given task, and only from the list of available documentations provided above. If there is no additional documentation in the list that you would like to use, return an empty list.

View File

@@ -0,0 +1,3 @@
You are a world class full stack software developer working in a team.
Your job is to select the documentation that might be useful for implementing a task at hand.

View File

@@ -0,0 +1,28 @@
You're given an existing project you need to analyze and continue developing. To do this, you'll need to determine the project architecture, technologies used (platform, libraries, etc) and reverse-engineer the technical and functional spec.
Here is the list of all the files in the project:
{% for file in state.files %}
* `{{ file.path }}` - {{ file.meta.get("description")}}
{% endfor %}
Here's the full content of interesting files that may help you to determine the specification:
{% for file in state.files %}
**`{{ file.path }}`**:
```
{{ file.content.content }}
```
{% endfor %}
Based on this information, please provide detailed specification for the project. Here is an example specification format:
---START_OF_EXAMPLE_SPEC---
{{ example_spec }}
---END_OF_EXAMPLE_SPEC---
**IMPORTANT**: In the specification, you must include the following sections:
* **Project Description**: A detailed description of what the project is about.
* **Features**: A list of features that the project has implemented. Each feature should be described in detail.
* **Technical Specification**: Detailed description of how the project works, including any important technical details.

View File

@@ -0,0 +1,21 @@
You're given an existing project you need to analyze and continue developing. To do this, you'll need to determine the project architecture, technologies used (platform, libraries, etc) and reverse-engineer the technical and functional spec.
As a first step, you have to identify which of the listed files to examine so you can determine this. After you identify the files, you'll be given full access to their contents so you can determine the project information.
Here is the list of all the files in the project:
{% for file in state.files %}
* `{{ file.path }}` - {{ file.meta.get("description")}}
{% endfor %}
Based on this information, list the files (full path, as shown in the list) you would examine to determine the project architecture, technologies and specification. Output the list in JSON format like in the following example:
```json
{
"files": [
"README.md",
"pyproject.toml",
"settings/settings.py"
]
}
```

View File

@@ -0,0 +1,14 @@
{% if docs is defined and docs %}
We have some documentation snippets that might be helpful while working on this task; we will now list them.
---START_OF_DOCUMENTATION_SNIPPETS---
{% for d in docs %}
Documentation snippets from {{ d.desc }}:
{% for snippet in d.snippets %}
{{ snippet }}
{% endfor %}
{% endfor %}
---END_OF_DOCUMENTATION_SNIPPETS---
{% endif %}

View File

@@ -39,6 +39,7 @@ Focus on solving this issue in the following way:
{{ next_solution_to_try }}
```
{% endif %}
{% include "partials/doc_snippets.prompt" %}
Now, you have to debug this issue and comply with the additional user feedback.
**IMPORTANT**

View File

@@ -170,7 +170,6 @@ class StateManager:
self.branch = state.branch
self.project = state.branch.project
self.next_state = await state.create_next_state()
# TODO: overwrite files?
self.file_system = await self.init_file_system(load_existing=True)
log.debug(
f"Loaded project {self.project} ({self.project.id}) "
@@ -178,7 +177,7 @@ class StateManager:
f"step {state.step_index} (state id={state.id})"
)
if self.current_state.current_epic and self.ui:
if self.current_state.current_epic and self.current_state.current_task and self.ui:
source = self.current_state.current_epic.get("source", "app")
await self.ui.send_task_progress(
self.current_state.tasks.index(self.current_state.current_task) + 1,

View File

@@ -81,6 +81,8 @@ class Telemetry:
"model": config.agent["default"].model,
# Initial prompt
"initial_prompt": None,
# App complexity
"is_complex_app": None,
# Optional template used for the project
"template": None,
# Optional user contact email
@@ -89,6 +91,10 @@ class Telemetry:
"app_id": None,
# Project architecture
"architecture": None,
# Documentation sets used for a given task
"docsets_used": [],
# Number of documentation snippets stored for a given task
"doc_snippets_stored": 0,
}
if sys.platform == "linux":
try:

View File

@@ -263,6 +263,17 @@ class UIBase:
"""
raise NotImplementedError()
async def import_project(self, project_dir: str):
    """
    Ask the UI to import files from the project directory.

    The UI should provide a way for the user to select the directory with
    an existing project, and recursively copy its files over.

    :param project_dir: Project directory.
    :raises NotImplementedError: Always; concrete UI implementations must
        override this method.
    """
    raise NotImplementedError()
pythagora_source = UISource("Pythagora", "pythagora")
success_source = UISource("Congratulations", "success")

View File

@@ -1,5 +1,7 @@
from typing import Optional
from prompt_toolkit.shortcuts import PromptSession
from core.log import get_logger
from core.ui.base import ProjectStage, UIBase, UIClosedError, UISource, UserInput
@@ -57,9 +59,12 @@ class PlainConsoleUI(UIBase):
default_str = " (default)" if k == default else ""
print(f" [{k}]: {v}{default_str}")
session = PromptSession("> ")
while True:
try:
choice = input("> ").strip()
choice = await session.prompt_async(default=initial_text or "")
choice = choice.strip()
except KeyboardInterrupt:
raise UIClosedError()
if not choice and default:
@@ -118,5 +123,8 @@ class PlainConsoleUI(UIBase):
async def send_features_list(self, features: list[str]):
pass
async def import_project(self, project_dir: str):
    # Project import is not supported in the plain console UI; intentionally a no-op.
    pass
__all__ = ["PlainConsoleUI"]

View File

@@ -39,6 +39,7 @@ class MessageType(str, Enum):
LOADING_FINISHED = "loadingFinished"
PROJECT_DESCRIPTION = "projectDescription"
FEATURES_LIST = "featuresList"
IMPORT_PROJECT = "importProject"
class Message(BaseModel):
@@ -334,5 +335,8 @@ class IPCClientUI(UIBase):
async def send_features_list(self, features: list[str]):
await self._send(MessageType.FEATURES_LIST, content={"featuresList": features})
async def import_project(self, project_dir: str):
    # Forward an IMPORT_PROJECT message with the directory to the connected UI client.
    await self._send(MessageType.IMPORT_PROJECT, content={"project_dir": project_dir})
__all__ = ["IPCClientUI"]

View File

@@ -1,6 +1,6 @@
{
// Configuration for the LLM providers that can be used. Pythagora supports
// OpenAI, Anthropic and Groq. Azure and OpenRouter and local LLMs (such as LM-Studio)
// OpenAI, Azure, Anthropic and Groq. OpenRouter and local LLMs (such as LM-Studio)
// also work; you can use the "openai" provider to define these.
"llm": {
"openai": {
@@ -9,6 +9,17 @@
"api_key": null,
"connect_timeout": 60.0,
"read_timeout": 10.0
},
// Example config for Azure OpenAI (see https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions)
"azure": {
"base_url": "https://your-resource-name.openai.azure.com/",
"api_key": "your-api-key",
"connect_timeout": 60.0,
"read_timeout": 10.0,
"extra": {
"azure_deployment": "your-azure-deployment-id",
"api_version": "2024-02-01"
}
}
},
// Each agent can use a different model or configuration. The default, as before, is GPT4 Turbo

11
main.py
View File

@@ -6,20 +6,23 @@ import sys
try:
from core.cli.main import run_pythagora
except ImportError:
pythagora_root = os.path.dirname(os.path.dirname(__file__))
except ImportError as err:
pythagora_root = os.path.dirname(__file__)
venv_path = os.path.join(pythagora_root, "venv")
requirements_path = os.path.join(pythagora_root, "requirements.txt")
if sys.prefix == sys.base_prefix:
venv_python_path = os.path.join(venv_path, "scripts" if sys.platform == "win32" else "bin", "python")
print("Python environment for Pythagora is not set up.", file=sys.stderr)
print(f"Python environment for Pythagora is not set up: module `{err.name}` is missing.", file=sys.stderr)
print(f"Please create Python virtual environment: {sys.executable} -m venv {venv_path}", file=sys.stderr)
print(
f"Then install the required dependencies with: {venv_python_path} -m pip install -r {requirements_path}",
file=sys.stderr,
)
else:
print("Python environment for Pythagora is not completely set up.", file=sys.stderr)
print(
f"Python environment for Pythagora is not completely set up: module `{err.name}` is missing",
file=sys.stderr,
)
print(
f"Please run `{sys.executable} -m pip install -r {requirements_path}` to finish Python setup, and rerun Pythagora.",
file=sys.stderr,

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "gpt-pilot"
version = "0.2.1"
version = "0.2.3"
description = "Build complete apps using AI agents"
authors = ["Senko Rasic <senko@pythagora.ai>"]
license = "FSL-1.1-MIT"
@@ -37,6 +37,7 @@ psutil = "^5.9.8"
httpx = "^0.27.0"
alembic = "^1.13.1"
python-dotenv = "^1.0.1"
prompt-toolkit = "^3.0.45"
[tool.poetry.group.dev.dependencies]
pytest = "^8.1.1"

View File

@@ -2,38 +2,40 @@ aiosqlite==0.20.0
alembic==1.13.1
annotated-types==0.7.0
anthropic==0.25.9
anyio==4.3.0
certifi==2024.2.2
anyio==4.4.0
certifi==2024.6.2
charset-normalizer==3.3.2
colorama==0.4.6
distro==1.9.0
exceptiongroup==1.2.1
filelock==3.14.0
fsspec==2024.5.0
fsspec==2024.6.0
greenlet==3.0.3
groq==0.6.0
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
huggingface-hub==0.23.1
huggingface-hub==0.23.2
idna==3.7
jinja2==3.1.4
mako==1.3.5
markupsafe==2.1.5
openai==1.30.1
openai==1.31.0
packaging==24.0
prompt-toolkit==3.0.46
psutil==5.9.8
pydantic-core==2.18.2
pydantic==2.7.1
pydantic-core==2.18.4
pydantic==2.7.3
python-dotenv==1.0.1
pyyaml==6.0.1
regex==2024.5.15
requests==2.32.2
requests==2.32.3
sniffio==1.3.1
sqlalchemy==2.0.30
sqlalchemy[asyncio]==2.0.30
tiktoken==0.6.0
tokenizers==0.19.1
tqdm==4.66.4
typing-extensions==4.11.0
typing-extensions==4.12.1
urllib3==2.2.1
wcwidth==0.2.13

View File

@@ -0,0 +1,50 @@
from unittest.mock import patch
import pytest
from httpx import HTTPError
from core.agents.external_docs import DocQueries, ExternalDocumentation, SelectedDocsets
@pytest.mark.asyncio
async def test_stores_documentation_snippets_for_task(agentcontext):
    """When the LLM selects a known docset, fetched snippets are stored in the next state."""
    sm, _, ui, mock_llm = agentcontext

    # Seed the project state with a single pending task so the agent has work to do.
    sm.current_state.tasks = [{"description": "Some VueJS task", "status": "todo"}]
    await sm.commit()

    ed = ExternalDocumentation(sm, ui)
    # First LLM call picks the docset, second LLM call produces the search queries.
    ed.get_llm = mock_llm(
        side_effect=[SelectedDocsets(docsets=["vuejs-api-ref"]), DocQueries(queries=["VueJS component model"])]
    )
    await ed.run()

    # The stored doc entry must be keyed by the selected docset.
    assert ed.next_state.docs[0]["key"] == "vuejs-api-ref"
@pytest.mark.asyncio
async def test_continues_without_docs_for_invalid_docset(agentcontext):
    """If the LLM selects a docset that doesn't exist, the agent continues with no docs."""
    sm, _, ui, mock_llm = agentcontext

    # Seed the project state with a single pending task so the agent has work to do.
    sm.current_state.tasks = [{"description": "Some VueJS task", "status": "todo"}]
    await sm.commit()

    ed = ExternalDocumentation(sm, ui)
    # "doesnt-exist" is not a valid docset key; the agent should tolerate this.
    ed.get_llm = mock_llm(
        side_effect=[SelectedDocsets(docsets=["doesnt-exist"]), DocQueries(queries=["VueJS component model"])]
    )
    await ed.run()

    # No snippets stored, but run() completed without raising.
    assert ed.next_state.docs == []
@pytest.mark.asyncio
async def test_continues_without_docs_if_api_is_down(agentcontext):
    """If the external docs HTTP API is unreachable, the agent continues with no docs."""
    sm, _, ui, _ = agentcontext

    # Seed the project state with a single pending task so the agent has work to do.
    sm.current_state.tasks = [{"description": "Future Task", "status": "todo"}]
    await sm.commit()

    ed = ExternalDocumentation(sm, ui)
    # Simulate the docs API being down: every HTTP GET raises an HTTPError.
    with patch("httpx.Client.get", side_effect=HTTPError("Failed")):
        await ed.run()

    # No snippets stored, but run() completed without raising.
    assert ed.next_state.docs == []

View File

@@ -80,14 +80,14 @@ async def test_create_next_deep_copies_fields(testdb):
next_state.tasks[0]["completed"] = True
next_state.iterations[0]["completed"] = True
next_state.steps[0]["completed"] = True
next_state.relevant_files.append("test.txt")
next_state.relevant_files = ["test.txt"]
next_state.modified_files["test.txt"] = "Hello World"
assert state.epics[0]["completed"] is False
assert state.tasks[0]["completed"] is False
assert state.iterations[0]["completed"] is False
assert state.steps[0]["completed"] is False
assert state.relevant_files == []
assert state.relevant_files is None
assert state.modified_files == {}

View File

@@ -1,4 +1,4 @@
from unittest.mock import patch
from unittest.mock import AsyncMock, patch
import pytest
@@ -35,8 +35,9 @@ async def test_stream(capsys):
@pytest.mark.asyncio
@patch("builtins.input", return_value="awesome")
async def test_ask_question_simple(mock_input):
@patch("core.ui.console.PromptSession")
async def test_ask_question_simple(mock_PromptSession):
prompt_async = mock_PromptSession.return_value.prompt_async = AsyncMock(return_value="awesome")
ui = PlainConsoleUI()
await ui.start()
@@ -48,12 +49,13 @@ async def test_ask_question_simple(mock_input):
await ui.stop()
mock_input.assert_called_once()
prompt_async.assert_awaited_once()
@pytest.mark.asyncio
@patch("builtins.input", return_value="yes")
async def test_ask_question_with_buttons(mock_input):
@patch("core.ui.console.PromptSession")
async def test_ask_question_with_buttons(mock_PromptSession):
prompt_async = mock_PromptSession.return_value.prompt_async = AsyncMock(return_value="yes")
ui = PlainConsoleUI()
await ui.start()
@@ -68,12 +70,13 @@ async def test_ask_question_with_buttons(mock_input):
await ui.stop()
mock_input.assert_called_once()
prompt_async.assert_awaited_once()
@pytest.mark.asyncio
@patch("builtins.input", side_effect=KeyboardInterrupt())
async def test_ask_question_interrupted(mock_input):
@patch("core.ui.console.PromptSession")
async def test_ask_question_interrupted(mock_PromptSession):
prompt_async = mock_PromptSession.return_value.prompt_async = AsyncMock(side_effect=KeyboardInterrupt)
ui = PlainConsoleUI()
await ui.start()
@@ -82,4 +85,4 @@ async def test_ask_question_interrupted(mock_input):
await ui.stop()
mock_input.assert_called_once()
prompt_async.assert_awaited_once()