Use external documentation when it makes sense.

This commit is contained in:
Goran Peretin
2024-06-05 17:25:19 +00:00
parent dbd3bec8b5
commit 77e3fe3e85
7 changed files with 183 additions and 0 deletions

View File

@@ -0,0 +1,130 @@
import asyncio
from urllib.parse import urljoin
import httpx
from core.agents.base import BaseAgent
from core.agents.convo import AgentConvo
from core.agents.response import AgentResponse
from core.config import EXTERNAL_DOCUMENTATION_API
from core.log import get_logger
from core.telemetry import telemetry
log = get_logger(__name__)
class ExternalDocumentation(BaseAgent):
    """Agent in charge of collecting and storing additional documentation.

    Docs are per task and are stored in the `tasks` variable in the project state.
    This agent ensures documentation is collected only once per task.

    Agent does 2 LLM interactions:
        1. Ask the LLM to select useful documentation from a predefined list.
        2. Ask the LLM to come up with a query to use to fetch the actual documentation snippets.

    External documentation is a best-effort addition: if the documentation
    service cannot be reached or returns something unusable, the agent logs a
    warning and stores whatever it managed to collect (possibly nothing)
    instead of failing the task.
    """

    agent_type = "external-docs"
    display_name = "Documentation"

    async def run(self) -> AgentResponse:
        """Collect documentation snippets for the current task and store them.

        :return: `AgentResponse.done(self)` in every case.
        """
        current_task = self.current_state.current_task
        if not current_task:
            # If we have no active task, there's no docs to collect
            return AgentResponse.done(self)

        available_docsets = await self._get_available_docsets()
        selected_docsets = await self._select_docsets(available_docsets)

        if not selected_docsets:
            # An empty list (rather than a missing "docs" key) records that
            # the lookup has already been done for this task.
            await self._store_docs([], available_docsets)
            return AgentResponse.done(self)

        telemetry.set("docsets_used", selected_docsets)
        queries = await self._create_queries(selected_docsets)
        doc_snippets = await self._fetch_snippets(queries)
        telemetry.set("doc_snippets_stored", len(doc_snippets))

        await self._store_docs(doc_snippets, available_docsets)
        return AgentResponse.done(self)

    async def _get_available_docsets(self) -> list[tuple]:
        """Fetch the list of available docsets from the documentation API.

        :return: list of (docset_key, short_description) pairs, or an empty
            list if the API could not be reached or returned an unusable
            response.
        """
        url = urljoin(EXTERNAL_DOCUMENTATION_API, "docsets")
        try:
            # Use the async client so the event loop is not blocked while
            # waiting for the network.
            async with httpx.AsyncClient() as client:
                resp = await client.get(url)
            resp.raise_for_status()
            docsets = resp.json()
        except (httpx.HTTPError, ValueError):
            # ValueError covers a response body that is not valid JSON.
            log.warning(
                "Failed to fetch available docsets, continuing without external documentation.",
                exc_info=True,
            )
            return []

        if not isinstance(docsets, list):
            log.warning(f"Unexpected docsets response of type {type(docsets).__name__}, ignoring it.")
            return []

        log.debug(f"Fetched {len(docsets)} docsets.")
        return docsets

    async def _select_docsets(self, available_docsets: list[tuple]) -> dict[str, str]:
        """Ask the LLM which of the available docsets are useful for the current task.

        :param available_docsets: list of (docset_key, short_description) pairs.
        :return: mapping of selected docset_key to its short description; empty
            if nothing is available or the LLM answered "DONE".
        """
        if not available_docsets:
            # Nothing to choose from, so there is no point in asking the LLM.
            return {}

        llm = self.get_llm()
        convo = AgentConvo(self).template(
            "select_docset",
            current_task=self.current_state.current_task,
            available_docsets=available_docsets,
        )
        llm_response: str = await llm(convo)
        if llm_response.strip().lower() == "done":
            return {}

        docsets_by_key = dict(available_docsets)
        selected: dict[str, str] = {}
        for line in llm_response.splitlines():
            # The LLM is asked for bare keys, one per line, but tolerate
            # padding and stray quotes/backticks around them.
            key = line.strip().strip("\"'`").strip()
            if not key:
                continue
            if key in docsets_by_key:
                selected[key] = docsets_by_key[key]
            else:
                log.warning(f"LLM selected unknown docset {key!r}, ignoring it.")
        return selected

    async def _create_queries(self, docsets: dict[str, str]) -> dict[str, list[str]]:
        """Return queries we have to make to the docs API.

        Key is the docset_key and value is the list of queries for that docset.
        Docsets for which the LLM answers "DONE" or returns no usable lines are
        left out of the result.
        """
        queries = {}
        for k, short_desc in docsets.items():
            llm = self.get_llm()
            convo = AgentConvo(self).template(
                "create_queries",
                short_description=short_desc,
                current_task=self.current_state.current_task,
            )
            llm_response: str = await llm(convo)
            if llm_response.strip().lower() == "done":
                continue

            # Drop blank lines so we never send empty queries to the API.
            docset_queries = [line.strip() for line in llm_response.splitlines() if line.strip()]
            if docset_queries:
                queries[k] = docset_queries
        return queries

    async def _fetch_snippets(self, queries: dict[str, list[str]]) -> list[tuple]:
        """Query the docs API and fetch the documentation snippets.

        Requests for all docsets are issued concurrently. A docset whose
        request fails is skipped with a warning so the remaining results are
        still used.

        Returns a list of tuples: (docset_key, snippets).
        """
        if not queries:
            return []

        url = urljoin(EXTERNAL_DOCUMENTATION_API, "query")
        ordered_keys = list(queries)
        async with httpx.AsyncClient() as client:
            # return_exceptions=True keeps one failed request from discarding
            # the responses of all the others.
            results = await asyncio.gather(
                *(client.get(url, params={"q": queries[k], "doc_key": k}) for k in ordered_keys),
                return_exceptions=True,
            )

        snippets: list[tuple] = []
        for k, res in zip(ordered_keys, results):
            if isinstance(res, httpx.HTTPError):
                log.warning(f"Failed to fetch documentation snippets for {k!r}: {res!r}")
                continue
            if isinstance(res, BaseException):
                # Anything other than a network error is unexpected; don't hide it.
                raise res

            try:
                res.raise_for_status()
                snippets.append((k, res.json()))
            except (httpx.HTTPError, ValueError):
                # ValueError covers a response body that is not valid JSON.
                log.warning(f"Unusable documentation response for {k!r}, skipping it.", exc_info=True)
        return snippets

    async def _store_docs(self, snippets: list[tuple], available_docsets: list[tuple]):
        """Store the snippets into current task data.

        Documentation snippets are stored as a list of dictionaries:
        {"key": docset-key, "desc": documentation-description, "snippets": list-of-snippets}

        :param snippets: list of (docset_key, snippets) tuples.
        :param available_docsets: list of (docset_key, short_description)
            pairs, used to look up the description of each docset.
        """
        docsets_dict = dict(available_docsets)
        docs = [
            {"key": docset_key, "desc": docsets_dict[docset_key], "snippets": snip}
            for docset_key, snip in snippets
        ]

        self.next_state.current_task["docs"] = docs
        self.next_state.flag_tasks_as_modified()

View File

@@ -7,6 +7,7 @@ from core.agents.code_reviewer import CodeReviewer
from core.agents.developer import Developer
from core.agents.error_handler import ErrorHandler
from core.agents.executor import Executor
from core.agents.external_docs import ExternalDocumentation
from core.agents.human_input import HumanInput
from core.agents.importer import Importer
from core.agents.problem_solver import ProblemSolver
@@ -189,6 +190,10 @@ class Orchestrator(BaseAgent):
# Ask the Tech Lead to break down the initial project or feature into tasks and apply project template
return TechLead(self.state_manager, self.ui, process_manager=self.process_manager)
current_task_docs = state.current_task.get("docs") if state.current_task else None
if current_task_docs is None:
return ExternalDocumentation(self.state_manager, self.ui)
# Current task status must be checked before Developer is called because we might want
# to skip it instead of breaking it down
current_task_status = state.current_task.get("status") if state.current_task else None

View File

@@ -35,6 +35,9 @@ IGNORE_SIZE_THRESHOLD = 50000 # 50K+ files are ignored by default
DEFAULT_AGENT_NAME = "default"
DESCRIBE_FILES_AGENT_NAME = "CodeMonkey.describe_files"
# Endpoint for the external documentation.
# This is the base URL; the ExternalDocumentation agent joins the "docsets"
# and "query" paths onto it.
# NOTE(review): this is a plain-HTTP load balancer address - confirm whether
# an HTTPS endpoint should be used instead.
EXTERNAL_DOCUMENTATION_API = "http://docs-pythagora-io-439719575.us-east-1.elb.amazonaws.com"
class _StrictModel(BaseModel):
"""

View File

@@ -19,6 +19,21 @@ You are currently working on task #{{ current_task_index + 1 }} with the followi
Now, tell me all the code that needs to be written to implement ONLY this task and have it fully working and all commands that need to be run to implement this task.
{% if task.docs %}
We have some documentation snippets that might be helpful while working on this task, we will now list those.
{% for d in task.docs %}
Documentation snippets from {{ d.desc }}:
{% for snippet in d.snippets %}
{{ snippet }}
{% endfor %}
{% endfor %}
This concludes the documentation snippets.
{% endif %}
**IMPORTANT**
{%- if state.epics|length == 1 %}
Remember, I created an empty folder where I will start writing files that you tell me and that are needed for this app.

View File

@@ -0,0 +1,12 @@
{% include "partials/project_details.prompt" %}
Here is the next task that needs to be implemented:
{{ current_task.description }}
Here is the list of the libraries, frameworks and APIs for which we have documentation available. The documentation is given in a sequence of pairs, one pair per line. First item in the pair is the documentation key. Second item is the short description of what that documentation contains.
Here's an example for React API documentation:
"react-api-ref", "React API Reference documentation"
We have additional documentation from "{{ short_description }}" that might be useful for completing this task.
Now, give me a summary of what specifically from the {{ short_description }} you think would be useful for completing this task. Please provide only the topics of interest, no additional text. Only return the topics relevant to the actual implementation, NOT the topics related to library installation and setup, environment setup, database setup and similar. Return the topics one item per line, WITHOUT any additional formatting such as backticks, bullets and similar. Return a maximum of 5 topics you think would be most useful.

View File

@@ -0,0 +1,15 @@
{% include "partials/project_details.prompt" %}
Here is the next task that needs to be implemented:
{{ current_task.description }}
Here is the list of the libraries, frameworks and APIs for which we have documentation available. The documentation is given in a sequence of pairs, one pair per line. First item in the pair is the documentation key. Second item is the short description of what that documentation contains.
Here's an example for React API documentation:
"react-api-ref", "React API Reference documentation"
Here is the list of available documentations:
{% for docset in available_docsets %}
"{{ docset[0] }}", "{{ docset[1] }}"
{% endfor %}
Now, give me the list of the additional documentation that you would like to use to complete the task listed above. Return ONLY the keys from the available documentation list, without any formatting like quotes and bullets, DO NOT return anything else. If you don't need any additional documentation, just respond with "DONE". Return only the documentation that is absolutely required for the given task.

View File

@@ -0,0 +1,3 @@
You are a world class full stack software developer working in a team.
Your job is to select the documentation that might be useful for implementing a task at hand.