diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 00000000..5a6f6a3e
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,35 @@
+name: Run unit tests
+
+on:
+ push:
+ branches: [ "main" ]
+ pull_request:
+ branches: [ "main" ]
+
+jobs:
+ build:
+
+ runs-on: ${{ matrix.os }}
+ timeout-minutes: 10
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.9", "3.12"]
+ os: [ubuntu-latest, macos-latest, windows-latest]
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ python -m pip install poetry
+ poetry install --with=dev
+ - name: Lint with ruff
+ run: poetry run ruff check --output-format github
+ - name: Check code style with ruff
+ run: poetry run ruff format --check --diff
+ - name: Test with pytest
+ run: poetry run pytest
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..8806b02f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,18 @@
+__pycache__/
+.venv/
+.vscode/
+.idea/
+htmlcov/
+dist/
+workspace/
+
+.coverage
+*.code-workspace
+.*_cache
+.env
+*.pyc
+*.db
+config.json
+poetry.lock
+.DS_Store
+*.log
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..1d395e98
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,21 @@
+fail_fast: true
+repos:
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ # Ruff version.
+ rev: v0.3.5
+ hooks:
+ # Run the linter.
+ - id: ruff
+ args: [ --fix ]
+ # Run the formatter.
+ - id: ruff-format
+ - repo: local
+ hooks:
+ # Run the tests
+ - id: pytest
+ name: pytest
+ stages: [commit]
+ types: [python]
+ entry: pytest
+ language: system
+ pass_filenames: false
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..74887def
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,110 @@
+# Functional Source License, Version 1.1, MIT Future License
+
+## Abbreviation
+
+FSL-1.1-MIT
+
+## Notice
+
+Copyright 2024 Pythagora Technologies, Inc.
+
+## Terms and Conditions
+
+### Licensor ("We")
+
+The party offering the Software under these Terms and Conditions.
+
+### The Software
+
+The "Software" is each version of the software that we make available under
+these Terms and Conditions, as indicated by our inclusion of these Terms and
+Conditions with the Software.
+
+### License Grant
+
+Subject to your compliance with this License Grant and the Patents,
+Redistribution and Trademark clauses below, we hereby grant you the right to
+use, copy, modify, create derivative works, publicly perform, publicly display
+and redistribute the Software for any Permitted Purpose identified below.
+
+### Permitted Purpose
+
+A Permitted Purpose is any purpose other than a Competing Use. A Competing Use
+means making the Software available to others in a commercial product or
+service that:
+
+1. substitutes for the Software;
+
+2. substitutes for any other product or service we offer using the Software
+ that exists as of the date we make the Software available; or
+
+3. offers the same or substantially similar functionality as the Software.
+
+Permitted Purposes specifically include using the Software:
+
+1. for your internal use and access;
+
+2. for non-commercial education;
+
+3. for non-commercial research; and
+
+4. in connection with professional services that you provide to a licensee
+ using the Software in accordance with these Terms and Conditions.
+
+### Patents
+
+To the extent your use for a Permitted Purpose would necessarily infringe our
+patents, the license grant above includes a license under our patents. If you
+make a claim against any party that the Software infringes or contributes to
+the infringement of any patent, then your patent license to the Software ends
+immediately.
+
+### Redistribution
+
+The Terms and Conditions apply to all copies, modifications and derivatives of
+the Software.
+
+If you redistribute any copies, modifications or derivatives of the Software,
+you must include a copy of or a link to these Terms and Conditions and not
+remove any copyright notices provided in or with the Software.
+
+### Disclaimer
+
+THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR
+PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT.
+
+IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE
+SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES,
+EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE.
+
+### Trademarks
+
+Except for displaying the License Details and identifying us as the origin of
+the Software, you have no right under these Terms and Conditions to use our
+trademarks, trade names, service marks or product names.
+
+## Grant of Future License
+
+We hereby irrevocably grant you an additional license to use the Software under
+the MIT license that is effective on the second anniversary of the date we make
+the Software available. On or after that date, you may use the Software under
+the MIT license, in which case the following will apply:
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..a7e496bb
--- /dev/null
+++ b/README.md
@@ -0,0 +1,238 @@
+
+
+### GPT Pilot doesn't just generate code, it builds apps!
+
+
+
+---
+
+
+[Watch the demo video](https://youtu.be/4g-1cPGK0GA)
+
+(click to open the video in YouTube) (1:40min)
+
+
+
+---
+
+
+
+
+
+
+
+GPT Pilot is the core technology for the [Pythagora VS Code extension](https://bit.ly/3IeZxp6) that aims to provide **the first real AI developer companion**. Not just an autocomplete or a helper for PR messages but rather a real AI developer that can write full features, debug them, talk to you about issues, ask for review, etc.
+
+---
+
+📫 If you would like to get updates on future releases or just get in touch, join our [Discord server](https://discord.gg/HaqXugmxr9) or you [can add your email here](http://eepurl.com/iD6Mpo). 📬
+
+---
+
+
+* [🔌 Requirements](#-requirements)
+* [🚦How to start using gpt-pilot?](#how-to-start-using-gpt-pilot)
+* [🔎 Examples](#-examples)
+* [🐳 How to start gpt-pilot in docker?](#-how-to-start-gpt-pilot-in-docker)
+* [🧑‍💻️ CLI arguments](#-cli-arguments)
+* [🏗 How GPT Pilot works?](#-how-gpt-pilot-works)
+* [🕴How's GPT Pilot different from _Smol developer_ and _GPT engineer_?](#hows-gpt-pilot-different-from-smol-developer-and-gpt-engineer)
+* [🍻 Contributing](#-contributing)
+* [🔗 Connect with us](#-connect-with-us)
+* [🌟 Star history](#-star-history)
+
+
+---
+
+GPT Pilot aims to research how much LLMs can be utilized to generate fully working, production-ready apps while the developer oversees the implementation.
+
+**The main idea is that AI can write most of the code for an app (maybe 95%), but for the remaining 5%, a developer is and will be needed until we get full AGI**.
+
+If you are interested in our learnings during this project, you can check [our latest blog posts](https://blog.pythagora.ai/2024/02/19/gpt-pilot-what-did-we-learn-in-6-months-of-working-on-a-codegen-pair-programmer/).
+
+---
+
+
+
+
+
+### **[🎉 Examples of apps written by GPT Pilot 🎉](https://github.com/Pythagora-io/gpt-pilot/wiki/Apps-created-with-GPT-Pilot)**
+
+
+
+
+---
+
+# 🔌 Requirements
+
+- **Python 3.9+**
+
+# 🚦How to start using gpt-pilot?
+👉 If you are using VS Code as your IDE, the easiest way to start is by downloading [GPT Pilot VS Code extension](https://bit.ly/3IeZxp6). 👈
+
+Otherwise, you can use the CLI tool.
+
+### If you're new to GPT Pilot:
+
+After you have Python and (optionally) PostgreSQL installed, follow these steps:
+
+1. `git clone https://github.com/Pythagora-io/gpt-pilot.git` (clone the repo)
+2. `cd gpt-pilot` (go to the repo folder)
+3. `python -m venv venv` (create a virtual environment)
+4. `source venv/bin/activate` (or on Windows `venv\Scripts\activate`) (activate the virtual environment)
+5. `pip install -r requirements.txt` (install the dependencies)
+6. `cp example-config.json config.json` (create `config.json` file)
+7. Set your API key and other settings in the `config.json` file (see the sketch after these steps):
+   - LLM provider (`openai`, `anthropic` or `groq`) key and endpoints (leave `null` for default; note that Azure and OpenRouter are supported via the `openai` setting)
+   - your API key (if `null`, it will be read from the environment variables)
+   - database settings: SQLite is used by default, PostgreSQL should also work
+   - optionally update `fs.ignore_paths` and add files or folders which shouldn't be tracked by GPT Pilot in the workspace; useful for ignoring folders created by compilers
+8. `python main.py` (start GPT Pilot)
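+
+For orientation, here is a minimal sketch of what a filled-in `config.json` might look like. The authoritative schema is `example-config.json` in the repo; the keys and values below are illustrative assumptions, not the definitive format:
+
+```json
+{
+  "llm": {
+    "openai": {
+      "base_url": null,
+      "api_key": "sk-your-key-here"
+    }
+  },
+  "db": {
+    "url": "sqlite+aiosqlite:///pythagora.db"
+  },
+  "fs": {
+    "ignore_paths": ["node_modules", "dist"]
+  }
+}
+```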
+
+All generated code will be stored in the `workspace` folder, in a subfolder named after the app name you enter when starting the pilot.
+
+### If you're upgrading from GPT Pilot v0.1
+
+Assuming you already have the git repository with an earlier version:
+
+1. `git pull` (update the repo)
+2. `source pilot-env/bin/activate` (or on Windows `pilot-env\Scripts\activate`) (activate the virtual environment)
+3. `pip install -r requirements.txt` (install the new dependencies)
+4. `python main.py --import-v0 pilot/gpt-pilot` (this should import your settings and existing projects)
+
+This will create a new database `pythagora.db` and import all apps from the old database. For each app,
+it will import the start of the latest task you were working on.
+
+To verify that the import was successful, you can run `python main.py --list` to see all the apps you have created,
+and inspect `config.json` to verify that the settings were correctly converted to the new config file format (making
+any adjustments if needed).
+
+# 🔎 [Examples](https://github.com/Pythagora-io/gpt-pilot/wiki/Apps-created-with-GPT-Pilot)
+
+[Click here](https://github.com/Pythagora-io/gpt-pilot/wiki/Apps-created-with-GPT-Pilot) to see all example apps created with GPT Pilot.
+
+## 🐳 How to start gpt-pilot in docker?
+1. `git clone https://github.com/Pythagora-io/gpt-pilot.git` (clone the repo)
+2. Update the environment variables in `docker-compose.yml` (you can check the resulting configuration with `docker compose config`). If you wish to use a local model, please go to [https://localai.io/basics/getting_started/](https://localai.io/basics/getting_started/).
+3. By default, GPT Pilot will read & write to `~/gpt-pilot-workspace` on your machine; you can also edit this in `docker-compose.yml`.
+4. Run `docker compose build`. This will build a gpt-pilot container for you.
+5. Run `docker compose up`.
+6. Access the web terminal on port 7681.
+7. `python main.py` (start GPT Pilot)
+
+This will start two containers: one is a new image built from the `Dockerfile`, and the other is a Postgres database. The new image also has [ttyd](https://github.com/tsl0922/ttyd) installed so that you can easily interact with gpt-pilot. Node is also installed on the image, and port 3000 is exposed.
+
+
+# 🧑‍💻️ CLI arguments
+
+### List created projects (apps)
+
+```bash
+python main.py --list
+```
+
+Note: for each project (app), this also lists "branches". Currently we only support having one branch (called "main"), and in the future we plan to add support for multiple project branches.
+
+### Load and continue from the latest step in a project (app)
+
+```bash
+python main.py --project <app_id>
+```
+
+### Load and continue from a specific step in a project (app)
+
+```bash
+python main.py --project <app_id> --step <step>
+```
+
+Warning: this will delete all progress after the specified step!
+
+### Delete project (app)
+
+```bash
+python main.py --delete <app_id>
+```
+
+Delete project with the specified `app_id`. Warning: this cannot be undone!
+
+### Import projects from v0.1
+
+```bash
+python main.py --import-v0 <path>
+```
+
+This will import projects from the old GPT Pilot v0.1 database. The path should be the path to the old GPT Pilot v0.1 database. For each project, it will import the start of the latest task you were working on. If the project was already imported, the import procedure will skip it (won't overwrite the project in the database).
+
+### Other command-line options
+
+There are several other command-line options that mostly support calling GPT Pilot from our VSCode extension. To see all the available options, use the `--help` flag:
+
+```bash
+python main.py --help
+```
+
+# 🏗 How GPT Pilot works?
+Here are the steps GPT Pilot takes to create an app:
+
+1. You enter the app name and the description.
+2. **Product Owner agent**, like in real life, does nothing. :)
+3. **Specification Writer agent** asks a couple of questions to understand the requirements better if the project description is not detailed enough.
+4. **Architect agent** writes up the technologies that will be used for the app, checks whether they are installed on the machine, and installs any that are missing.
+5. **Tech Lead agent** writes up development tasks that the Developer must implement.
+6. **Developer agent** takes each task and writes up what needs to be done to implement it. The description is in human-readable form.
+7. **Code Monkey agent** takes the Developer's description and the existing file and implements the changes.
+8. **Reviewer agent** reviews every step of the task and, if something is done wrong, sends it back to the Code Monkey.
+9. **Troubleshooter agent** helps you give good feedback to GPT Pilot when something is wrong.
+10. **Debugger agent**: you hate to see him, but he is your best friend when things go south.
+11. **Technical Writer agent** writes documentation for the project.
+
+
+
+# 🕴How's GPT Pilot different from _Smol developer_ and _GPT engineer_?
+
+- **GPT Pilot works with the developer to create a fully working production-ready app** - I don't think AI can (at least in the near future) create apps without a developer being involved. So, **GPT Pilot codes the app step by step** just like a developer would in real life. This way, it can debug issues as they arise throughout the development process. If it gets stuck, you, the developer in charge, can review the code and fix the issue. Other similar tools give you the entire codebase at once - this way, bugs are much harder to fix for AI and for you as a developer.
+
+- **Works at scale** - GPT Pilot isn't meant only for creating simple apps; it is designed to work at any scale. It has mechanisms that filter the codebase so that each LLM conversation doesn't need the entire codebase in its context; instead, the LLM is shown only the code relevant to the task it's currently working on. Once an app is finished, you can continue working on it by writing instructions for the feature you want to add.
+
+# 🍻 Contributing
+If you are interested in contributing to GPT Pilot, join [our Discord server](https://discord.gg/HaqXugmxr9), check out open [GitHub issues](https://github.com/Pythagora-io/gpt-pilot/issues), and see if anything interests you. We would be happy to get help in resolving any of those. The best place to start is by reviewing the blog posts mentioned above to understand how the architecture works before diving into the codebase.
+
+## 🖥 Development
+Other than the research, GPT Pilot needs to be debugged to work in different scenarios. For example, we realized that the quality of the generated code is very sensitive to the size of the development task. When the task is too broad, the code has too many bugs that are hard to fix, but when the development task is too narrow, the LLM also seems to struggle to implement the task within the existing code.
+
+## 📊 Telemetry
+To improve GPT Pilot, we are tracking some events from which you can opt out at any time. You can read more about it [here](./docs/TELEMETRY.md).
+
+# 🔗 Connect with us
+🌟 As an open-source tool, it would mean the world to us if you starred the GPT-pilot repo 🌟
+
+💬 Join [the Discord server](https://discord.gg/HaqXugmxr9) to get in touch.
diff --git a/core/agents/__init__.py b/core/agents/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/core/agents/architect.py b/core/agents/architect.py
new file mode 100644
index 00000000..646f02cf
--- /dev/null
+++ b/core/agents/architect.py
@@ -0,0 +1,146 @@
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.response import AgentResponse
+from core.llm.parser import JSONParser
+from core.telemetry import telemetry
+from core.templates.registry import PROJECT_TEMPLATES, ProjectTemplateEnum
+from core.ui.base import ProjectStage
+
+ARCHITECTURE_STEP = "architecture"
+WARN_SYSTEM_DEPS = ["docker", "kubernetes", "microservices"]
+WARN_FRAMEWORKS = ["next.js", "vue", "vue.js", "svelte", "angular"]
+WARN_FRAMEWORKS_URL = "https://github.com/Pythagora-io/gpt-pilot/wiki/Using-GPT-Pilot-with-frontend-frameworks"
+
+
+# FIXME: all the response pydantic models should be strict (see config._StrictModel); also check if we
+# can disallow adding custom Python attributes to the model
+class SystemDependency(BaseModel):
+ name: str = Field(
+ None,
+ description="Name of the system dependency, for example Node.js or Python.",
+ )
+ description: str = Field(
+ None,
+ description="One-line description of the dependency.",
+ )
+ test: str = Field(
+ None,
+ description="Command line to test whether the dependency is available on the system.",
+ )
+ required_locally: bool = Field(
+ None,
+ description="Whether this dependency must be installed locally (as opposed to connecting to cloud or other server)",
+ )
+
+
+class PackageDependency(BaseModel):
+ name: str = Field(
+ None,
+ description="Name of the package dependency, for example Express or React.",
+ )
+ description: str = Field(
+ None,
+ description="One-line description of the dependency.",
+ )
+
+
+class Architecture(BaseModel):
+ architecture: str = Field(
+ None,
+ description="General description of the app architecture.",
+ )
+ system_dependencies: list[SystemDependency] = Field(
+ None,
+ description="List of system dependencies required to build and run the app.",
+ )
+ package_dependencies: list[PackageDependency] = Field(
+ None,
+ description="List of framework/language-specific packages used by the app.",
+ )
+ template: Optional[ProjectTemplateEnum] = Field(
+ None,
+ description="Project template to use for the app, if any (optional, can be null).",
+ )
+
+
+class Architect(BaseAgent):
+ agent_type = "architect"
+ display_name = "Architect"
+
+ async def run(self) -> AgentResponse:
+ await self.ui.send_project_stage(ProjectStage.ARCHITECTURE)
+
+ llm = self.get_llm()
+ convo = AgentConvo(self).template("technologies", templates=PROJECT_TEMPLATES).require_schema(Architecture)
+
+ await self.send_message("Planning project architecture ...")
+ arch: Architecture = await llm(convo, parser=JSONParser(Architecture))
+
+ await self.check_compatibility(arch)
+ await self.check_system_dependencies(arch.system_dependencies)
+
+ spec = self.current_state.specification.clone()
+ spec.architecture = arch.architecture
+ spec.system_dependencies = [d.model_dump() for d in arch.system_dependencies]
+ spec.package_dependencies = [d.model_dump() for d in arch.package_dependencies]
+ spec.template = arch.template.value if arch.template else None
+
+ self.next_state.specification = spec
+ telemetry.set(
+ "architecture",
+ {
+ "description": spec.architecture,
+ "system_dependencies": spec.system_dependencies,
+ "package_dependencies": spec.package_dependencies,
+ },
+ )
+ telemetry.set("template", spec.template)
+ return AgentResponse.done(self)
+
+ async def check_compatibility(self, arch: Architecture) -> bool:
+ warn_system_deps = [dep.name for dep in arch.system_dependencies if dep.name.lower() in WARN_SYSTEM_DEPS]
+ warn_package_deps = [dep.name for dep in arch.package_dependencies if dep.name.lower() in WARN_FRAMEWORKS]
+
+ if warn_system_deps:
+ await self.ask_question(
+ f"Warning: GPT Pilot doesn't officially support {', '.join(warn_system_deps)}. "
+ f"You can try to use {'it' if len(warn_system_deps) == 1 else 'them'}, but you may run into problems.",
+ buttons={"continue": "Continue"},
+ buttons_only=True,
+ default="continue",
+ )
+
+ if warn_package_deps:
+ await self.ask_question(
+ f"Warning: GPT Pilot works best with vanilla JavaScript. "
+ f"You can try try to use {', '.join(warn_package_deps)}, but you may run into problems. "
+ f"Visit {WARN_FRAMEWORKS_URL} for more information.",
+ buttons={"continue": "Continue"},
+ buttons_only=True,
+ default="continue",
+ )
+
+ # TODO: add "cancel" option to the above buttons; if pressed, Architect should
+ # return AgentResponse.revise_spec()
+ # that SpecWriter should catch and allow the user to reword the initial spec.
+ return True
+
+ async def check_system_dependencies(self, deps: list[SystemDependency]):
+ """
+ Check whether the required system dependencies are installed.
+ """
+
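+        # Each dependency's `test` field is a shell command (for example,
+        # "node --version" for Node.js; illustrative, the actual command comes
+        # from the LLM response) whose zero exit status means it is present.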
+ for dep in deps:
+ status_code, _, _ = await self.process_manager.run_command(dep.test)
+ if status_code != 0:
+ if dep.required_locally:
+ remedy = "Please install it before proceeding with your app."
+ else:
+ remedy = "If you would like to use it locally, please install it before proceeding."
+ await self.send_message(f"β {dep.name} is not available. {remedy}")
+ else:
+ await self.send_message(f"β {dep.name} is available.")
diff --git a/core/agents/base.py b/core/agents/base.py
new file mode 100644
index 00000000..3959cf87
--- /dev/null
+++ b/core/agents/base.py
@@ -0,0 +1,174 @@
+from typing import Any, Callable, Optional
+
+from core.agents.response import AgentResponse
+from core.config import get_config
+from core.db.models import ProjectState
+from core.llm.base import BaseLLMClient, LLMError
+from core.log import get_logger
+from core.proc.process_manager import ProcessManager
+from core.state.state_manager import StateManager
+from core.ui.base import AgentSource, UIBase, UserInput
+
+log = get_logger(__name__)
+
+
+class BaseAgent:
+ """
+ Base class for agents.
+ """
+
+ agent_type: str
+ display_name: str
+
+ def __init__(
+ self,
+ state_manager: StateManager,
+ ui: UIBase,
+ *,
+ step: Optional[Any] = None,
+ prev_response: Optional["AgentResponse"] = None,
+ process_manager: Optional["ProcessManager"] = None,
+ ):
+ """
+ Create a new agent.
+ """
+ self.ui_source = AgentSource(self.display_name, self.agent_type)
+ self.ui = ui
+ self.stream_output = True
+ self.state_manager = state_manager
+ self.process_manager = process_manager
+ self.prev_response = prev_response
+ self.step = step
+
+ @property
+ def current_state(self) -> ProjectState:
+ """Current state of the project (read-only)."""
+ return self.state_manager.current_state
+
+ @property
+ def next_state(self) -> ProjectState:
+ """Next state of the project (write-only)."""
+ return self.state_manager.next_state
+
+ async def send_message(self, message: str):
+ """
+ Send a message to the user.
+
+ Convenience method, uses `UIBase.send_message()` to send the message,
+ setting the correct source.
+
+ :param message: Message to send.
+ """
+ await self.ui.send_message(message + "\n", source=self.ui_source)
+
+ async def ask_question(
+ self,
+ question: str,
+ *,
+ buttons: Optional[dict[str, str]] = None,
+ default: Optional[str] = None,
+ buttons_only: bool = False,
+ initial_text: Optional[str] = None,
+ allow_empty: bool = False,
+ hint: Optional[str] = None,
+ ) -> UserInput:
+ """
+ Ask a question to the user and return the response.
+
+ Convenience method, uses `UIBase.ask_question()` to
+ ask the question, setting the correct source and
+ logging the question/response.
+
+ :param question: Question to ask.
+ :param buttons: Buttons to display with the question.
+ :param default: Default button to select.
+ :param buttons_only: Only display buttons, no text input.
+ :param allow_empty: Allow empty input.
+ :param hint: Text to display in a popup as a hint to the question.
+ :param initial_text: Initial text input.
+ :return: User response.
+ """
+ response = await self.ui.ask_question(
+ question,
+ buttons=buttons,
+ default=default,
+ buttons_only=buttons_only,
+ allow_empty=allow_empty,
+ hint=hint,
+ initial_text=initial_text,
+ source=self.ui_source,
+ )
+ await self.state_manager.log_user_input(question, response)
+ return response
+
+ async def stream_handler(self, content: str):
+ """
+ Handle streamed response from the LLM.
+
+        Serves as a callback for the LLM client created by `BaseAgent.get_llm()` so it
+        can stream the responses to the UI. This can be turned on/off on a per-request
+        basis by setting `BaseAgent.stream_output` to True or False.
+
+ :param content: Response content.
+ """
+ if self.stream_output:
+ await self.ui.send_stream_chunk(content, source=self.ui_source)
+
+ if content is None:
+ await self.ui.send_message("")
+
+ async def error_handler(self, error: LLMError, message: Optional[str] = None):
+ """
+ Handle error responses from the LLM.
+
+        :param error: The exception that was thrown by the LLM client.
+ :param message: Optional message to show.
+ """
+
+ if error == LLMError.KEY_EXPIRED:
+ await self.ui.send_key_expired(message)
+ elif error == LLMError.RATE_LIMITED:
+ await self.stream_handler(message)
+
+ def get_llm(self, name=None) -> Callable:
+ """
+ Get a new instance of the agent-specific LLM client.
+
+ The client initializes the UI stream handler and stores the
+ request/response to the current state's log. The agent name
+ can be overridden in case the agent needs to use a different
+ model configuration.
+
+ :param name: Name of the agent for configuration (default: class name).
+ :return: LLM client for the agent.
+ """
+
+ if name is None:
+ name = self.__class__.__name__
+
+ config = get_config()
+
+ llm_config = config.llm_for_agent(name)
+ client_class = BaseLLMClient.for_provider(llm_config.provider)
+ llm_client = client_class(llm_config, stream_handler=self.stream_handler, error_handler=self.error_handler)
+
+ async def client(convo, **kwargs) -> Any:
+ """
+ Agent-specific LLM client.
+
+ For details on optional arguments to pass to the LLM client,
+ see `pythagora.llm.openai_client.OpenAIClient()`.
+ """
+ response, request_log = await llm_client(convo, **kwargs)
+ await self.state_manager.log_llm_request(request_log, agent=self)
+ return response
+
+ return client
+
+    async def run(self) -> AgentResponse:
+ """
+ Run the agent.
+
+ :return: Response from the agent.
+ """
+ raise NotImplementedError()
diff --git a/core/agents/code_monkey.py b/core/agents/code_monkey.py
new file mode 100644
index 00000000..67aa77e8
--- /dev/null
+++ b/core/agents/code_monkey.py
@@ -0,0 +1,127 @@
+from pydantic import BaseModel, Field
+
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.response import AgentResponse, ResponseType
+from core.config import DESCRIBE_FILES_AGENT_NAME
+from core.llm.parser import JSONParser, OptionalCodeBlockParser
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+class FileDescription(BaseModel):
+ summary: str = Field(
+ description="Detailed description summarized what the file is about, and what the major classes, functions, elements or other functionality is implemented."
+ )
+ references: list[str] = Field(
+ description="List of references the file imports or includes (only files local to the project), where each element specifies the project-relative path of the referenced file, including the file extension."
+ )
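+
+# A response matching this schema might look like (illustrative sketch):
+#   {"summary": "Express app entry point that wires up routes and middleware.",
+#    "references": ["routes/index.js", "config/db.js"]}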
+
+
+class CodeMonkey(BaseAgent):
+ agent_type = "code-monkey"
+ display_name = "Code Monkey"
+
+ async def run(self) -> AgentResponse:
+ if self.prev_response and self.prev_response.type == ResponseType.DESCRIBE_FILES:
+ return await self.describe_files()
+ else:
+ return await self.implement_changes()
+
+ def _get_task_convo(self) -> AgentConvo:
+ # FIXME: Current prompts reuse task breakdown / iteration messages so we have to resort to this
+ task = self.current_state.current_task
+ current_task_index = self.current_state.tasks.index(task)
+
+ convo = AgentConvo(self).template(
+ "breakdown",
+ task=task,
+ iteration=None,
+ current_task_index=current_task_index,
+ )
+ # TODO: We currently show last iteration to the code monkey; we might need to show the task
+ # breakdown and all the iterations instead? To think about when refactoring prompts
+ if self.current_state.iterations:
+ convo.assistant(self.current_state.iterations[-1]["description"])
+ else:
+ convo.assistant(self.current_state.current_task["instructions"])
+ return convo
+
+ async def implement_changes(self) -> AgentResponse:
+ file_name = self.step["save_file"]["path"]
+
+ current_file = await self.state_manager.get_file_by_path(file_name)
+ file_content = current_file.content.content if current_file else ""
+
+ task = self.current_state.current_task
+
+ if self.prev_response and self.prev_response.type == ResponseType.CODE_REVIEW_FEEDBACK:
+ attempt = self.prev_response.data["attempt"] + 1
+ feedback = self.prev_response.data["feedback"]
+ log.debug(f"Fixing file {file_name} after review feedback: {feedback} ({attempt}. attempt)")
+ await self.send_message(f"Reworking changes I made to {file_name} ...")
+ else:
+ log.debug(f"Implementing file {file_name}")
+ await self.send_message(f"{'Updating existing' if file_content else 'Creating new'} file {file_name} ...")
+ attempt = 1
+ feedback = None
+
+ llm = self.get_llm()
+ convo = self._get_task_convo().template(
+ "implement_changes",
+ file_name=file_name,
+ file_content=file_content,
+ instructions=task["instructions"],
+ )
+ if feedback:
+ convo.assistant(f"```\n{self.prev_response.data['new_content']}\n```\n").template(
+ "review_feedback",
+ content=self.prev_response.data["approved_content"],
+ original_content=file_content,
+ rework_feedback=feedback,
+ )
+
+ response: str = await llm(convo, temperature=0, parser=OptionalCodeBlockParser())
+        # FIXME: provide a counter here so that we don't end up in an endless loop
+ return AgentResponse.code_review(self, file_name, task["instructions"], file_content, response, attempt)
+
+ async def describe_files(self) -> AgentResponse:
+ llm = self.get_llm(DESCRIBE_FILES_AGENT_NAME)
+ to_describe = {
+ file.path: file.content.content for file in self.current_state.files if not file.meta.get("description")
+ }
+
+ for file in self.next_state.files:
+ content = to_describe.get(file.path)
+ if content is None:
+ continue
+
+ if content == "":
+ file.meta = {
+ **file.meta,
+ "description": "Empty file",
+ "references": [],
+ }
+ continue
+
+ log.debug(f"Describing file {file.path}")
+ await self.send_message(f"Describing file {file.path} ...")
+
+ convo = (
+ AgentConvo(self)
+ .template(
+ "describe_file",
+ path=file.path,
+ content=content,
+ )
+ .require_schema(FileDescription)
+ )
+ llm_response: FileDescription = await llm(convo, parser=JSONParser(spec=FileDescription))
+
+ file.meta = {
+ **file.meta,
+ "description": llm_response.summary,
+ "references": llm_response.references,
+ }
+ return AgentResponse.done(self)
diff --git a/core/agents/code_reviewer.py b/core/agents/code_reviewer.py
new file mode 100644
index 00000000..1d1a8457
--- /dev/null
+++ b/core/agents/code_reviewer.py
@@ -0,0 +1,328 @@
+import re
+from difflib import unified_diff
+from enum import Enum
+
+from pydantic import BaseModel, Field
+
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.response import AgentResponse
+from core.llm.parser import JSONParser
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+# Constant for indicating missing new line at the end of a file in a unified diff
+NO_EOL = "\\ No newline at end of file"
+
+# Regular expression pattern for matching hunk headers
+PATCH_HEADER_PATTERN = re.compile(r"^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@")
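+# e.g. "@@ -12,5 +12,6 @@" matches with groups ("12", "5", "12", "6")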
+
+# Maximum number of attempts to ask for review if it can't be parsed
+MAX_REVIEW_RETRIES = 2
+
+# Maximum number of code implementation attempts after which we accept the changes unconditionally
+MAX_CODING_ATTEMPTS = 3
+
+
+class Decision(str, Enum):
+ APPLY = "apply"
+ IGNORE = "ignore"
+ REWORK = "rework"
+
+
+class Hunk(BaseModel):
+ number: int = Field(description="Index of the hunk in the diff. Starts from 1.")
+ reason: str = Field(description="Reason for applying or ignoring this hunk, or for asking for it to be reworked.")
+ decision: Decision = Field(description="Whether to apply this hunk, rework, or ignore it.")
+
+
+class ReviewChanges(BaseModel):
+ hunks: list[Hunk]
+ review_notes: str = Field(description="Additional review notes (optional, can be empty).")
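+
+# An LLM response matching this schema might look like (illustrative sketch):
+#   {"hunks": [{"number": 1, "reason": "Implements the requested change", "decision": "apply"}],
+#    "review_notes": ""}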
+
+
+class CodeReviewer(BaseAgent):
+ agent_type = "code-reviewer"
+ display_name = "Code Reviewer"
+
+ async def run(self) -> AgentResponse:
+ if (
+ not self.prev_response.data["old_content"]
+ or self.prev_response.data["new_content"] == self.prev_response.data["old_content"]
+ or self.prev_response.data["attempt"] >= MAX_CODING_ATTEMPTS
+ ):
+ # we always auto-accept new files and unchanged files, or if we've tried too many times
+ return await self.accept_changes(self.prev_response.data["path"], self.prev_response.data["new_content"])
+
+ approved_content, feedback = await self.review_change(
+ self.prev_response.data["path"],
+ self.prev_response.data["instructions"],
+ self.prev_response.data["old_content"],
+ self.prev_response.data["new_content"],
+ )
+ if feedback:
+ return AgentResponse.code_review_feedback(
+ self,
+ new_content=self.prev_response.data["new_content"],
+ approved_content=approved_content,
+ feedback=feedback,
+ attempt=self.prev_response.data["attempt"],
+ )
+ else:
+ return await self.accept_changes(self.prev_response.data["path"], approved_content)
+
+ async def accept_changes(self, path: str, content: str) -> AgentResponse:
+ await self.state_manager.save_file(path, content)
+ self.next_state.complete_step()
+
+ input_required = self.state_manager.get_input_required(content)
+ if input_required:
+ return AgentResponse.input_required(
+ self,
+ [{"file": path, "line": line} for line in input_required],
+ )
+ else:
+ return AgentResponse.done(self)
+
+ def _get_task_convo(self) -> AgentConvo:
+ # FIXME: Current prompts reuse conversation from the developer so we have to resort to this
+ task = self.current_state.current_task
+ current_task_index = self.current_state.tasks.index(task)
+
+ convo = AgentConvo(self).template(
+ "breakdown",
+ task=task,
+ iteration=None,
+ current_task_index=current_task_index,
+ )
+ # TODO: We currently show last iteration to the code monkey; we might need to show the task
+ # breakdown and all the iterations instead? To think about when refactoring prompts
+ if self.current_state.iterations:
+ convo.assistant(self.current_state.iterations[-1]["description"])
+ else:
+ convo.assistant(self.current_state.current_task["instructions"])
+ return convo
+
+ async def review_change(
+ self, file_name: str, instructions: str, old_content: str, new_content: str
+ ) -> tuple[str, str]:
+ """
+ Review changes that were applied to the file.
+
+ This asks the LLM to act as a PR reviewer and for each part (hunk) of the
+ diff, decide if it should be applied (kept) or ignored (removed from the PR).
+
+ :param file_name: name of the file being modified
+ :param instructions: instructions for the reviewer
+ :param old_content: old file content
+ :param new_content: new file content (with proposed changes)
+        :return: tuple with file content updated with approved changes, and review feedback
+
+ Diff hunk explanation: https://www.gnu.org/software/diffutils/manual/html_node/Hunks.html
+ """
+
+ hunks = self.get_diff_hunks(file_name, old_content, new_content)
+
+ llm = self.get_llm()
+ convo = (
+ self._get_task_convo()
+ .template(
+ "review_changes",
+ instructions=instructions,
+ file_name=file_name,
+ old_content=old_content,
+ hunks=hunks,
+ )
+ .require_schema(ReviewChanges)
+ )
+ llm_response: ReviewChanges = await llm(convo, temperature=0, parser=JSONParser(ReviewChanges))
+
+ for i in range(MAX_REVIEW_RETRIES):
+ reasons = {}
+ ids_to_apply = set()
+ ids_to_ignore = set()
+ ids_to_rework = set()
+ for hunk in llm_response.hunks:
+ reasons[hunk.number - 1] = hunk.reason
+ if hunk.decision == "apply":
+ ids_to_apply.add(hunk.number - 1)
+ elif hunk.decision == "ignore":
+ ids_to_ignore.add(hunk.number - 1)
+ elif hunk.decision == "rework":
+ ids_to_rework.add(hunk.number - 1)
+
+ n_hunks = len(hunks)
+ n_review_hunks = len(reasons)
+ if n_review_hunks == n_hunks:
+ break
+ elif n_review_hunks < n_hunks:
+ error = "Not all hunks have been reviewed. Please review all hunks and add 'apply', 'ignore' or 'rework' decision for each."
+ elif n_review_hunks > n_hunks:
+ error = f"Your review contains more hunks ({n_review_hunks}) than in the original diff ({n_hunks}). Note that one hunk may have multiple changed lines."
+
+ # Max two retries; if the reviewer still hasn't reviewed all hunks, we'll just use the entire new content
+ convo.assistant(llm_response.model_dump_json()).user(error)
+ llm_response = await llm(convo, parser=JSONParser(ReviewChanges))
+ else:
+ return new_content, None
+
+ hunks_to_apply = [h for i, h in enumerate(hunks) if i in ids_to_apply]
+ diff_log = f"--- {file_name}\n+++ {file_name}\n" + "\n".join(hunks_to_apply)
+
+ hunks_to_rework = [(i, h) for i, h in enumerate(hunks) if i in ids_to_rework]
+ review_log = (
+ "\n\n".join([f"## Change\n```{hunk}```\nReviewer feedback:\n{reasons[i]}" for (i, hunk) in hunks_to_rework])
+ + "\n\nReview notes:\n"
+ + llm_response.review_notes
+ )
+
+ if len(hunks_to_apply) == len(hunks):
+ await self.send_message("Applying entire change")
+ log.info(f"Applying entire change to {file_name}")
+ return new_content, None
+
+ elif len(hunks_to_apply) == 0:
+ if hunks_to_rework:
+ await self.send_message(
+ f"Requesting rework for {len(hunks_to_rework)} changes with reason: {llm_response.review_notes}"
+ )
+ log.info(f"Requesting rework for {len(hunks_to_rework)} changes to {file_name} (0 hunks to apply)")
+ return old_content, review_log
+ else:
+ # If everything can be safely ignored, it's probably because the files already implement the changes
+ # from previous tasks (which can happen often). Insisting on a change here is likely to cause problems.
+ await self.send_message(f"Rejecting entire change with reason: {llm_response.review_notes}")
+ log.info(f"Rejecting entire change to {file_name} with reason: {llm_response.review_notes}")
+ return old_content, None
+
+ print("Applying code change:\n" + diff_log)
+ log.info(f"Applying code change to {file_name}:\n{diff_log}")
+ new_content = self.apply_diff(file_name, old_content, hunks_to_apply, new_content)
+ if hunks_to_rework:
+ print(f"Requesting rework for {len(hunks_to_rework)} changes with reason: {llm_response.review_notes}")
+ log.info(f"Requesting further rework for {len(hunks_to_rework)} changes to {file_name}")
+ return new_content, review_log
+ else:
+ return new_content, None
+
+ @staticmethod
+ def get_diff_hunks(file_name: str, old_content: str, new_content: str) -> list[str]:
+ """
+ Get the diff between two files.
+
+        This uses Python difflib to produce a unified diff, then splits
+ it into hunks that will be separately reviewed by the reviewer.
+
+ :param file_name: name of the file being modified
+ :param old_content: old file content
+ :param new_content: new file content
+ :return: change hunks from the unified diff
+ """
+ from_name = "old_" + file_name
+ to_name = "to_" + file_name
+ from_lines = old_content.splitlines(keepends=True)
+ to_lines = new_content.splitlines(keepends=True)
+ diff_gen = unified_diff(from_lines, to_lines, fromfile=from_name, tofile=to_name)
+ diff_txt = "".join(diff_gen)
+
+        # Note: flags must be passed via the keyword argument; a bare third
+        # positional argument to re.split() would be interpreted as maxsplit.
+        hunks = re.split(r"\n@@", diff_txt, flags=re.MULTILINE)
+ result = []
+ for i, h in enumerate(hunks):
+ # Skip the prologue (file names)
+ if i == 0:
+ continue
+ txt = h.splitlines()
+ txt[0] = "@@" + txt[0]
+ result.append("\n".join(txt))
+ return result
+
+ def apply_diff(self, file_name: str, old_content: str, hunks: list[str], fallback: str):
+ """
+ Apply the diff to the original file content.
+
+ This uses the internal `_apply_patch` method to apply the
+ approved diff hunks to the original file content.
+
+ If patch apply fails, the fallback is the full new file content
+        with all the changes applied (as if the reviewer approved everything).
+
+ :param file_name: name of the file being modified
+ :param old_content: old file content
+ :param hunks: change hunks from the unified diff
+ :param fallback: proposed new file content (with all the changes applied)
+ """
+ diff = (
+ "\n".join(
+ [
+ f"--- {file_name}",
+ f"+++ {file_name}",
+ ]
+ + hunks
+ )
+ + "\n"
+ )
+ try:
+ fixed_content = self._apply_patch(old_content, diff)
+ except Exception as e:
+ # This should never happen but if it does, just use the new version from
+ # the LLM and hope for the best
+ print(f"Error applying diff: {e}; hoping all changes are valid")
+ return fallback
+
+ return fixed_content
+
+ # Adapted from https://gist.github.com/noporpoise/16e731849eb1231e86d78f9dfeca3abc (Public Domain)
+ @staticmethod
+ def _apply_patch(original: str, patch: str, revert: bool = False):
+ """
+ Apply a patch to a string to recover a newer version of the string.
+
+ :param original: The original string.
+ :param patch: The patch to apply.
+ :param revert: If True, treat the original string as the newer version and recover the older string.
+ :return: The updated string after applying the patch.
+ """
+ original_lines = original.splitlines(True)
+ patch_lines = patch.splitlines(True)
+
+ updated_text = ""
+ index_original = start_line = 0
+
+ # Choose which group of the regex to use based on the revert flag
+ match_index, line_sign = (1, "+") if not revert else (3, "-")
+
+ # Skip header lines of the patch
+ while index_original < len(patch_lines) and patch_lines[index_original].startswith(("---", "+++")):
+ index_original += 1
+
+ while index_original < len(patch_lines):
+ match = PATCH_HEADER_PATTERN.match(patch_lines[index_original])
+ if not match:
+ raise Exception("Bad patch -- regex mismatch [line " + str(index_original) + "]")
+
+ line_number = int(match.group(match_index)) - 1 + (match.group(match_index + 1) == "0")
+
+ if start_line > line_number or line_number > len(original_lines):
+ raise Exception("Bad patch -- bad line number [line " + str(index_original) + "]")
+
+ updated_text += "".join(original_lines[start_line:line_number])
+ start_line = line_number
+ index_original += 1
+
+ while index_original < len(patch_lines) and patch_lines[index_original][0] != "@":
+ if index_original + 1 < len(patch_lines) and patch_lines[index_original + 1][0] == "\\":
+ line_content = patch_lines[index_original][:-1]
+ index_original += 2
+ else:
+ line_content = patch_lines[index_original]
+ index_original += 1
+
+ if line_content:
+ if line_content[0] == line_sign or line_content[0] == " ":
+ updated_text += line_content[1:]
+ start_line += line_content[0] != line_sign
+
+ updated_text += "".join(original_lines[start_line:])
+ return updated_text
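+
+# Example (sketch) of what _apply_patch does with a single-hunk diff:
+#
+#   original = "a\nb\n"
+#   patch = "--- f\n+++ f\n@@ -1,2 +1,2 @@\n a\n-b\n+c\n"
+#   CodeReviewer._apply_patch(original, patch)  # returns "a\nc\n"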
diff --git a/core/agents/convo.py b/core/agents/convo.py
new file mode 100644
index 00000000..ad389cf6
--- /dev/null
+++ b/core/agents/convo.py
@@ -0,0 +1,75 @@
+import json
+import sys
+from copy import deepcopy
+from typing import TYPE_CHECKING, Optional
+
+from pydantic import BaseModel
+
+from core.config import get_config
+from core.llm.convo import Convo
+from core.llm.prompt import JinjaFileTemplate
+from core.log import get_logger
+
+if TYPE_CHECKING:
+    from core.agents.base import BaseAgent
+
+log = get_logger(__name__)
+
+
+class AgentConvo(Convo):
+ prompt_loader: Optional[JinjaFileTemplate] = None
+
+ def __init__(self, agent: "BaseAgent"):
+ self.agent_instance = agent
+ super().__init__()
+ try:
+ system_message = self.render("system")
+ self.system(system_message)
+ except ValueError as err:
+ log.warning(f"Agent {agent.__class__.__name__} has no system prompt: {err}")
+
+ @classmethod
+ def _init_templates(cls):
+ if cls.prompt_loader is not None:
+ return
+
+ config = get_config()
+ cls.prompt_loader = JinjaFileTemplate(config.prompt.paths)
+
+ def _get_default_template_vars(self) -> dict:
+ if sys.platform == "win32":
+ os = "Windows"
+ elif sys.platform == "darwin":
+ os = "macOS"
+ else:
+ os = "Linux"
+
+ return {
+ "state": self.agent_instance.current_state,
+ "os": os,
+ }
+
+ def render(self, name: str, **kwargs) -> str:
+ self._init_templates()
+
+ kwargs.update(self._get_default_template_vars())
+
+ # Jinja uses "/" even in Windows
+ template_name = f"{self.agent_instance.agent_type}/{name}.prompt"
+ log.debug(f"Loading template {template_name}")
+ return self.prompt_loader(template_name, **kwargs)
+
+ def template(self, template_name: str, **kwargs) -> "AgentConvo":
+ message = self.render(template_name, **kwargs)
+ self.user(message)
+ return self
+
+ def fork(self) -> "AgentConvo":
+ child = AgentConvo(self.agent_instance)
+ child.messages = deepcopy(self.messages)
+ return child
+
+ def require_schema(self, model: BaseModel) -> "AgentConvo":
+ schema_txt = json.dumps(model.model_json_schema())
+ self.user(f"IMPORTANT: Your response MUST conform to this JSON schema:\n```\n{schema_txt}\n```")
+ return self
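+
+
+# Typical usage (sketch, mirroring how agents elsewhere in this change use AgentConvo):
+#
+#   convo = AgentConvo(agent).template("breakdown", task=task).require_schema(SomeModel)
+#   response = await llm(convo, parser=JSONParser(SomeModel))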
diff --git a/core/agents/developer.py b/core/agents/developer.py
new file mode 100644
index 00000000..d1642739
--- /dev/null
+++ b/core/agents/developer.py
@@ -0,0 +1,294 @@
+from enum import Enum
+from typing import Annotated, Literal, Optional, Union
+from uuid import uuid4
+
+from pydantic import BaseModel, Field
+
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.response import AgentResponse, ResponseType
+from core.llm.parser import JSONParser
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+class StepType(str, Enum):
+ COMMAND = "command"
+ SAVE_FILE = "save_file"
+ HUMAN_INTERVENTION = "human_intervention"
+
+
+class CommandOptions(BaseModel):
+ command: str = Field(description="Command to run")
+ timeout: int = Field(description="Timeout in seconds")
+ success_message: str = ""
+
+
+class SaveFileOptions(BaseModel):
+ path: str
+
+
+class SaveFileStep(BaseModel):
+ type: Literal[StepType.SAVE_FILE] = StepType.SAVE_FILE
+ save_file: SaveFileOptions
+
+
+class CommandStep(BaseModel):
+ type: Literal[StepType.COMMAND] = StepType.COMMAND
+ command: CommandOptions
+
+
+class HumanInterventionStep(BaseModel):
+ type: Literal[StepType.HUMAN_INTERVENTION] = StepType.HUMAN_INTERVENTION
+ human_intervention_description: str
+
+
+Step = Annotated[
+ Union[SaveFileStep, CommandStep, HumanInterventionStep],
+ Field(discriminator="type"),
+]
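+# Thanks to the "type" discriminator above, a JSON object such as
+#   {"type": "command", "command": {"command": "npm install", "timeout": 60}}
+# is parsed directly into a CommandStep instead of being tried against every
+# union member.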
+
+
+class TaskSteps(BaseModel):
+ steps: list[Step]
+
+
+class Developer(BaseAgent):
+ agent_type = "developer"
+ display_name = "Developer"
+
+ async def run(self) -> AgentResponse:
+ if self.prev_response and self.prev_response.type == ResponseType.TASK_REVIEW_FEEDBACK:
+ return await self.breakdown_current_iteration(self.prev_response.data["feedback"])
+
+ # If any of the files are missing metadata/descriptions, those need to be filled-in
+ missing_descriptions = [file.path for file in self.current_state.files if not file.meta.get("description")]
+ if missing_descriptions:
+ log.debug(f"Some files are missing descriptions: {', '.join(missing_descriptions)}, reqesting analysis")
+ return AgentResponse.describe_files(self)
+
+ log.debug(f"Current state files: {len(self.current_state.files)}, relevant {self.current_state.relevant_files}")
+ # Check which files are relevant to the current task
+ if self.current_state.files and not self.current_state.relevant_files:
+ await self.get_relevant_files()
+ return AgentResponse.done(self)
+
+ if not self.current_state.unfinished_tasks:
+ log.warning("No unfinished tasks found, nothing to do (why am I called? is this a bug?)")
+ return AgentResponse.done(self)
+
+ if self.current_state.unfinished_iterations:
+ return await self.breakdown_current_iteration()
+
+ # By default, we want to ask the user if they want to run the task,
+ # except in certain cases (such as they've just edited it).
+ if not self.current_state.current_task.get("run_always", False):
+ if not await self.ask_to_execute_task():
+ return AgentResponse.done(self)
+
+ return await self.breakdown_current_task()
+
+ async def breakdown_current_iteration(self, review_feedback: Optional[str] = None) -> AgentResponse:
+ """
+ Breaks down current iteration or task review into steps.
+
+ :param review_feedback: If provided, the task review feedback is broken down instead of the current iteration
+ :return: AgentResponse.done(self) when the breakdown is done
+ """
+ if self.current_state.unfinished_steps:
+ # if this happens, it's most probably a bug as we should have gone through all the
+        # steps before getting new iteration instructions
+ log.warning(
+ f"Unfinished steps found before the next iteration is broken down: {self.current_state.unfinished_steps}"
+ )
+
+ if review_feedback is not None:
+ iteration = None
+ description = review_feedback
+ user_feedback = ""
+ source = "review"
+ n_tasks = 1
+ log.debug(f"Breaking down the task review feedback {review_feedback}")
+ await self.send_message("Breaking down the task review feedback...")
+ else:
+ iteration = self.current_state.current_iteration
+ if iteration is None:
+ log.error("Iteration breakdown called but there's no current iteration or task review, possible bug?")
+ return AgentResponse.done(self)
+
+ description = iteration["description"]
+ user_feedback = iteration["user_feedback"]
+ source = "troubleshooting"
+ n_tasks = len(self.next_state.iterations)
+ log.debug(f"Breaking down the iteration {description}")
+ await self.send_message("Breaking down the current task iteration ...")
+
+ await self.ui.send_task_progress(
+ n_tasks, # iterations and reviews can be created only one at a time, so we are always on last one
+ n_tasks,
+ self.current_state.current_task["description"],
+ source,
+ "in-progress",
+ )
+ llm = self.get_llm()
+ # FIXME: In case of iteration, parse_task depends on the context (files, tasks, etc) set there.
+ # Ideally this prompt would be self-contained.
+ convo = (
+ AgentConvo(self)
+ .template(
+ "iteration",
+ current_task=self.current_state.current_task,
+ user_feedback=user_feedback,
+ user_feedback_qa=None,
+ next_solution_to_try=None,
+ )
+ .assistant(description)
+ .template("parse_task")
+ .require_schema(TaskSteps)
+ )
+ response: TaskSteps = await llm(convo, parser=JSONParser(TaskSteps), temperature=0)
+
+ self.set_next_steps(response, source)
+
+ if iteration:
+ self.next_state.complete_iteration()
+
+ return AgentResponse.done(self)
+
+ async def breakdown_current_task(self) -> AgentResponse:
+ task = self.current_state.current_task
+ source = self.current_state.current_epic.get("source", "app")
+ await self.ui.send_task_progress(
+ self.current_state.tasks.index(self.current_state.current_task) + 1,
+ len(self.current_state.tasks),
+ self.current_state.current_task["description"],
+ source,
+ "in-progress",
+ )
+
+ log.debug(f"Breaking down the current task: {task['description']}")
+ await self.send_message("Thinking about how to implement this task ...")
+
+ current_task_index = self.current_state.tasks.index(task)
+
+ llm = self.get_llm()
+ convo = AgentConvo(self).template(
+ "breakdown",
+ task=task,
+ iteration=None,
+ current_task_index=current_task_index,
+ )
+ response: str = await llm(convo)
+
+ # FIXME: check if this is correct, as sqlalchemy can't figure out modifications
+ # to attributes; however, self.next is not saved yet so maybe this is fine
+ self.next_state.tasks[current_task_index] = {
+ **task,
+ "instructions": response,
+ }
+
+ await self.send_message("Breaking down the task into steps ...")
+ convo.template("parse_task").require_schema(TaskSteps)
+ response: TaskSteps = await llm(convo, parser=JSONParser(TaskSteps), temperature=0)
+
+ # There might be state leftovers from previous tasks that we need to clean here
+ self.next_state.modified_files = {}
+ self.set_next_steps(response, source)
+ return AgentResponse.done(self)
+
+ async def get_relevant_files(self) -> AgentResponse:
+ log.debug("Getting relevant files for the current task")
+ await self.send_message("Figuring out which project files are relevant for the next task ...")
+
+ llm = self.get_llm()
+ convo = AgentConvo(self).template("filter_files", current_task=self.current_state.current_task)
+
+ # FIXME: this doesn't validate correct structure format, we should use pydantic for that as well
+ llm_response: list[str] = await llm(convo, parser=JSONParser(), temperature=0)
+
+ existing_files = {file.path for file in self.current_state.files}
+ self.next_state.relevant_files = [path for path in llm_response if path in existing_files]
+
+ return AgentResponse.done(self)
+
+ def set_next_steps(self, response: TaskSteps, source: str):
+ # For logging/debugging purposes, we don't want to remove the finished steps
+ # until we're done with the task.
+ finished_steps = [step for step in self.current_state.steps if step["completed"]]
+ self.next_state.steps = finished_steps + [
+ {
+ "id": uuid4().hex,
+ "completed": False,
+ "source": source,
+ **step.model_dump(),
+ }
+ for step in response.steps
+ ]
+ if len(self.next_state.unfinished_steps) > 0:
+ self.next_state.steps += [
+ # TODO: add refactor step here once we have the refactor agent
+ {
+ "id": uuid4().hex,
+ "completed": False,
+ "type": "review_task",
+ "source": source,
+ },
+ {
+ "id": uuid4().hex,
+ "completed": False,
+ "type": "create_readme",
+ "source": source,
+ },
+ ]
+ log.debug(f"Next steps: {self.next_state.unfinished_steps}")
+
+ async def ask_to_execute_task(self) -> bool:
+ """
+ Asks the user to approve, skip or edit the current task.
+
+        If the task is edited, the method returns False so that the changes are saved. The
+ Orchestrator will rerun the agent on the next iteration.
+
+ :return: True if the task should be executed as is, False if the task is skipped or edited
+ """
+ description = self.current_state.current_task["description"]
+ user_response = await self.ask_question(
+ "Do you want to execute the this task:\n\n" + description,
+ buttons={"yes": "Yes", "edit": "Edit Task", "skip": "Skip Task"},
+ default="yes",
+ buttons_only=True,
+ )
+ if user_response.button == "yes":
+ # Execute the task as is
+ return True
+
+ if user_response.cancelled or user_response.button == "skip":
+ log.info(f"Skipping task: {description}")
+ self.next_state.current_task["instructions"] = "(skipped on user request)"
+ self.next_state.complete_task()
+ await self.send_message(f"Skipping task {description}")
+ # We're done here, and will pick up the next task (if any) on the next run
+ return False
+
+ user_response = await self.ask_question(
+ "Edit the task description:",
+ buttons={
+ # FIXME: Continue doesn't actually work, VSCode doesn't send the user
+ # message if it's clicked. Long term we need to fix the extension.
+ # "continue": "Continue",
+ "cancel": "Cancel",
+ },
+ default="continue",
+ initial_text=description,
+ )
+ if user_response.button == "cancel" or user_response.cancelled:
+ # User hasn't edited the task so we can execute it immediately as is
+ return True
+
+ self.next_state.current_task["description"] = user_response.text
+ self.next_state.current_task["run_always"] = True
+ self.next_state.relevant_files = []
+ log.info(f"Task description updated to: {user_response.text}")
+ # Orchestrator will rerun us with the new task description
+ return False
diff --git a/core/agents/error_handler.py b/core/agents/error_handler.py
new file mode 100644
index 00000000..d74b6fb3
--- /dev/null
+++ b/core/agents/error_handler.py
@@ -0,0 +1,108 @@
+from uuid import uuid4
+
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.response import AgentResponse
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+class ErrorHandler(BaseAgent):
+ """
+ Error handler agent.
+
+ Error handler is responsible for handling errors returned by other agents. If it's possible
+ to recover from the error, it should do it (which may include updating the "next" state) and
+ return DONE. Otherwise it should return EXIT to tell Orchestrator to quit the application.
+ """
+
+ agent_type = "error-handler"
+ display_name = "Error Handler"
+
+ async def run(self) -> AgentResponse:
+ from core.agents.executor import Executor
+ from core.agents.spec_writer import SpecWriter
+
+ error = self.prev_response
+ if error is None:
+ log.warning("ErrorHandler called without a previous error", stack_info=True)
+ return AgentResponse.done(self)
+
+ log.error(
+ f"Agent {error.agent.display_name} returned error response: {error.type}",
+ extra={"data": error.data},
+ )
+
+ if isinstance(error.agent, SpecWriter):
+ # If SpecWriter wasn't able to get the project description, there's nothing for
+ # us to do.
+ return AgentResponse.exit(self)
+
+ if isinstance(error.agent, Executor):
+ return await self.handle_command_error(
+ error.data.get("message", "Unknown error"), error.data.get("details", {})
+ )
+
+ log.error(
+ f"Unhandled error response from agent {error.agent.display_name}",
+ extra={"data": error.data},
+ )
+ return AgentResponse.exit(self)
+
+ async def handle_command_error(self, message: str, details: dict) -> AgentResponse:
+ """
+ Handle an error returned by Executor agent.
+
+ The error message must be the analysis of the command execution, and the details must contain:
+ * cmd - command that was executed
+ * timeout - timeout for the command if any (or None if no timeout was used)
+ * status_code - exit code for the command (or None if the command timed out)
+ * stdout - standard output of the command
+ * stderr - standard error of the command
+
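+ Example `details` value (illustrative, not from a real run):
+
+ {"cmd": "npm test", "timeout": 60, "status_code": 1,
+ "stdout": "", "stderr": "npm ERR! missing script: test"}
+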
+ :return: AgentResponse
+ """
+ cmd = details.get("cmd")
+ timeout = details.get("timeout")
+ status_code = details.get("status_code")
+ stdout = details.get("stdout", "")
+ stderr = details.get("stderr", "")
+
+ if not message:
+ raise ValueError("No error message provided in command error response")
+ if not cmd:
+ raise ValueError("No command provided in command error response details")
+
+ llm = self.get_llm()
+ convo = AgentConvo(self).template(
+ "debug",
+ task_steps=self.current_state.steps,
+ current_task=self.current_state.current_task,
+ # FIXME: can this break?
+ step_index=self.current_state.steps.index(self.current_state.current_step),
+ cmd=cmd,
+ timeout=timeout,
+ stdout=stdout,
+ stderr=stderr,
+ status_code=status_code,
+ # FIXME: everything above is copy-pasted from Executor
+ analysis=message,
+ )
+ llm_response: str = await llm(convo)
+
+ # TODO: duplicate from Troubleshooter, maybe extract to a ProjectState method?
+ self.next_state.iterations = self.current_state.iterations + [
+ {
+ "id": uuid4().hex,
+ "user_feedback": f"Error running command: {cmd}",
+ "description": llm_response,
+ "alternative_solutions": [],
+ "attempts": 1,
+ "completed": False,
+ }
+ ]
+ # TODO: maybe have ProjectState.finished_steps as well? would make the debug/ran_command prompts nicer too
+ self.next_state.steps = [s for s in self.current_state.steps if s.get("completed") is True]
+ # No need to call complete_step() here as we've just removed the steps so that Developer can break down the iteration
+ return AgentResponse.done(self)
diff --git a/core/agents/executor.py b/core/agents/executor.py
new file mode 100644
index 00000000..450588d8
--- /dev/null
+++ b/core/agents/executor.py
@@ -0,0 +1,166 @@
+from datetime import datetime
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.response import AgentResponse
+from core.llm.parser import JSONParser
+from core.log import get_logger
+from core.proc.exec_log import ExecLog
+from core.proc.process_manager import ProcessManager
+from core.state.state_manager import StateManager
+from core.ui.base import AgentSource, UIBase
+
+log = get_logger(__name__)
+
+
+class CommandResult(BaseModel):
+ """
+ Analysis of the command run and decision on the next steps.
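+
+ A JSON response like the following (illustrative) would satisfy this schema:
+
+ {"analysis": "The command exited with code 0 and printed the expected output.",
+ "success": true}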
+ """
+
+ analysis: str = Field(
+ description="Analysis of the command output (stdout, stderr) and exit code, in context of the current task"
+ )
+ success: bool = Field(
+ description="True if the command should be treated as successful and the task should continue, false if the command unexpectedly failed and we should debug the issue"
+ )
+
+
+class Executor(BaseAgent):
+ agent_type = "executor"
+ display_name = "Executor"
+
+ def __init__(
+ self,
+ state_manager: StateManager,
+ ui: UIBase,
+ ):
+ """
+ Create a new Executor agent
+ """
+ self.ui_source = AgentSource(self.display_name, self.agent_type)
+ self.ui = ui
+ self.state_manager = state_manager
+ self.process_manager = ProcessManager(
+ root_dir=state_manager.get_full_project_root(),
+ output_handler=self.output_handler,
+ exit_handler=self.exit_handler,
+ )
+ self.stream_output = True
+ # step is set via for_step() before run() is called
+ self.step = None
+
+ def for_step(self, step):
+ # FIXME: not needed, refactor to use self.current_state.current_step
+ # in general, passing current step is not needed
+ self.step = step
+ return self
+
+ async def output_handler(self, out, err):
+ await self.stream_handler(out)
+ await self.stream_handler(err)
+
+ async def exit_handler(self, process):
+ pass
+
+ async def run(self) -> AgentResponse:
+ if not self.step:
+ raise ValueError("No current step set (probably an Orchestrator bug)")
+
+ options = self.step["command"]
+ cmd = options["command"]
+ timeout = options.get("timeout")
+
+ if timeout:
+ q = f"Can I run command: {cmd} with {timeout}s timeout?"
+ else:
+ q = f"Can I run command: {cmd}?"
+
+ confirm = await self.ask_question(
+ q,
+ buttons={"yes": "Yes", "no": "No"},
+ default="yes",
+ buttons_only=True,
+ )
+ if confirm.button == "no":
+ log.info(f"Skipping command execution of `{cmd}` (requested by user)")
+ await self.send_message(f"Skipping command {cmd}")
+ self.complete()
+ return AgentResponse.done(self)
+
+ started_at = datetime.now()
+
+ log.info(f"Running command `{cmd}` with timeout {timeout}s")
+ status_code, stdout, stderr = await self.process_manager.run_command(cmd, timeout=timeout)
+ llm_response = await self.check_command_output(cmd, timeout, stdout, stderr, status_code)
+
+ duration = (datetime.now() - started_at).total_seconds()
+
+ self.complete()
+
+ exec_log = ExecLog(
+ started_at=started_at,
+ duration=duration,
+ cmd=cmd,
+ cwd=".",
+ env={},
+ timeout=timeout,
+ status_code=status_code,
+ stdout=stdout,
+ stderr=stderr,
+ analysis=llm_response.analysis,
+ success=llm_response.success,
+ )
+ await self.state_manager.log_command_run(exec_log)
+
+ if llm_response.success:
+ return AgentResponse.done(self)
+
+ return AgentResponse.error(
+ self,
+ llm_response.analysis,
+ {
+ "cmd": cmd,
+ "timeout": timeout,
+ "stdout": stdout,
+ "stderr": stderr,
+ "status_code": status_code,
+ },
+ )
+
+ async def check_command_output(
+ self, cmd: str, timeout: Optional[int], stdout: str, stderr: str, status_code: Optional[int]
+ ) -> CommandResult:
+ llm = self.get_llm()
+ convo = (
+ AgentConvo(self)
+ .template(
+ "ran_command",
+ task_steps=self.current_state.steps,
+ current_task=self.current_state.current_task,
+ # FIXME: can step ever happen *not* to be in current steps?
+ step_index=self.current_state.steps.index(self.step),
+ cmd=cmd,
+ timeout=timeout,
+ stdout=stdout,
+ stderr=stderr,
+ status_code=status_code,
+ )
+ .require_schema(CommandResult)
+ )
+ return await llm(convo, parser=JSONParser(spec=CommandResult), temperature=0)
+
+ def complete(self):
+ """
+ Mark the step as complete.
+
+ Note that this marks the step complete in the next state. If there's an error,
+ the state won't get committed and the error handler will have access to the
+ current state, where this step is still unfinished.
+
+ This is intentional, so that the error handler can decide what to do with the
+ information we give it.
+ """
+ self.step = None
+ self.next_state.complete_step()
diff --git a/core/agents/human_input.py b/core/agents/human_input.py
new file mode 100644
index 00000000..5bd62d29
--- /dev/null
+++ b/core/agents/human_input.py
@@ -0,0 +1,46 @@
+from core.agents.base import BaseAgent
+from core.agents.response import AgentResponse, ResponseType
+
+
+class HumanInput(BaseAgent):
+ agent_type = "human-input"
+ display_name = "Human Input"
+
+ async def run(self) -> AgentResponse:
+ if self.prev_response and self.prev_response.type == ResponseType.INPUT_REQUIRED:
+ return await self.input_required(self.prev_response.data.get("files", []))
+
+ return await self.human_intervention(self.step)
+
+ async def human_intervention(self, step) -> AgentResponse:
+ description = step["human_intervention_description"]
+
+ await self.ask_question(
+ f"I need human intervention: {description}",
+ buttons={"continue": "Continue"},
+ default="continue",
+ buttons_only=True,
+ )
+ self.next_state.complete_step()
+ return AgentResponse.done(self)
+
+ async def input_required(self, files: list[dict]) -> AgentResponse:
+ for item in files:
+ file = item["file"]
+ line = item["line"]
+
+ # FIXME: this is an ugly hack, we shouldn't need to know how to get to VFS and
+ # anyways the full path is only available for local vfs, so this is doubly wrong;
+ # instead, we should just send the relative path to the extension and it should
+ # figure out where its local files are and how to open it.
+ full_path = self.state_manager.file_system.get_full_path(file)
+
+ await self.send_message(f"Input required on {file}:{line}")
+ await self.ui.open_editor(full_path, line)
+ await self.ask_question(
+ f"Please open {file} and modify line {line} according to the instructions.",
+ buttons={"continue": "Continue"},
+ default="continue",
+ buttons_only=True,
+ )
+ return AgentResponse.done(self)
diff --git a/core/agents/mixins.py b/core/agents/mixins.py
new file mode 100644
index 00000000..5ea0aae7
--- /dev/null
+++ b/core/agents/mixins.py
@@ -0,0 +1,37 @@
+from typing import Optional
+
+from core.agents.convo import AgentConvo
+
+
+class IterationPromptMixin:
+ """
+ Provides a method to find a solution to a problem based on user feedback.
+
+ Used by ProblemSolver and Troubleshooter agents.
+ """
+
+ async def find_solution(
+ self,
+ user_feedback: str,
+ *,
+ user_feedback_qa: Optional[list[str]] = None,
+ next_solution_to_try: Optional[str] = None,
+ ) -> str:
+ """
+ Generate a new solution for the problem the user reported.
+
+ :param user_feedback: User feedback about the problem.
+ :param user_feedback_qa: Additional q/a about the problem provided by the user (optional).
+ :param next_solution_to_try: Hint from ProblemSolver on which solution to try (optional).
+ :return: The generated solution to the problem.
+ """
+ llm = self.get_llm()
+ convo = AgentConvo(self).template(
+ "iteration",
+ current_task=self.current_state.current_task,
+ user_feedback=user_feedback,
+ user_feedback_qa=user_feedback_qa,
+ next_solution_to_try=next_solution_to_try,
+ )
+ llm_solution: str = await llm(convo)
+ return llm_solution
diff --git a/core/agents/orchestrator.py b/core/agents/orchestrator.py
new file mode 100644
index 00000000..c8430e9e
--- /dev/null
+++ b/core/agents/orchestrator.py
@@ -0,0 +1,329 @@
+from typing import Optional
+
+from core.agents.architect import Architect
+from core.agents.base import BaseAgent
+from core.agents.code_monkey import CodeMonkey
+from core.agents.code_reviewer import CodeReviewer
+from core.agents.developer import Developer
+from core.agents.error_handler import ErrorHandler
+from core.agents.executor import Executor
+from core.agents.human_input import HumanInput
+from core.agents.problem_solver import ProblemSolver
+from core.agents.response import AgentResponse, ResponseType
+from core.agents.spec_writer import SpecWriter
+from core.agents.task_reviewer import TaskReviewer
+from core.agents.tech_lead import TechLead
+from core.agents.tech_writer import TechnicalWriter
+from core.agents.troubleshooter import Troubleshooter
+from core.config import LLMProvider, get_config
+from core.llm.convo import Convo
+from core.log import get_logger
+from core.telemetry import telemetry
+from core.ui.base import ProjectStage
+
+log = get_logger(__name__)
+
+
+class Orchestrator(BaseAgent):
+ """
+ Main agent that controls the flow of the process.
+
+ Based on the current state of the project, the orchestrator invokes
+ all other agents. It is also responsible for determining when each
+ step is done and the project state needs to be committed to the database.
+ """
+
+ agent_type = "orchestrator"
+ display_name = "Orchestrator"
+
+ async def run(self) -> bool:
+ """
+ Run the Orchestrator agent.
+
+ :return: True if the Orchestrator exited successfully, False otherwise.
+ """
+ response = None
+
+ log.info(f"Starting {__name__}.Orchestrator")
+
+ self.executor = Executor(self.state_manager, self.ui)
+ self.process_manager = self.executor.process_manager
+ # self.chat = Chat() TODO
+
+ await self.init_ui()
+ await self.offline_changes_check()
+
+ llm_api_check = await self.test_llm_access()
+ if not llm_api_check:
+ return False
+
+ # TODO: consider refactoring this into two loops; the outer with one iteration per committed step,
+ # and the inner which runs the agents for the current step until they're done. This would simplify
+ # handle_done() and let us do other per-step processing (eg. describing files) in between agent runs.
+ while True:
+ await self.update_stats()
+
+ agent = self.create_agent(response)
+ log.debug(f"Running agent {agent.__class__.__name__} (step {self.current_state.step_index})")
+ response = await agent.run()
+
+ if response.type == ResponseType.EXIT:
+ log.debug(f"Agent {agent.__class__.__name__} requested exit")
+ break
+
+ if response.type == ResponseType.DONE:
+ response = await self.handle_done(agent, response)
+ continue
+
+ # TODO: rollback changes to "next" so they aren't accidentally committed?
+ return True
+
+ async def test_llm_access(self) -> bool:
+ """
+ Make sure the LLMs for all the defined agents are reachable.
+
+ Each LLM provider is only checked once.
+ Returns True if the check was successful for all LLMs.
+ """
+
+ config = get_config()
+ defined_agents = config.agent.keys()
+
+ convo = Convo()
+ convo.user(
+ " ".join(
+ [
+ "This is a connection test. If you can see this,",
+ "please respond only with 'START' and nothing else.",
+ ]
+ )
+ )
+
+ success = True
+ tested_llms: set[LLMProvider] = set()
+ for agent_name in defined_agents:
+ llm = self.get_llm(agent_name)
+ llm_config = config.llm_for_agent(agent_name)
+
+ if llm_config.provider in tested_llms:
+ continue
+
+ tested_llms.add(llm_config.provider)
+ provider_model_combo = f"{llm_config.provider.value} {llm_config.model}"
+ try:
+ resp = await llm(convo)
+ except Exception as err:
+ log.warning(f"API check for {provider_model_combo} failed: {err}")
+ success = False
+ await self.ui.send_message(f"Error connecting to the {provider_model_combo} API: {err}")
+ continue
+
+ if resp and len(resp) > 0:
+ log.debug(f"API check for {provider_model_combo} passed.")
+ else:
+ log.warning(f"API check for {provider_model_combo} failed.")
+ await self.ui.send_message(
+ f"Error connecting to the {provider_model_combo} API. Please check your settings and internet connection."
+ )
+ success = False
+
+ return success
+
+ async def offline_changes_check(self):
+ """
+ Check for changes outside of Pythagora.
+
+ If there are changes, ask the user if they want to keep them, and
+ import if needed.
+ """
+
+ log.info("Checking for offline changes.")
+ modified_files = await self.state_manager.get_modified_files()
+
+ if self.state_manager.workspace_is_empty():
+ # NOTE: this will currently get triggered on a new project, but will do
+ # nothing as there's no files in the database.
+ log.info("Detected empty workspace, restoring state from the database.")
+ await self.state_manager.restore_files()
+ elif modified_files:
+ await self.send_message(f"We found {len(modified_files)} new and/or modified files.")
+
+ hint = "".join(
+ [
+ "If you would like Pythagora to import those changes, click 'Yes'.\n",
+ "Clicking 'No' means Pythagora will restore (overwrite) all files to the last stored state.\n",
+ ]
+ )
+ use_changes = await self.ask_question(
+ question="Would you like to keep your changes?",
+ buttons={
+ "yes": "Yes, keep my changes",
+ "no": "No, restore last Pythagora state",
+ },
+ buttons_only=True,
+ hint=hint,
+ )
+ if use_changes.button == "yes":
+ log.debug("Importing offline changes into Pythagora.")
+ await self.import_files()
+ else:
+ log.debug("Restoring last stored state.")
+ await self.state_manager.restore_files()
+
+ log.info("Offline changes check done.")
+
+ async def handle_done(self, agent: BaseAgent, response: AgentResponse) -> AgentResponse:
+ """
+ Handle the DONE response from the agent and commit current state to the database.
+
+ This also checks for any files created or modified outside Pythagora and
+ imports them. If any of the files require input from the user, the returned response
+ will trigger the HumanInput agent to ask the user to provide the required input.
+
+ """
+ n_epics = len(self.next_state.epics)
+ n_finished_epics = n_epics - len(self.next_state.unfinished_epics)
+ n_tasks = len(self.next_state.tasks)
+ n_finished_tasks = n_tasks - len(self.next_state.unfinished_tasks)
+ n_iterations = len(self.next_state.iterations)
+ n_finished_iterations = n_iterations - len(self.next_state.unfinished_iterations)
+ n_steps = len(self.next_state.steps)
+ n_finished_steps = n_steps - len(self.next_state.unfinished_steps)
+
+ log.debug(
+ f"Agent {agent.__class__.__name__} is done, "
+ f"committing state for step {self.current_state.step_index}: "
+ f"{n_finished_epics}/{n_epics} epics, "
+ f"{n_finished_tasks}/{n_tasks} tasks, "
+ f"{n_finished_iterations}/{n_iterations} iterations, "
+ f"{n_finished_steps}/{n_steps} dev steps."
+ )
+ await self.state_manager.commit()
+
+ # If there are any new or modified files changed outside Pythagora,
+ # this is a good time to add them to the project. If any of them have
+ # INPUT_REQUIRED, we'll first ask the user to provide the required input.
+ return await self.import_files()
+
+ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent:
+ state = self.current_state
+
+ if prev_response:
+ if prev_response.type in [ResponseType.CANCEL, ResponseType.ERROR]:
+ return ErrorHandler(self.state_manager, self.ui, prev_response=prev_response)
+ if prev_response.type == ResponseType.CODE_REVIEW:
+ return CodeReviewer(self.state_manager, self.ui, prev_response=prev_response)
+ if prev_response.type == ResponseType.CODE_REVIEW_FEEDBACK:
+ return CodeMonkey(self.state_manager, self.ui, prev_response=prev_response, step=state.current_step)
+ if prev_response.type == ResponseType.DESCRIBE_FILES:
+ return CodeMonkey(self.state_manager, self.ui, prev_response=prev_response)
+ if prev_response.type == ResponseType.INPUT_REQUIRED:
+ # FIXME: HumanInput should be on the whole time and intercept chat/interrupt
+ return HumanInput(self.state_manager, self.ui, prev_response=prev_response)
+ if prev_response.type == ResponseType.UPDATE_EPIC:
+ return TechLead(self.state_manager, self.ui, prev_response=prev_response)
+ if prev_response.type == ResponseType.TASK_REVIEW_FEEDBACK:
+ return Developer(self.state_manager, self.ui, prev_response=prev_response)
+
+ if not state.specification.description:
+ # Ask the Spec Writer to refine and save the project specification
+ return SpecWriter(self.state_manager, self.ui)
+ elif not state.specification.architecture:
+ # Ask the Architect to design the project architecture and determine dependencies
+ return Architect(self.state_manager, self.ui, process_manager=self.process_manager)
+ elif (
+ not state.epics
+ or not self.current_state.unfinished_tasks
+ or (state.specification.template and not state.files)
+ ):
+ # Ask the Tech Lead to break down the initial project or feature into tasks and apply the project template
+ return TechLead(self.state_manager, self.ui, process_manager=self.process_manager)
+ elif not state.steps and not state.iterations:
+ # Ask the Developer to break down current task into actionable steps
+ return Developer(self.state_manager, self.ui)
+
+ if state.current_step:
+ # Execute next step in the task
+ # TODO: this can be parallelized in the future
+ return self.create_agent_for_step(state.current_step)
+
+ if state.unfinished_iterations:
+ if state.current_iteration["description"]:
+ # Break down the next iteration into steps
+ return Developer(self.state_manager, self.ui)
+ else:
+ # We need to iterate over the current task but there's no solution, as Pythagora
+ # is stuck in a loop, and ProblemSolver needs to find alternative solutions.
+ return ProblemSolver(self.state_manager, self.ui)
+
+ # We have just finished the task, call Troubleshooter to ask the user to review
+ return Troubleshooter(self.state_manager, self.ui)
+
+ def create_agent_for_step(self, step: dict) -> BaseAgent:
+ step_type = step.get("type")
+ if step_type == "save_file":
+ return CodeMonkey(self.state_manager, self.ui, step=step)
+ elif step_type == "command":
+ return self.executor.for_step(step)
+ elif step_type == "human_intervention":
+ return HumanInput(self.state_manager, self.ui, step=step)
+ elif step_type == "review_task":
+ return TaskReviewer(self.state_manager, self.ui)
+ elif step_type == "create_readme":
+ return TechnicalWriter(self.state_manager, self.ui)
+ else:
+ raise ValueError(f"Unknown step type: {step_type}")
+
+ async def import_files(self) -> Optional[AgentResponse]:
+ imported_files = await self.state_manager.import_files()
+ if not imported_files:
+ return None
+
+ log.info(f"Imported new/changed files to project: {', '.join(f.path for f in imported_files)}")
+
+ input_required_files: list[dict[str, int]] = []
+ for file in imported_files:
+ for line in self.state_manager.get_input_required(file.content.content):
+ input_required_files.append({"file": file.path, "line": line})
+
+ if input_required_files:
+ # This will trigger the HumanInput agent to ask the user to provide the required changes
+ # If the user changes anything (removes the "required changes"), the file will be re-imported.
+ return AgentResponse.input_required(self, input_required_files)
+
+ # Commit the newly imported files
+ log.debug(f"Committing imported files as a separate step {self.current_state.step_index}")
+ await self.state_manager.commit()
+ return None
+
+ async def init_ui(self):
+ await self.ui.send_project_root(self.state_manager.get_full_project_root())
+ if self.current_state.epics:
+ await self.ui.send_project_stage(ProjectStage.CODING)
+ elif self.current_state.specification:
+ await self.ui.send_project_stage(ProjectStage.ARCHITECTURE)
+ else:
+ await self.ui.send_project_stage(ProjectStage.DESCRIPTION)
+
+ async def update_stats(self):
+ if self.current_state.steps and self.current_state.current_step:
+ source = self.current_state.current_step.get("source")
+ source_steps = [s for s in self.current_state.steps if s.get("source") == source]
+ await self.ui.send_step_progress(
+ source_steps.index(self.current_state.current_step) + 1,
+ len(source_steps),
+ self.current_state.current_step,
+ source,
+ )
+
+ total_files = 0
+ total_lines = 0
+ for file in self.current_state.files:
+ total_files += 1
+ total_lines += len(file.content.content.splitlines())
+
+ telemetry.set("num_files", total_files)
+ telemetry.set("num_lines", total_lines)
+
+ stats = telemetry.get_project_stats()
+ await self.ui.send_project_stats(stats)
diff --git a/core/agents/problem_solver.py b/core/agents/problem_solver.py
new file mode 100644
index 00000000..680a4215
--- /dev/null
+++ b/core/agents/problem_solver.py
@@ -0,0 +1,126 @@
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.mixins import IterationPromptMixin
+from core.agents.response import AgentResponse
+from core.llm.parser import JSONParser
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+class AlternativeSolutions(BaseModel):
+ # FIXME: This is probably extra leftover from some dead code in the old implementation
+ description_of_tried_solutions: str = Field(
+ description="A description of the solutions that were tried to solve the recurring issue that was labeled as loop by the user.",
+ )
+ alternative_solutions: list[str] = Field(
+ description=("List of all alternative solutions to the recurring issue that was labeled as loop by the user.")
+ )
+
+
+class ProblemSolver(IterationPromptMixin, BaseAgent):
+ agent_type = "problem-solver"
+ display_name = "Problem Solver"
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.iteration = self.current_state.current_iteration
+ self.next_state_iteration = self.next_state.current_iteration
+ self.previous_solutions = [s for s in self.iteration["alternative_solutions"] if s["tried"]]
+ self.possible_solutions = [s for s in self.iteration["alternative_solutions"] if not s["tried"]]
+
+ async def run(self) -> AgentResponse:
+ if self.iteration is None:
+ log.warning("ProblemSolver agent started without an iteration to work on, possible bug?")
+ return AgentResponse.done(self)
+
+ if not self.possible_solutions:
+ await self.generate_alternative_solutions()
+ return AgentResponse.done(self)
+
+ return await self.try_alternative_solutions()
+
+ async def generate_alternative_solutions(self):
+ llm = self.get_llm()
+ convo = (
+ AgentConvo(self)
+ .template(
+ "get_alternative_solutions",
+ user_input=self.iteration["user_feedback"],
+ iteration=self.iteration,
+ previous_solutions=self.previous_solutions,
+ )
+ .require_schema(AlternativeSolutions)
+ )
+ llm_response: AlternativeSolutions = await llm(
+ convo,
+ parser=JSONParser(spec=AlternativeSolutions),
+ temperature=1,
+ )
+ self.next_state_iteration["alternative_solutions"] = self.iteration["alternative_solutions"] + [
+ {
+ "user_feedback": None,
+ "description": solution,
+ "tried": False,
+ }
+ for solution in llm_response.alternative_solutions
+ ]
+ self.next_state.flag_iterations_as_modified()
+
+ async def try_alternative_solutions(self) -> AgentResponse:
+ preferred_solution = await self.ask_for_preferred_solution()
+ if preferred_solution is None:
+ # TODO: We have several alternative solutions but the user didn't choose any.
+ # This means the user either needs expert help, or that they need to go back and
+ # maybe rephrase the tasks or even the project specs.
+ # For now, we'll just mark these as not working and try to regenerate.
+ self.next_state_iteration["alternative_solutions"] = [
+ {
+ **s,
+ "tried": True,
+ "user_feedback": s["user_feedback"] or "That doesn't sound like a good idea, try something else.",
+ }
+ for s in self.possible_solutions
+ ]
+ self.next_state.flag_iterations_as_modified()
+ return AgentResponse.done(self)
+
+ index, next_solution_to_try = preferred_solution
+ llm_solution = await self.find_solution(
+ self.iteration["user_feedback"],
+ next_solution_to_try=next_solution_to_try,
+ )
+
+ self.next_state_iteration["alternative_solutions"][index]["tried"] = True
+ self.next_state_iteration["description"] = llm_solution
+ self.next_state_iteration["attempts"] = self.iteration["attempts"] + 1
+ self.next_state.flag_iterations_as_modified()
+ return AgentResponse.done(self)
+
+ async def ask_for_preferred_solution(self) -> Optional[tuple[int, str]]:
+ solutions = self.possible_solutions
+ buttons = {}
+
+ for i in range(len(solutions)):
+ buttons[str(i)] = str(i + 1)
+ buttons["none"] = "None of these"
+
+ solutions_txt = "\n\n".join([f"{i+1}: {s['description']}" for i, s in enumerate(solutions)])
+ user_response = await self.ask_question(
+ "Choose which solution would you like Pythagora to try next:\n\n" + solutions_txt,
+ buttons=buttons,
+ default="0",
+ buttons_only=True,
+ )
+ if user_response.button == "none" or user_response.cancelled:
+ return None
+
+ try:
+ i = int(user_response.button)
+ return i, solutions[i]["description"]
+ except (ValueError, IndexError):
+ return None
diff --git a/core/agents/response.py b/core/agents/response.py
new file mode 100644
index 00000000..eb2813a2
--- /dev/null
+++ b/core/agents/response.py
@@ -0,0 +1,139 @@
+from enum import Enum
+from typing import TYPE_CHECKING, Optional
+
+from core.log import get_logger
+
+if TYPE_CHECKING:
+ from core.agents.base import BaseAgent
+ from core.agents.error_handler import ErrorHandler
+
+
+log = get_logger(__name__)
+
+
+class ResponseType(str, Enum):
+ DONE = "done"
+ """Agent has finished processing."""
+
+ ERROR = "error"
+ """There was an error processing the request."""
+
+ CANCEL = "cancel"
+ """User explicitly cancelled the operation."""
+
+ EXIT = "exit"
+ """Pythagora should exit."""
+
+ CODE_REVIEW = "code-review"
+ """Agent is requesting a review of the created code."""
+
+ CODE_REVIEW_FEEDBACK = "code-review-feedback"
+ """Agent is providing feedback on the code review."""
+
+ DESCRIBE_FILES = "describe-files"
+ """Analysis of the files in the project is requested."""
+
+ INPUT_REQUIRED = "input-required"
+ """User needs to modify a line in the generated code."""
+
+ UPDATE_EPIC = "update-epic"
+ """Update the epic development plan after a task was iterated on."""
+
+ TASK_REVIEW_FEEDBACK = "task-review-feedback"
+ """Agent is providing feedback on the entire task."""
+
+
+class AgentResponse:
+ type: ResponseType = ResponseType.DONE
+ agent: "BaseAgent"
+ data: Optional[dict]
+
+ def __init__(self, type: ResponseType, agent: "BaseAgent", data: Optional[dict] = None):
+ self.type = type
+ self.agent = agent
+ self.data = data
+
+ def __repr__(self) -> str:
+ return f""
+
+ @staticmethod
+ def done(agent: "BaseAgent") -> "AgentResponse":
+ return AgentResponse(type=ResponseType.DONE, agent=agent)
+
+ @staticmethod
+ def error(agent: "BaseAgent", message: str, details: Optional[dict] = None) -> "AgentResponse":
+ return AgentResponse(
+ type=ResponseType.ERROR,
+ agent=agent,
+ data={"message": message, "details": details},
+ )
+
+ @staticmethod
+ def cancel(agent: "BaseAgent") -> "AgentResponse":
+ return AgentResponse(type=ResponseType.CANCEL, agent=agent)
+
+ @staticmethod
+ def exit(agent: "ErrorHandler") -> "AgentResponse":
+ return AgentResponse(type=ResponseType.EXIT, agent=agent)
+
+ @staticmethod
+ def code_review(
+ agent: "BaseAgent",
+ path: str,
+ instructions: str,
+ old_content: str,
+ new_content: str,
+ attempt: int,
+ ) -> "AgentResponse":
+ return AgentResponse(
+ type=ResponseType.CODE_REVIEW,
+ agent=agent,
+ data={
+ "path": path,
+ "instructions": instructions,
+ "old_content": old_content,
+ "new_content": new_content,
+ "attempt": attempt,
+ },
+ )
+
+ @staticmethod
+ def code_review_feedback(
+ agent: "BaseAgent",
+ new_content: str,
+ approved_content: str,
+ feedback: str,
+ attempt: int,
+ ) -> "AgentResponse":
+ return AgentResponse(
+ type=ResponseType.CODE_REVIEW_FEEDBACK,
+ agent=agent,
+ data={
+ "new_content": new_content,
+ "approved_content": approved_content,
+ "feedback": feedback,
+ "attempt": attempt,
+ },
+ )
+
+ @staticmethod
+ def describe_files(agent: "BaseAgent") -> "AgentResponse":
+ return AgentResponse(type=ResponseType.DESCRIBE_FILES, agent=agent)
+
+ @staticmethod
+ def input_required(agent: "BaseAgent", files: list[dict[str, int]]) -> "AgentResponse":
+ return AgentResponse(type=ResponseType.INPUT_REQUIRED, agent=agent, data={"files": files})
+
+ @staticmethod
+ def update_epic(agent: "BaseAgent") -> "AgentResponse":
+ return AgentResponse(type=ResponseType.UPDATE_EPIC, agent=agent)
+
+ @staticmethod
+ def task_review_feedback(agent: "BaseAgent", feedback: str) -> "AgentResponse":
+ return AgentResponse(
+ type=ResponseType.TASK_REVIEW_FEEDBACK,
+ agent=agent,
+ data={
+ "feedback": feedback,
+ },
+ )
diff --git a/core/agents/spec_writer.py b/core/agents/spec_writer.py
new file mode 100644
index 00000000..23047409
--- /dev/null
+++ b/core/agents/spec_writer.py
@@ -0,0 +1,143 @@
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.response import AgentResponse
+from core.db.models import Complexity
+from core.llm.parser import StringParser
+from core.telemetry import telemetry
+from core.templates.example_project import (
+ EXAMPLE_PROJECT_ARCHITECTURE,
+ EXAMPLE_PROJECT_DESCRIPTION,
+ EXAMPLE_PROJECT_PLAN,
+)
+
+# If the project description is shorter than this, perform an analysis using LLM
+ANALYZE_THRESHOLD = 1500
+# URL to the wiki page with tips on how to write a good project description
+INITIAL_PROJECT_HOWTO_URL = (
+ "https://github.com/Pythagora-io/gpt-pilot/wiki/How-to-write-a-good-initial-project-description"
+)
+
+
+class SpecWriter(BaseAgent):
+ agent_type = "spec-writer"
+ display_name = "Spec Writer"
+
+ async def run(self) -> AgentResponse:
+ response = await self.ask_question(
+ "Describe your app in as much detail as possible",
+ allow_empty=False,
+ buttons={"example": "Start an example project"},
+ )
+ if response.cancelled:
+ return AgentResponse.error(self, "No project description")
+
+ if response.button == "example":
+ self.prepare_example_project()
+ return AgentResponse.done(self)
+
+ spec = response.text
+
+ complexity = await self.check_prompt_complexity(spec)
+ if len(spec) < ANALYZE_THRESHOLD and complexity != Complexity.SIMPLE:
+ spec = await self.analyze_spec(spec)
+ spec = await self.review_spec(spec)
+
+ self.next_state.specification = self.current_state.specification.clone()
+ self.next_state.specification.description = spec
+ self.next_state.specification.complexity = complexity
+ telemetry.set("initial_prompt", spec)
+ telemetry.set("is_complex_app", complexity != Complexity.SIMPLE)
+
+ return AgentResponse.done(self)
+
+ async def check_prompt_complexity(self, prompt: str) -> str:
+ await self.send_message("Checking the complexity of the prompt ...")
+ llm = self.get_llm()
+ convo = AgentConvo(self).template("prompt_complexity", prompt=prompt)
+ llm_response: str = await llm(convo, temperature=0, parser=StringParser())
+ return llm_response.lower()
+
+ def prepare_example_project(self):
+ spec = self.current_state.specification.clone()
+ spec.description = EXAMPLE_PROJECT_DESCRIPTION
+ spec.architecture = EXAMPLE_PROJECT_ARCHITECTURE["architecture"]
+ spec.system_dependencies = EXAMPLE_PROJECT_ARCHITECTURE["system_dependencies"]
+ spec.package_dependencies = EXAMPLE_PROJECT_ARCHITECTURE["package_dependencies"]
+ spec.template = EXAMPLE_PROJECT_ARCHITECTURE["template"]
+ spec.complexity = Complexity.SIMPLE
+ telemetry.set("initial_prompt", spec.description.strip())
+ telemetry.set("is_complex_app", False)
+ telemetry.set("template", spec.template)
+ telemetry.set(
+ "architecture",
+ {
+ "architecture": spec.architecture,
+ "system_dependencies": spec.system_dependencies,
+ "package_dependencies": spec.package_dependencies,
+ },
+ )
+ self.next_state.specification = spec
+
+ self.next_state.epics = [
+ {
+ "name": "Initial Project",
+ "description": EXAMPLE_PROJECT_DESCRIPTION,
+ "completed": False,
+ "complexity": Complexity.SIMPLE,
+ }
+ ]
+ self.next_state.tasks = EXAMPLE_PROJECT_PLAN
+
+ async def analyze_spec(self, spec: str) -> str:
+ msg = (
+ "Your project description seems a bit short. "
+ "The better you can describe the project, the better GPT Pilot will understand what you'd like to build.\n\n"
+ f"Here are some tips on how to better describe the project: {INITIAL_PROJECT_HOWTO_URL}\n\n"
+ "Let's start by refining your project idea:"
+ )
+ await self.send_message(msg)
+
+ llm = self.get_llm()
+ convo = AgentConvo(self).template("ask_questions").user(spec)
+
+ while True:
+ response: str = await llm(convo)
+ if len(response) > 500:
+ # The response is too long for it to be a question, assume it's the spec
+ confirm = await self.ask_question(
+ (
+ "Can we proceed with this project description? If so, just press ENTER. "
+ "Otherwise, please tell me what's missing or what you'd like to add."
+ ),
+ allow_empty=True,
+ buttons={"continue": "Continue"},
+ )
+ if confirm.cancelled or confirm.button == "continue" or confirm.text == "":
+ return spec
+ convo.user(confirm.text)
+
+ else:
+ convo.assistant(response)
+
+ user_response = await self.ask_question(
+ response,
+ buttons={"skip": "Skip questions"},
+ )
+ if user_response.cancelled or user_response.button == "skip":
+ convo.user(
+ "This is enough clarification, you have all the information. "
+ "Please output the spec now, without additional comments or questions."
+ )
+ response: str = await llm(convo)
+ return response
+
+ convo.user(user_response.text)
+
+ async def review_spec(self, spec: str) -> str:
+ convo = AgentConvo(self).template("review_spec", spec=spec)
+ llm = self.get_llm()
+ llm_response: str = await llm(convo, temperature=0)
+ additional_info = llm_response.strip()
+ if additional_info:
+ spec += "\nAdditional info/examples:\n" + additional_info
+ return spec
diff --git a/core/agents/task_reviewer.py b/core/agents/task_reviewer.py
new file mode 100644
index 00000000..7b74c5aa
--- /dev/null
+++ b/core/agents/task_reviewer.py
@@ -0,0 +1,53 @@
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.response import AgentResponse
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+class TaskReviewer(BaseAgent):
+ agent_type = "task-reviewer"
+ display_name = "Task Reviewer"
+
+ async def run(self) -> AgentResponse:
+ response = await self.review_code_changes()
+ self.next_state.complete_step()
+ return response
+
+ async def review_code_changes(self) -> AgentResponse:
+ """
+ Review all the code changes during current task.
+ """
+
+ log.debug(f"Reviewing code changes for task {self.current_state.current_task['description']}")
+ await self.send_message("Reviewing the task implementation ...")
+ all_feedbacks = [
+ iteration["user_feedback"].replace("```", "").strip()
+ for iteration in self.current_state.iterations
+ # Some iterations are created by the task reviewer and have no user feedback
+ if iteration["user_feedback"]
+ ]
+
+ files_before_modification = self.current_state.modified_files
+ files_after_modification = [
+ (file.path, file.content.content)
+ for file in self.current_state.files
+ if (file.path in files_before_modification)
+ ]
+
+ llm = self.get_llm()
+ # TODO: instead of sending files before and after, maybe add a nice way to show diffs for multiple files
+ convo = AgentConvo(self).template(
+ "review_task",
+ current_task=self.current_state.current_task,
+ all_feedbacks=all_feedbacks,
+ files_before_modification=files_before_modification,
+ files_after_modification=files_after_modification,
+ )
+ llm_response: str = await llm(convo, temperature=0.7)
+
+ if llm_response.strip().lower() == "done":
+ return AgentResponse.done(self)
+ else:
+ return AgentResponse.task_review_feedback(self, llm_response)
diff --git a/core/agents/tech_lead.py b/core/agents/tech_lead.py
new file mode 100644
index 00000000..67ad656c
--- /dev/null
+++ b/core/agents/tech_lead.py
@@ -0,0 +1,196 @@
+from typing import Optional
+from uuid import uuid4
+
+from pydantic import BaseModel, Field
+
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.response import AgentResponse, ResponseType
+from core.db.models import Complexity
+from core.llm.parser import JSONParser
+from core.log import get_logger
+from core.templates.registry import apply_project_template, get_template_summary
+from core.ui.base import ProjectStage
+
+log = get_logger(__name__)
+
+
+class Task(BaseModel):
+ description: str = Field(description=("Very detailed description of a development task."))
+
+
+class DevelopmentPlan(BaseModel):
+ plan: list[Task] = Field(description="List of development tasks that need to be done to implement the entire plan.")
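+ # Illustrative LLM response matching this schema:
+ # {"plan": [{"description": "Create the database models"}, {"description": "..."}]}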
+
+
+class UpdatedDevelopmentPlan(BaseModel):
+ updated_current_task: Task = Field(
+ description="Updated detailed description of what was implemented while working on the current development task."
+ )
+ plan: list[Task] = Field(description="List of unfinished development tasks.")
+
+
+class TechLead(BaseAgent):
+ agent_type = "tech-lead"
+ display_name = "Tech Lead"
+
+ async def run(self) -> AgentResponse:
+ if self.prev_response and self.prev_response.type == ResponseType.UPDATE_EPIC:
+ return await self.update_epic()
+
+ if len(self.current_state.epics) == 0:
+ self.create_initial_project_epic()
+ # Orchestrator will rerun us to break down the initial project epic
+ return AgentResponse.done(self)
+
+ await self.ui.send_project_stage(ProjectStage.CODING)
+
+ if self.current_state.specification.template and not self.current_state.files:
+ await self.apply_project_template()
+ return AgentResponse.done(self)
+
+ unfinished_epics = self.current_state.unfinished_epics
+ if unfinished_epics:
+ return await self.plan_epic(unfinished_epics[0])
+ else:
+ return await self.ask_for_new_feature()
+
+ def create_initial_project_epic(self):
+ log.debug("Creating initial project epic")
+ self.next_state.epics = [
+ {
+ "id": uuid4().hex,
+ "name": "Initial Project",
+ "source": "app",
+ "description": self.current_state.specification.description,
+ "summary": None,
+ "completed": False,
+ "complexity": self.current_state.specification.complexity,
+ }
+ ]
+
+ async def apply_project_template(self) -> Optional[str]:
+ state = self.current_state
+
+ # Only do this for the initial project and if the template is specified
+ if len(state.epics) != 1 or not state.specification.template:
+ return None
+
+ log.info(f"Applying project template: {self.current_state.specification.template}")
+ await self.send_message(f"Applying project template {self.current_state.specification.template} ...")
+ summary = await apply_project_template(
+ self.current_state.specification.template,
+ self.state_manager,
+ self.process_manager,
+ )
+ # Saving template files will fill this in and we want it clear for the
+ # first task.
+ self.next_state.relevant_files = []
+ return summary
+
+ async def ask_for_new_feature(self) -> AgentResponse:
+ log.debug("Asking for new feature")
+ response = await self.ask_question(
+ "Do you have a new feature to add to the project? Just write it here",
+ buttons={"end": "No, I'm done"},
+ allow_empty=True,
+ )
+
+ if response.cancelled or response.button == "end" or not response.text:
+ return AgentResponse.exit(self)
+
+ self.next_state.epics = self.current_state.epics + [
+ {
+ "id": uuid4().hex,
+ "name": f"Feature #{len(self.current_state.epics)}",
+ "source": "feature",
+ "description": response.text,
+ "summary": None,
+ "completed": False,
+ "complexity": Complexity.HARD,
+ }
+ ]
+ # Orchestrator will rerun us to break down the new feature epic
+ return AgentResponse.done(self)
+
+ async def plan_epic(self, epic) -> AgentResponse:
+ log.debug(f"Planning tasks for the epic: {epic['name']}")
+ await self.send_message("Starting to create the action plan for development ...")
+
+ llm = self.get_llm()
+ convo = (
+ AgentConvo(self)
+ .template(
+ "plan",
+ epic=epic,
+ task_type=self.current_state.current_epic.get("source", "app"),
+ existing_summary=get_template_summary(self.current_state.specification.template),
+ )
+ .require_schema(DevelopmentPlan)
+ )
+
+ response: DevelopmentPlan = await llm(convo, parser=JSONParser(DevelopmentPlan))
+ self.next_state.tasks = self.current_state.tasks + [
+ {
+ "id": uuid4().hex,
+ "description": task.description,
+ "instructions": None,
+ "completed": False,
+ }
+ for task in response.plan
+ ]
+ return AgentResponse.done(self)
+
+ async def update_epic(self) -> AgentResponse:
+ """
+ Update the development plan for the current epic.
+
+ As a side-effect, this also marks the current task as complete,
+ and should only be called by Troubleshooter once the task is done,
+ if the Troubleshooter decides a plan update is needed.
+
+ """
+ epic = self.current_state.current_epic
+ self.next_state.complete_task()
+ await self.state_manager.log_task_completed()
+
+ if not self.next_state.unfinished_tasks:
+ # There are no tasks after this one, so there's nothing to update
+ return AgentResponse.done(self)
+
+ finished_tasks = [task for task in self.next_state.tasks if task["completed"]]
+
+ log.debug(f"Updating development plan for {epic['name']}")
+ await self.ui.send_message("Updating development plan ...")
+
+ llm = self.get_llm()
+ convo = (
+ AgentConvo(self)
+ .template(
+ "update_plan",
+ finished_tasks=finished_tasks,
+ task_type=self.current_state.current_epic.get("source", "app"),
+ modified_files=[f for f in self.current_state.files if f.path in self.current_state.modified_files],
+ )
+ .require_schema(UpdatedDevelopmentPlan)
+ )
+
+ response: UpdatedDevelopmentPlan = await llm(
+ convo,
+ parser=JSONParser(UpdatedDevelopmentPlan),
+ temperature=0,
+ )
+ log.debug(f"Reworded last task as: {response.updated_current_task.description}")
+ finished_tasks[-1]["description"] = response.updated_current_task.description
+
+ self.next_state.tasks = finished_tasks + [
+ {
+ "id": uuid4().hex,
+ "description": task.description,
+ "instructions": None,
+ "completed": False,
+ }
+ for task in response.plan
+ ]
+ log.debug(f"Updated development plan for {epic['name']}, {len(response.plan)} tasks remaining")
+ return AgentResponse.done(self)
diff --git a/core/agents/tech_writer.py b/core/agents/tech_writer.py
new file mode 100644
index 00000000..f2dd96d8
--- /dev/null
+++ b/core/agents/tech_writer.py
@@ -0,0 +1,30 @@
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.response import AgentResponse
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+class TechnicalWriter(BaseAgent):
+ agent_type = "tech-writer"
+ display_name = "Technical Writer"
+
+ async def run(self) -> AgentResponse:
+ n_tasks = len(self.current_state.tasks)
+ n_unfinished = len(self.current_state.unfinished_tasks)
+
+ if n_unfinished in [n_tasks // 2, 1]:
+ # Halfway through the initial project, and at the last task
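+ # e.g. with 10 tasks total, this triggers when 5 tasks or 1 task remain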
+ await self.create_readme()
+
+ self.next_state.complete_step()
+ return AgentResponse.done(self)
+
+ async def create_readme(self):
+ await self.ui.send_message("Creating README ...")
+
+ llm = self.get_llm()
+ convo = AgentConvo(self).template("create_readme")
+ llm_response: str = await llm(convo)
+ await self.state_manager.save_file("README.md", llm_response)
diff --git a/core/agents/troubleshooter.py b/core/agents/troubleshooter.py
new file mode 100644
index 00000000..9da6dbb2
--- /dev/null
+++ b/core/agents/troubleshooter.py
@@ -0,0 +1,281 @@
+from typing import Optional
+from uuid import uuid4
+
+from pydantic import BaseModel, Field
+
+from core.agents.base import BaseAgent
+from core.agents.convo import AgentConvo
+from core.agents.mixins import IterationPromptMixin
+from core.agents.response import AgentResponse
+from core.llm.parser import JSONParser, OptionalCodeBlockParser
+from core.log import get_logger
+from core.telemetry import telemetry
+
+log = get_logger(__name__)
+
+LOOP_THRESHOLD = 3 # number of iterations in task to be considered a loop
+
+
+class BugReportQuestions(BaseModel):
+ missing_data: list[str] = Field(
+ description="Very clear question that needs to be answered to have good bug report."
+ )
+
+
+class Troubleshooter(IterationPromptMixin, BaseAgent):
+ agent_type = "troubleshooter"
+ display_name = "Troubleshooter"
+
+ async def run(self) -> AgentResponse:
+ run_command = await self.get_run_command()
+ user_instructions = await self.get_user_instructions()
+ if user_instructions is None:
+ # LLM decided we don't need to test anything, so we're done with the task
+ return await self.complete_task()
+
+ # Developer sets iteration as "completed" when it generates the step breakdown, so we can't
+ # use "current_iteration" here
+ last_iteration = self.current_state.iterations[-1] if self.current_state.iterations else None
+
+ should_iterate, is_loop, user_feedback = await self.get_user_feedback(
+ run_command,
+ user_instructions,
+ last_iteration is not None,
+ )
+ if not should_iterate:
+ # User tested and reported no problems, we're done with the task
+ return await self.complete_task()
+
+ user_feedback_qa = await self.generate_bug_report(run_command, user_instructions, user_feedback)
+
+ if is_loop:
+ if last_iteration["alternative_solutions"]:
+ # If we already have alternative solutions, it means we were already in a loop.
+ return self.try_next_alternative_solution(user_feedback, user_feedback_qa)
+ else:
+ # Newly detected loop, set up an empty new iteration to trigger ProblemSolver
+ llm_solution = ""
+ await self.trace_loop("loop-feedback")
+ else:
+ llm_solution = await self.find_solution(user_feedback, user_feedback_qa=user_feedback_qa)
+
+ self.next_state.iterations = self.current_state.iterations + [
+ {
+ "id": uuid4().hex,
+ "user_feedback": user_feedback,
+ "user_feedback_qa": user_feedback_qa,
+ "description": llm_solution,
+ "alternative_solutions": [],
+ # FIXME - this is incorrect if this is a new problem; otherwise we could
+ # just count the iterations
+ "attempts": 1,
+ "completed": False,
+ }
+ ]
+ if len(self.next_state.iterations) == LOOP_THRESHOLD:
+ await self.trace_loop("loop-start")
+
+ return AgentResponse.done(self)
+
+ async def complete_task(self) -> AgentResponse:
+ """
+ Mark the current task as completed.
+
+ If there were iterations for the task, instead of marking the task as completed directly,
+ we ask the TechLead to update the epic (it needs the state of the current task) and then mark
+ the task as completed.
+ """
+ self.next_state.steps = []
+ if len(self.current_state.iterations) >= LOOP_THRESHOLD:
+ await self.trace_loop("loop-end")
+
+ if self.current_state.iterations:
+ return AgentResponse.update_epic(self)
+ else:
+ self.next_state.complete_task()
+ await self.state_manager.log_task_completed()
+ await self.ui.send_task_progress(
+ self.current_state.tasks.index(self.current_state.current_task) + 1,
+ len(self.current_state.tasks),
+ self.current_state.current_task["description"],
+ self.current_state.current_epic.get("source", "app"),
+ "done",
+ )
+ return AgentResponse.done(self)
+
+ def _get_task_convo(self) -> AgentConvo:
+ # FIXME: Current prompts reuse conversation from the developer so we have to resort to this
+ task = self.current_state.current_task
+ current_task_index = self.current_state.tasks.index(task)
+
+ return (
+ AgentConvo(self)
+ .template(
+ "breakdown",
+ task=task,
+ iteration=None,
+ current_task_index=current_task_index,
+ )
+ .assistant(self.current_state.current_task["instructions"])
+ )
+
+ async def get_run_command(self) -> Optional[str]:
+ if self.current_state.run_command:
+ return self.current_state.run_command
+
+ await self.send_message("Figuring out how to run the app ...")
+
+ llm = self.get_llm()
+ convo = self._get_task_convo().template("get_run_command")
+
+ # Although the prompt is explicit about not using "```", LLM may still return it
+ llm_response: str = await llm(convo, temperature=0, parser=OptionalCodeBlockParser())
+ self.next_state.run_command = llm_response
+ return llm_response
+
+ async def get_user_instructions(self) -> Optional[str]:
+ await self.send_message("Determining how to test the app ...")
+
+ llm = self.get_llm()
+ convo = self._get_task_convo().template("define_user_review_goal", task=self.current_state.current_task)
+ user_instructions: str = await llm(convo)
+
+ user_instructions = user_instructions.strip()
+ if user_instructions.lower() == "done":
+ log.debug(f"Nothing to do for user testing for task {self.current_state.current_task['description']}")
+ return None
+
+ return user_instructions
+
+ async def get_user_feedback(
+ self,
+ run_command: str,
+ user_instructions: str,
+ last_iteration: bool,
+ ) -> tuple[bool, bool, str]:
+ """
+ Ask the user to test the app and provide feedback.
+
+ :return (bool, bool, str): Tuple containing "should_iterate", "is_loop" and
+ "user_feedback" respectively.
+
+ If "should_iterate" is False, the user has confirmed that the app works as expected and there's
+ nothing for the troubleshooter or problem solver to do.
+
+ If "is_loop" is True, Pythagora is stuck in a loop and needs to consider alternative solutions.
+
+ The last element in the tuple is the user feedback, which may be empty if the user provided no
+ feedback (eg. if they just clicked on "Continue" or "I'm stuck in a loop").
+ """
+
+ test_message = "Can you check if the app works please?"
+ if user_instructions:
+ test_message += " Here is a description of what should be working:\n\n" + user_instructions
+
+ if run_command:
+ await self.ui.send_run_command(run_command)
+
+ buttons = {"continue": "Everything works, continue"}
+ if last_iteration:
+ buttons["loop"] = "I still have the same issue"
+
+ user_response = await self.ask_question(
+ test_message,
+ buttons=buttons,
+ default="continue",
+ )
+ if user_response.button == "continue" or user_response.cancelled:
+ return False, False, ""
+
+ if user_response.button == "loop":
+ return True, True, ""
+
+ return True, False, user_response.text
+
+ def try_next_alternative_solution(self, user_feedback: str, user_feedback_qa: list[str]) -> AgentResponse:
+ """
+ Call the ProblemSolver to try an alternative solution.
+
+ Stores the user feedback and sets iteration state (not completed, no description)
+ so that ProblemSolver will be triggered.
+
+ :param user_feedback: User feedback to store in the iteration state.
+ :param user_feedback_qa: Additional questions/answers about the problem.
+ :return: Agent response done.
+ """
+ next_state_iteration = self.next_state.iterations[-1]
+ next_state_iteration["description"] = ""
+ next_state_iteration["user_feedback"] = user_feedback
+ next_state_iteration["user_feedback_qa"] = user_feedback_qa
+ next_state_iteration["attempts"] += 1
+ next_state_iteration["completed"] = False
+ self.next_state.flag_iterations_as_modified()
+ return AgentResponse.done(self)
+
+ async def generate_bug_report(
+ self,
+ run_command: Optional[str],
+ user_instructions: str,
+ user_feedback: str,
+ ) -> list[str]:
+ """
+ Generate a bug report from the user feedback.
+
+ :param run_command: The command to run to test the app.
+ :param user_instructions: Instructions on how to test the functionality.
+ :param user_feedback: The user feedback.
+ :return: Additional questions and answers to generate a better bug report.
+ """
+ additional_qa = []
+ llm = self.get_llm()
+ convo = (
+ AgentConvo(self)
+ .template(
+ "bug_report",
+ user_instructions=user_instructions,
+ user_feedback=user_feedback,
+ # TODO: revisit if we again want to run this in a loop, where this is useful
+ additional_qa=additional_qa,
+ )
+ .require_schema(BugReportQuestions)
+ )
+ llm_response: BugReportQuestions = await llm(convo, parser=JSONParser(BugReportQuestions))
+
+ if not llm_response.missing_data:
+ return []
+
+ for question in llm_response.missing_data:
+ if run_command:
+ await self.ui.send_run_command(run_command)
+ user_response = await self.ask_question(
+ question,
+ buttons={
+ "continue": "Submit answer",
+ "skip": "Skip this question",
+ "skip-all": "Skip all questions",
+ },
+ allow_empty=False,
+ )
+ if user_response.cancelled or user_response.button == "skip-all":
+ break
+ elif user_response.button == "skip":
+ continue
+
+ additional_qa.append(
+ {
+ "question": question,
+ "answer": user_response.text,
+ }
+ )
+
+ return additional_qa
+
+ async def trace_loop(self, trace_event: str):
+ state = self.current_state
+ task_with_loop = {
+ "task_description": state.current_task["description"],
+ "task_number": len([t for t in state.tasks if t["completed"]]) + 1,
+ "steps": len(state.steps),
+ "iterations": len(state.iterations),
+ }
+ await telemetry.trace_loop(trace_event, task_with_loop)
diff --git a/core/cli/__init__.py b/core/cli/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/core/cli/helpers.py b/core/cli/helpers.py
new file mode 100644
index 00000000..ec940c1c
--- /dev/null
+++ b/core/cli/helpers.py
@@ -0,0 +1,319 @@
+import json
+import os
+import os.path
+import sys
+from argparse import ArgumentParser, ArgumentTypeError, Namespace
+from typing import Optional
+from urllib.parse import urlparse
+from uuid import UUID
+
+from core.config import Config, LLMProvider, LocalIPCConfig, ProviderConfig, UIAdapter, get_config, loader
+from core.config.env_importer import import_from_dotenv
+from core.config.version import get_version
+from core.db.session import SessionManager
+from core.db.setup import run_migrations
+from core.log import setup
+from core.state.state_manager import StateManager
+from core.ui.base import UIBase
+from core.ui.console import PlainConsoleUI
+from core.ui.ipc_client import IPCClientUI
+
+
+def parse_llm_endpoint(value: str) -> Optional[tuple[LLMProvider, str]]:
+ """
+ Parse --llm-endpoint command-line option.
+
+ Option syntax is: --llm-endpoint provider:url
+
+ :param value: Argument value.
+ :return: Tuple with LLM provider and URL, or None if the option wasn't provided.
+ """
+ if not value:
+ return None
+
+ parts = value.split(":", 1)
+ if len(parts) != 2:
+ raise ArgumentTypeError("Invalid LLM endpoint format; expected 'provider:url'")
+
+ try:
+ provider = LLMProvider(parts[0])
+ except ValueError as err:
+ raise ArgumentTypeError(f"Unsupported LLM provider: {err}")
+ url = urlparse(parts[1])
+ if url.scheme not in ("http", "https"):
+ raise ArgumentTypeError(f"Invalid LLM endpoint URL: {parts[1]}")
+
+ return provider, url.geturl()
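+
+# Illustrative usage (not part of the original module; the localhost URL is a
+# hypothetical OpenAI-compatible endpoint):
+#
+# >>> parse_llm_endpoint("openai:http://localhost:1234/v1")
+# (<LLMProvider.OPENAI: 'openai'>, 'http://localhost:1234/v1')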
+
+
+def parse_llm_key(value: str) -> Optional[tuple[LLMProvider, str]]:
+ """
+ Parse --llm-key command-line option.
+
+ Option syntax is: --llm-key provider:key
+
+ :param value: Argument value.
+ :return: Tuple with LLM provider and key, or None if the option wasn't provided.
+ """
+ if not value:
+ return None
+
+ parts = value.split(":", 1)
+ if len(parts) != 2:
+ raise ArgumentTypeError("Invalid LLM endpoint format; expected 'provider:key'")
+
+ try:
+ provider = LLMProvider(parts[0])
+ except ValueError as err:
+ raise ArgumentTypeError(f"Unsupported LLM provider: {err}")
+
+ return provider, parts[1]
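+
+# Illustrative usage (the key value is a hypothetical placeholder):
+#
+# >>> parse_llm_key("anthropic:sk-ant-example")
+# (<LLMProvider.ANTHROPIC: 'anthropic'>, 'sk-ant-example')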
+
+
+def parse_arguments() -> Namespace:
+ """
+ Parse command-line arguments.
+
+ Available arguments:
+ --help: Show the help message
+ --config: Path to the configuration file
+ --show-config: Output the current configuration to stdout
+ --level: Log level (debug,info,warning,error,critical)
+ --database: Database URL
+ --local-ipc-port: Local IPC port to connect to
+ --local-ipc-host: Local IPC host to connect to
+ --version: Show the version and exit
+ --list: List all projects
+ --list-json: List all projects in JSON format
+ --project: Load a specific project
+ --branch: Load a specific branch
+ --step: Load a specific step in a project/branch
+ --delete: Delete a specific project
+ --llm-endpoint: Use specific API endpoint for the given provider
+ --llm-key: Use specific LLM key for the given provider
+ --import-v0: Import data from a v0 (gpt-pilot) database with the given path
+ --email: User's email address, if provided
+ --extension-version: Version of the VSCode extension, if used
+ :return: Parsed arguments object.
+ """
+ version = get_version()
+
+ parser = ArgumentParser()
+ parser.add_argument("--config", help="Path to the configuration file", default="config.json")
+ parser.add_argument("--show-config", help="Output the default configuration to stdout", action="store_true")
+ parser.add_argument("--level", help="Log level (debug,info,warning,error,critical)", required=False)
+ parser.add_argument("--database", help="Database URL", required=False)
+ parser.add_argument("--local-ipc-port", help="Local IPC port to connect to", type=int, required=False)
+ parser.add_argument("--local-ipc-host", help="Local IPC host to connect to", default="localhost", required=False)
+ parser.add_argument("--version", action="version", version=version)
+ parser.add_argument("--list", help="List all projects", action="store_true")
+ parser.add_argument("--list-json", help="List all projects in JSON format", action="store_true")
+ parser.add_argument("--project", help="Load a specific project", type=UUID, required=False)
+ parser.add_argument("--branch", help="Load a specific branch", type=UUID, required=False)
+ parser.add_argument("--step", help="Load a specific step in a project/branch", type=int, required=False)
+ parser.add_argument("--delete", help="Delete a specific project", type=UUID, required=False)
+ parser.add_argument(
+ "--llm-endpoint",
+ help="Use specific API endpoint for the given provider",
+ type=parse_llm_endpoint,
+ action="append",
+ required=False,
+ )
+ parser.add_argument(
+ "--llm-key",
+ help="Use specific LLM key for the given provider",
+ type=parse_llm_key,
+ action="append",
+ required=False,
+ )
+ parser.add_argument(
+ "--import-v0",
+ help="Import data from a v0 (gpt-pilot) database with the given path",
+ required=False,
+ )
+ parser.add_argument("--email", help="User's email address", required=False)
+ parser.add_argument("--extension-version", help="Version of the VSCode extension", required=False)
+ return parser.parse_args()
+
+
+def load_config(args: Namespace) -> Optional[Config]:
+ """
+ Load Pythagora JSON configuration file and apply command-line arguments.
+
+ :param args: Command-line arguments (at least `config` must be present).
+ :return: Configuration object, or None if config couldn't be loaded.
+ """
+ if not os.path.isfile(args.config):
+ imported = import_from_dotenv(args.config)
+ if not imported:
+ print(f"Configuration file not found: {args.config}; using default", file=sys.stderr)
+ return get_config()
+
+ try:
+ config = loader.load(args.config)
+ except ValueError as err:
+ print(f"Error parsing config file {args.config}: {err}", file=sys.stderr)
+ return None
+
+ if args.level:
+ config.log.level = args.level.upper()
+
+ if args.database:
+ config.db.url = args.database
+
+ if args.local_ipc_port:
+ config.ui = LocalIPCConfig(port=args.local_ipc_port, host=args.local_ipc_host)
+
+ if args.llm_endpoint:
+ for provider, endpoint in args.llm_endpoint:
+ if provider not in config.llm:
+ config.llm[provider] = ProviderConfig()
+ config.llm[provider].base_url = endpoint
+
+ if args.llm_key:
+ for provider, key in args.llm_key:
+ if provider not in config.llm:
+ config.llm[provider] = ProviderConfig()
+ config.llm[provider].api_key = key
+
+ try:
+ Config.model_validate(config)
+ except ValueError as err:
+ print(f"Configuration error: {err}", file=sys.stderr)
+ return None
+
+ return config
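+
+# Sketch of the override precedence (hypothetical values): invoked with
+# "--llm-endpoint openai:http://localhost:1234/v1 --level debug", load_config()
+# first reads config.json (or falls back to the defaults), then sets
+# config.llm[LLMProvider.OPENAI].base_url to the given URL and
+# config.log.level to "DEBUG", and finally re-validates the whole Config.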
+
+
+async def list_projects_json(db: SessionManager):
+ """
+ List all projects in the database in JSON format.
+ """
+ sm = StateManager(db)
+ projects = await sm.list_projects()
+
+ data = []
+ for project in projects:
+ p = {
+ "name": project.name,
+ "id": project.id.hex,
+ "branches": [],
+ }
+ for branch in project.branches:
+ b = {
+ "name": branch.name,
+ "id": branch.id.hex,
+ "steps": [],
+ }
+ for state in branch.states:
+ s = {
+ "name": f"Step #{state.step_index}",
+ "step": state.step_index,
+ }
+ b["steps"].append(s)
+ if b["steps"]:
+ b["steps"][-1]["name"] = "Latest step"
+ p["branches"].append(b)
+ data.append(p)
+ print(json.dumps(data, indent=2))
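+
+# Example output (names and IDs are hypothetical, IDs shortened):
+#
+# [
+#   {
+#     "name": "todo-app",
+#     "id": "4f79e695...",
+#     "branches": [
+#       {
+#         "name": "main",
+#         "id": "5b04ea6a...",
+#         "steps": [
+#           {"name": "Step #1", "step": 1},
+#           {"name": "Latest step", "step": 2}
+#         ]
+#       }
+#     ]
+#   }
+# ]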
+
+
+async def list_projects(db: SessionManager):
+ """
+ List all projects in the database.
+ """
+ sm = StateManager(db)
+ projects = await sm.list_projects()
+
+ print(f"Available projects ({len(projects)}):")
+ for project in projects:
+ print(f"* {project.name} ({project.id})")
+ for branch in project.branches:
+ last_step = max(state.step_index for state in branch.states)
+ print(f" - {branch.name} ({branch.id}) - last step: {last_step}")
+
+
+async def load_project(
+ sm: StateManager,
+ project_id: Optional[UUID] = None,
+ branch_id: Optional[UUID] = None,
+ step_index: Optional[int] = None,
+) -> bool:
+ """
+ Load a project from the database.
+
+ :param sm: State manager.
+ :param project_id: Project ID (optional, loads the last step in the main branch).
+ :param branch_id: Branch ID (optional, loads the last step in the branch).
+ :param step_index: Step index (optional, loads the state at the given step).
+ :return: True if the project was loaded successfully, False otherwise.
+ """
+ step_txt = f" step {step_index}" if step_index else ""
+
+ if branch_id:
+ project_state = await sm.load_project(branch_id=branch_id, step_index=step_index)
+ if project_state:
+ return True
+ else:
+ print(f"Branch {branch_id}{step_txt} not found; use --list to list all projects", file=sys.stderr)
+ return False
+
+ elif project_id:
+ project_state = await sm.load_project(project_id=project_id, step_index=step_index)
+ if project_state:
+ return True
+ else:
+ print(f"Project {project_id}{step_txt} not found; use --list to list all projects", file=sys.stderr)
+ return False
+
+ return False
+
+
+async def delete_project(sm: StateManager, project_id: UUID) -> bool:
+ """
+ Delete a project from the database.
+
+ :param sm: State manager.
+ :param project_id: Project ID.
+ :return: True if project was deleted, False otherwise.
+ """
+
+ return await sm.delete_project(project_id)
+
+
+def show_config():
+ """
+ Print the current configuration to stdout.
+ """
+ cfg = get_config()
+ print(cfg.model_dump_json(indent=2))
+
+
+def init() -> tuple[Optional[UIBase], Optional[SessionManager], Namespace]:
+ """
+ Initialize the application.
+
+ Loads configuration, sets up logging and UI, initializes the database
+ and runs database migrations.
+
+ :return: Tuple with UI, db session manager, and command-line arguments.
+ """
+ args = parse_arguments()
+ config = load_config(args)
+ if not config:
+ return (None, None, args)
+
+ setup(config.log, force=True)
+
+ if config.ui.type == UIAdapter.IPC_CLIENT:
+ ui = IPCClientUI(config.ui)
+ else:
+ ui = PlainConsoleUI()
+
+ run_migrations(config.db)
+ db = SessionManager(config.db)
+
+ return (ui, db, args)
+
+
+__all__ = ["parse_arguments", "load_config", "list_projects_json", "list_projects", "load_project", "init"]
diff --git a/core/cli/main.py b/core/cli/main.py
new file mode 100644
index 00000000..f6718a75
--- /dev/null
+++ b/core/cli/main.py
@@ -0,0 +1,145 @@
+import sys
+from argparse import Namespace
+from asyncio import run
+
+from core.agents.orchestrator import Orchestrator
+from core.cli.helpers import delete_project, init, list_projects, list_projects_json, load_project, show_config
+from core.db.session import SessionManager
+from core.db.v0importer import LegacyDatabaseImporter
+from core.llm.base import APIError
+from core.log import get_logger
+from core.state.state_manager import StateManager
+from core.telemetry import telemetry
+from core.ui.base import UIBase
+
+log = get_logger(__name__)
+
+
+async def run_project(sm: StateManager, ui: UIBase) -> bool:
+ """
+ Work on the project.
+
+ Starts the orchestrator agent with the newly loaded/created project
+ and runs it until the orchestrator decides to exit.
+
+ :param sm: State manager.
+ :param ui: User interface.
+ :return: True if the orchestrator exited successfully, False otherwise.
+ """
+
+ telemetry.start()
+ telemetry.set("app_id", str(sm.project.id))
+ telemetry.set("initial_prompt", sm.current_state.specification.description)
+
+ orca = Orchestrator(sm, ui)
+ success = False
+ try:
+ success = await orca.run()
+ except KeyboardInterrupt:
+ log.info("Interrupted by user")
+ telemetry.set("end_result", "interrupt")
+ await sm.rollback()
+ except APIError as err:
+ log.warning(f"LLM API error occurred: {err.message}")
+ await ui.send_message(f"LLM API error occurred: {err.message}")
+ await ui.send_message("Stopping Pythagora due to previous error.")
+ telemetry.set("end_result", "failure:api-error")
+ await sm.rollback()
+ except Exception as err:
+ telemetry.record_crash(err)
+ await sm.rollback()
+ log.error(f"Uncaught exception: {err}", exc_info=True)
+ await ui.send_message(f"Unrecoverable error occurred: {err}")
+
+ if success:
+ telemetry.set("end_result", "success:exit")
+ else:
+ # We assume unsuccessful exit (but not an exception) is a result
+ # of an API error that the user didn't retry.
+ telemetry.set("end_result", "failure:api-error")
+
+ await telemetry.send()
+ return success
+
+
+async def start_new_project(sm: StateManager, ui: UIBase) -> bool:
+ """
+ Start a new project.
+
+ :param sm: State manager.
+ :param ui: User interface.
+ :return: True if the project was created successfully, False otherwise.
+ """
+ user_input = await ui.ask_question("What is the name of the project", allow_empty=False)
+ if user_input.cancelled:
+ return False
+
+ project_state = await sm.create_project(user_input.text)
+ return project_state is not None
+
+
+async def async_main(
+ ui: UIBase,
+ db: SessionManager,
+ args: Namespace,
+) -> bool:
+ """
+ Main application coroutine.
+
+ :param ui: User interface.
+ :param db: Database session manager.
+ :param args: Command-line arguments.
+ :return: True if the application ran successfully, False otherwise.
+ """
+
+ if args.list:
+ await list_projects(db)
+ return True
+ elif args.list_json:
+ await list_projects_json(db)
+ return True
+ elif args.show_config:
+ show_config()
+ return True
+ elif args.import_v0:
+ importer = LegacyDatabaseImporter(db, args.import_v0)
+ await importer.import_database()
+ return True
+
+ telemetry.set("user_contact", args.email)
+ if args.extension_version:
+ telemetry.set("is_extension", True)
+ telemetry.set("extension_version", args.extension_version)
+
+ sm = StateManager(db, ui)
+ ui_started = await ui.start()
+ if not ui_started:
+ return False
+
+ if args.project or args.branch or args.step:
+ telemetry.set("is_continuation", True)
+ # FIXME: we should send the project stage and other runtime info to the UI
+ success = await load_project(sm, args.project, args.branch, args.step)
+ if not success:
+ return False
+ elif args.delete:
+ success = await delete_project(sm, args.delete)
+ return success
+ else:
+ success = await start_new_project(sm, ui)
+ if not success:
+ return False
+
+ return await run_project(sm, ui)
+
+
+def run_pythagora():
+ ui, db, args = init()
+ if not ui or not db:
+ return -1
+ success = run(async_main(ui, db, args))
+ return 0 if success else -1
+
+
+if __name__ == "__main__":
+ sys.exit(run_pythagora())
diff --git a/core/config/__init__.py b/core/config/__init__.py
new file mode 100644
index 00000000..68e3bca0
--- /dev/null
+++ b/core/config/__init__.py
@@ -0,0 +1,375 @@
+from enum import Enum
+from os.path import abspath, dirname, isdir, join
+from typing import Literal, Optional, Union
+
+from pydantic import BaseModel, ConfigDict, Field, field_validator
+from typing_extensions import Annotated
+
+ROOT_DIR = abspath(join(dirname(__file__), "..", ".."))
+DEFAULT_IGNORE_PATHS = [
+ ".git",
+ ".gpt-pilot",
+ ".idea",
+ ".vscode",
+ ".next",
+ ".DS_Store",
+ "__pycache__",
+ "site-packages",
+ "node_modules",
+ "package-lock.json",
+ "venv",
+ "dist",
+ "build",
+ "target",
+ "*.min.js",
+ "*.min.css",
+ "*.svg",
+ "*.csv",
+ "*.log",
+ "go.sum",
+]
+IGNORE_SIZE_THRESHOLD = 50000 # files larger than 50 KB are ignored by default
+
+# Agents with sane setup in the default configuration
+DEFAULT_AGENT_NAME = "default"
+DESCRIBE_FILES_AGENT_NAME = "CodeMonkey.describe_files"
+
+
+class _StrictModel(BaseModel):
+ """
+ Base model that configures Pydantic to reject unknown (extra) fields.
+ """
+
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+
+
+class LLMProvider(str, Enum):
+ """
+ Supported LLM providers.
+ """
+
+ OPENAI = "openai"
+ ANTHROPIC = "anthropic"
+ GROQ = "groq"
+ LM_STUDIO = "lm-studio"
+
+
+class UIAdapter(str, Enum):
+ """
+ Supported UI adapters.
+ """
+
+ PLAIN = "plain"
+ IPC_CLIENT = "ipc-client"
+
+
+class ProviderConfig(_StrictModel):
+ """
+ LLM provider configuration.
+ """
+
+ base_url: Optional[str] = Field(
+ None,
+ description="Base URL for the provider's API (if different from the provider default)",
+ )
+ api_key: Optional[str] = Field(
+ None,
+ description="API key to use for authentication (if not set, provider uses default from environment variable)",
+ )
+ connect_timeout: float = Field(
+ default=60.0,
+ description="Timeout (in seconds) for connecting to the provider's API",
+ ge=0.0,
+ )
+ read_timeout: float = Field(
+ default=10.0,
+ description="Timeout (in seconds) for receiving a new chunk of data from the response stream",
+ ge=0.0,
+ )
+
+
+class AgentLLMConfig(_StrictModel):
+ """
+ Configuration for the various LLMs used by Pythagora.
+
+ Each agent uses an LLM provider from the LLMProvider enum. If no
+ agent-specific AgentLLMConfig is specified, the default one is used.
+ """
+
+ provider: LLMProvider = LLMProvider.OPENAI
+ model: str = Field(description="Model to use", default="gpt-4-turbo")
+ temperature: float = Field(
+ default=0.5,
+ description="Temperature to use for sampling",
+ ge=0.0,
+ le=1.0,
+ )
+
+
+class LLMConfig(_StrictModel):
+ """
+ Complete agent-specific configuration for an LLM.
+ """
+
+ provider: LLMProvider = LLMProvider.OPENAI
+ model: str = Field(description="Model to use")
+ base_url: Optional[str] = Field(
+ None,
+ description="Base URL for the provider's API (if different from the provider default)",
+ )
+ api_key: Optional[str] = Field(
+ None,
+ description="API key to use for authentication (if not set, provider uses default from environment variable)",
+ )
+ temperature: float = Field(
+ default=0.5,
+ description="Temperature to use for sampling",
+ ge=0.0,
+ le=1.0,
+ )
+ connect_timeout: float = Field(
+ default=60.0,
+ description="Timeout (in seconds) for connecting to the provider's API",
+ ge=0.0,
+ )
+ read_timeout: float = Field(
+ default=10.0,
+ description="Timeout (in seconds) for receiving a new chunk of data from the response stream",
+ ge=0.0,
+ )
+
+ @classmethod
+ def from_provider_and_agent_configs(cls, provider: ProviderConfig, agent: AgentLLMConfig):
+ return cls(
+ provider=agent.provider,
+ model=agent.model,
+ base_url=provider.base_url,
+ api_key=provider.api_key,
+ temperature=agent.temperature,
+ connect_timeout=provider.connect_timeout,
+ read_timeout=provider.read_timeout,
+ )
+
+
+class PromptConfig(_StrictModel):
+ """
+ Configuration for prompt templates.
+ """
+
+ paths: list[str] = Field(
+ [join(ROOT_DIR, "core", "prompts")],
+ description="List of directories to search for prompt templates",
+ )
+
+ @field_validator("paths")
+ @classmethod
+ def validate_paths(cls, v: list[str]) -> list[str]:
+ for path in v:
+ if not isdir(path):
+ raise ValueError(f"Invalid prompt path: {path}")
+ return v
+
+
+class LogConfig(_StrictModel):
+ """
+ Configuration for logging.
+ """
+
+ level: str = Field(
+ "DEBUG",
+ description="Logging level",
+ pattern=r"^(DEBUG|INFO|WARNING|ERROR|CRITICAL)$",
+ )
+ format: str = Field(
+ "%(asctime)s %(levelname)s [%(name)s] %(message)s",
+ description="Logging format",
+ )
+ output: Optional[str] = Field(
+ "pythagora.log",
+ description="Output file for logs (if not specified, logs are printed to stderr)",
+ )
+
+
+class DBConfig(_StrictModel):
+ """
+ Configuration for database connections.
+
+ Supported URL schemes:
+
+ * sqlite+aiosqlite: SQLite database using the aiosqlite driver
+ """
+
+ url: str = Field(
+ "sqlite+aiosqlite:///pythagora.db",
+ description="Database connection URL",
+ )
+ debug_sql: bool = Field(False, description="Log all SQL queries to the console")
+
+ @field_validator("url")
+ @classmethod
+ def validate_url_scheme(cls, v: str) -> str:
+ if v.startswith("sqlite+aiosqlite://"):
+ return v
+ raise ValueError(f"Unsupported database URL scheme in: {v}")
+
+
+class PlainUIConfig(_StrictModel):
+ """
+ Configuration for plaintext console UI.
+ """
+
+ type: Literal[UIAdapter.PLAIN] = UIAdapter.PLAIN
+
+
+class LocalIPCConfig(_StrictModel):
+ """
+ Configuration for VSCode extension IPC client.
+ """
+
+ type: Literal[UIAdapter.IPC_CLIENT] = UIAdapter.IPC_CLIENT
+ host: str = "localhost"
+ port: int = 8125
+
+
+UIConfig = Annotated[
+ Union[PlainUIConfig, LocalIPCConfig],
+ Field(discriminator="type"),
+]
+
+
+class FileSystemType(str, Enum):
+ """
+ Supported filesystem types.
+ """
+
+ MEMORY = "memory"
+ LOCAL = "local"
+
+
+class FileSystemConfig(_StrictModel):
+ """
+ Configuration for project workspace.
+ """
+
+ type: Literal[FileSystemType.LOCAL] = FileSystemType.LOCAL
+ workspace_root: str = Field(
+ join(ROOT_DIR, "workspace"),
+ description="Workspace directory containing all the projects",
+ )
+ ignore_paths: list[str] = Field(
+ DEFAULT_IGNORE_PATHS,
+ description="List of paths to ignore when scanning for files and folders",
+ )
+ ignore_size_threshold: int = Field(
+ IGNORE_SIZE_THRESHOLD,
+ description="Files larger than this size should be ignored",
+ )
+
+
+class Config(_StrictModel):
+ """
+ Pythagora Core configuration
+ """
+
+ llm: dict[LLMProvider, ProviderConfig] = Field(default={LLMProvider.OPENAI: ProviderConfig()})
+ agent: dict[str, AgentLLMConfig] = Field(
+ default={
+ DEFAULT_AGENT_NAME: AgentLLMConfig(),
+ DESCRIBE_FILES_AGENT_NAME: AgentLLMConfig(model="gpt-3.5-turbo", temperature=0.0),
+ }
+ )
+ prompt: PromptConfig = PromptConfig()
+ log: LogConfig = LogConfig()
+ db: DBConfig = DBConfig()
+ ui: UIConfig = PlainUIConfig()
+ fs: FileSystemConfig = FileSystemConfig()
+
+ def llm_for_agent(self, agent_name: str = "default") -> LLMConfig:
+ """
+ Fetch an LLM configuration for a given agent.
+
+ If the agent specific configuration doesn't exist, returns the configuration
+ for the 'default' agent.
+ """
+
+ agent_name = agent_name if agent_name in self.agent else "default"
+ agent_config = self.agent[agent_name]
+ provider_config = self.llm[agent_config.provider]
+ return LLMConfig.from_provider_and_agent_configs(provider_config, agent_config)
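+
+# Example config.json accepted by the Config schema above (a minimal sketch;
+# the API key is a placeholder, and "//" comment lines are stripped by the
+# ConfigLoader below before parsing):
+#
+# {
+#     "llm": {
+#         "openai": {"api_key": "sk-..."}
+#     },
+#     "agent": {
+#         "default": {"provider": "openai", "model": "gpt-4-turbo"}
+#     },
+#     "log": {"level": "INFO"}
+# }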
+
+
+class ConfigLoader:
+ """
+ Configuration loader takes care of loading and parsing configuration files.
+
+ The default loader is already initialized as `core.config.loader`. To
+ load the configuration from a file, use `core.config.loader.load(path)`.
+
+ To get the current configuration, use `core.config.get_config()`.
+ """
+
+ config: Config
+ config_path: Optional[str]
+
+ def __init__(self):
+ self.config_path = None
+ self.config = Config()
+
+ @staticmethod
+ def _remove_json_comments(json_str: str) -> str:
+ """
+ Remove comments from a JSON string.
+
+ Removes all lines that start with "//" from the JSON string.
+
+ :param json_str: JSON string with comments.
+ :return: JSON string without comments.
+ """
+ return "\n".join([line for line in json_str.splitlines() if not line.strip().startswith("//")])
+
+ @classmethod
+ def from_json(cls: "ConfigLoader", config: str) -> Config:
+ """
+ Parse JSON Into a Config object.
+
+ :param config: JSON string to parse.
+ :return: Config object.
+ """
+ return Config.model_validate_json(cls._remove_json_comments(config), strict=True)
+
+ def load(self, path: str) -> Config:
+ """
+ Load a configuration from a file.
+
+ :param path: Path to the configuration file.
+ :return: Config object.
+ """
+ with open(path, "rb") as f:
+ raw_config = f.read()
+
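+ # Heuristic: raw NUL bytes almost certainly mean the file is UTF-16
+ # (e.g. created by a Windows tool); otherwise assume UTF-8.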
+ if b"\x00" in raw_config:
+ encoding = "utf-16"
+ else:
+ encoding = "utf-8"
+
+ text_config = raw_config.decode(encoding)
+ self.config = self.from_json(text_config)
+ self.config_path = path
+ return self.config
+
+
+loader = ConfigLoader()
+
+
+def get_config() -> Config:
+ """
+ Return current configuration.
+
+ :return: Current configuration object.
+ """
+ return loader.config
+
+
+__all__ = ["loader", "get_config"]
diff --git a/core/config/env_importer.py b/core/config/env_importer.py
new file mode 100644
index 00000000..ec545c96
--- /dev/null
+++ b/core/config/env_importer.py
@@ -0,0 +1,90 @@
+from os.path import dirname, exists, join
+
+from dotenv import dotenv_values
+
+from core.config import Config, LLMProvider, ProviderConfig, loader
+
+
+def import_from_dotenv(new_config_path: str) -> bool:
+ """
+ Import configuration from old gpt-pilot .env file and save it to a new format.
+
+ If the configuration is already loaded, does nothing. If the target file
+ already exists, it's parsed as is (it's not overwritten).
+
+ Otherwise, loads the values from `pilot/.env` file and creates a new configuration
+ with the relevant settings.
+
+ This intentionally DOES NOT load the .env variables into the current process
+ environment, to avoid polluting it with old settings.
+
+ :param new_config_path: Path to save the new configuration file.
+ :return: True if the configuration exists or was imported, False otherwise.
+ """
+ if loader.config_path or exists(new_config_path):
+ # Config already exists, nothing to do
+ return True
+
+ env_path = join(dirname(__file__), "..", "..", "pilot", ".env")
+ if not exists(env_path):
+ return False
+
+ values = dotenv_values(env_path)
+ if not values:
+ return False
+
+ config = convert_config(values)
+
+ with open(new_config_path, "w", encoding="utf-8") as fp:
+ fp.write(config.model_dump_json(indent=2))
+
+ return True
+
+
+def convert_config(values: dict) -> Config:
+ config = Config()
+
+ for provider in LLMProvider:
+ endpoint = values.get(f"{provider.value.upper()}_ENDPOINT")
+ key = values.get(f"{provider.value.upper()}_API_KEY")
+
+ if provider == LLMProvider.OPENAI:
+ # OpenAI is also used for Azure and OpenRouter and local LLMs
+ if endpoint is None:
+ endpoint = values.get("AZURE_ENDPOINT")
+ if endpoint is None:
+ endpoint = values.get("OPENROUTER_ENDPOINT")
+
+ if key is None:
+ key = values.get("AZURE_API_KEY")
+ if key is None:
+ key = values.get("OPENROUTER_API_KEY")
+ if key and endpoint is None:
+ endpoint = "https://openrouter.ai/api/v1/chat/completions"
+
+ if (endpoint or key) and provider not in config.llm:
+ config.llm[provider] = ProviderConfig()
+
+ if endpoint:
+ endpoint = endpoint.replace("chat/completions", "")
+ config.llm[provider].base_url = endpoint
+ if key:
+ config.llm[provider].api_key = key
+
+ provider = "openai"
+ model = values.get("MODEL_NAME", "gpt-4-turbo")
+ if "/" in model:
+ provider, model = model.split("/", 1)
+
+ try:
+ agent_provider = LLMProvider(provider.lower())
+ except ValueError:
+ agent_provider = LLMProvider.OPENAI
+
+ config.agent["default"].model = model
+ config.agent["default"].provider = agent_provider
+
+ ignore_paths = [p for p in values.get("IGNORE_PATHS", "").split(",") if p]
+ if ignore_paths:
+ config.fs.ignore_paths += ignore_paths
+ return config
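+
+# Sketch of the conversion (hypothetical .env values): MODEL_NAME="anthropic/claude-3-opus"
+# maps to config.agent["default"] with provider "anthropic" and model
+# "claude-3-opus", while IGNORE_PATHS="logs,tmp" appends both entries to
+# config.fs.ignore_paths.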
diff --git a/core/config/user_settings.py b/core/config/user_settings.py
new file mode 100644
index 00000000..fb53672b
--- /dev/null
+++ b/core/config/user_settings.py
@@ -0,0 +1,94 @@
+import sys
+from os import getenv, makedirs
+from pathlib import Path
+from uuid import uuid4
+
+from pydantic import BaseModel, Field, PrivateAttr
+
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+SETTINGS_APP_NAME = "GPT Pilot"
+DEFAULT_TELEMETRY_ENDPOINT = "https://api.pythagora.io/telemetry"
+
+
+class TelemetrySettings(BaseModel):
+ id: str = Field(default_factory=lambda: uuid4().hex, description="Unique telemetry ID")
+ enabled: bool = Field(True, description="Whether telemetry should send stats to the server")
+ endpoint: str = Field(DEFAULT_TELEMETRY_ENDPOINT, description="Telemetry server endpoint")
+
+
+def resolve_config_dir() -> Path:
+ """
+ Figure out where to store the global config file(s).
+
+ :return: path to the desired config directory
+
+ See the UserSettings docstring for details on how the config directory is
+ determined.
+ """
+ posix_app_name = SETTINGS_APP_NAME.replace(" ", "-").lower()
+
+ xdg_config_home = getenv("XDG_CONFIG_HOME")
+ if xdg_config_home:
+ return Path(xdg_config_home) / Path(posix_app_name)
+
+ if sys.platform == "win32" and getenv("APPDATA"):
+ return Path(getenv("APPDATA")) / Path(SETTINGS_APP_NAME)
+
+ return Path("~").expanduser() / Path(f".{posix_app_name}")
+
+
+class UserSettings(BaseModel):
+ """
+ This object holds all the global user settings that apply to
+ all Pythagora/GPT-Pilot installations.
+
+ The user settings are stored in a JSON file in the config directory.
+
+ The config directory is determined by the following rules:
+ * If the XDG_CONFIG_HOME environment variable is set (desktop Linux), use that.
+ * If the APPDATA environment variable is set (Windows), use that.
+ * Otherwise, use the POSIX default ~/.gpt-pilot (macOS, server Linux).
+
+ This is a singleton object, use it by importing the instance directly
+ from the module:
+
+ >>> from core.config.user_settings import settings
+ >>> print(settings.telemetry.id)
+ >>> print(settings.config_path)
+ """
+
+ telemetry: TelemetrySettings = TelemetrySettings()
+ _config_path: str = PrivateAttr("")
+
+ @staticmethod
+ def load():
+ config_path = resolve_config_dir() / "config.json"
+
+ if not config_path.exists():
+ default = UserSettings()
+ default._config_path = str(config_path)
+ default.save()
+
+ with open(config_path, "r", encoding="utf-8") as fp:
+ settings = UserSettings.model_validate_json(fp.read())
+ settings._config_path = str(config_path)
+ return settings
+
+ def save(self):
+ makedirs(Path(self._config_path).parent, exist_ok=True)
+ with open(self._config_path, "w", encoding="utf-8") as fp:
+ fp.write(self.model_dump_json(indent=2))
+
+ @property
+ def config_path(self):
+ return self._config_path
+
+
+settings = UserSettings.load()
+
+
+__all__ = ["settings"]
diff --git a/core/config/version.py b/core/config/version.py
new file mode 100644
index 00000000..ee9feb3a
--- /dev/null
+++ b/core/config/version.py
@@ -0,0 +1,86 @@
+import re
+from os.path import abspath, basename, dirname, isdir, isfile, join
+from typing import Optional
+
+GIT_DIR_PATH = abspath(join(dirname(__file__), "..", "..", ".git"))
+
+
+def get_git_commit() -> Optional[str]:
+ """
+ Return the current git commit (if running from a repo).
+
+ :return: commit hash or None if not running from a git repo
+ """
+
+ if not isdir(GIT_DIR_PATH):
+ return None
+
+ git_head = join(GIT_DIR_PATH, "HEAD")
+ if not isfile(git_head):
+ return None
+
+ with open(git_head, "r", encoding="utf-8") as f:
+ ref = f.read().strip()
+
+ # Direct reference to commit hash
+ if not ref.startswith("ref: "):
+ return ref
+
+ # Follow the reference
+ ref = ref[5:]
+ ref_path = join(GIT_DIR_PATH, ref)
+
+ # Dangling reference, return the reference name
+ if not isfile(ref_path):
+ return basename(ref_path)
+
+ # Return the reference commit hash
+ with open(ref_path, "r", encoding="utf-8") as f:
+ return f.read().strip()
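+
+# For example, if .git/HEAD contains "ref: refs/heads/main", the commit hash is
+# read from .git/refs/heads/main; a detached HEAD stores the hash directly.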
+
+
+def get_package_version() -> str:
+ """
+ Get package version as defined in pyproject.toml.
+
+ If not found, returns "0.0.0".
+
+ :return: package version as defined in pyproject.toml
+ """
+ UNKNOWN = "0.0.0"
+ PYPOETRY_VERSION_PATTERN = re.compile(r'^\s*version\s*=\s*"(.*)"\s*(#.*)?$')
+
+ pyproject_path = join(dirname(__file__), "..", "..", "pyproject.toml")
+ if not isfile(pyproject_path):
+ return UNKNOWN
+
+ with open(pyproject_path, "r", encoding="utf-8") as fp:
+ for line in fp:
+ m = PYPOETRY_VERSION_PATTERN.match(line)
+ if m:
+ return m.group(1)
+
+ return UNKNOWN
+
+
+def get_version() -> str:
+ """
+ Find and return the current version of Pythagora Core.
+
+ The version string is built from the package version and the current
+ git commit hash (if running from a git repo).
+
+ Example: 0.0.0-gitbf01c19
+
+ :return: version string
+ """
+
+ version = get_package_version()
+ commit = get_git_commit()
+ if commit:
+ version = version + "-git" + commit[:7]
+
+ return version
+
+
+__all__ = ["get_version"]
diff --git a/core/db/__init__.py b/core/db/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/core/db/alembic.ini b/core/db/alembic.ini
new file mode 100644
index 00000000..bb4d8250
--- /dev/null
+++ b/core/db/alembic.ini
@@ -0,0 +1,116 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts
+script_location = core/db/migrations
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.
+prepend_sys_path = .
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the python>=3.9 or backports.zoneinfo library.
+# Any required deps can installed by adding `alembic[tz]` to the pip requirements
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the
+# "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to migrations/versions. When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "version_path_separator" below.
+version_locations = core/db/migrations/versions
+
+# version path separator; As mentioned above, this is the character used to split
+# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
+# Valid values for version_path_separator are:
+#
+# version_path_separator = :
+# version_path_separator = ;
+# version_path_separator = space
+# Use os.pathsep. Default configuration used for new projects.
+version_path_separator = os
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+sqlalchemy.url = sqlite:///pythagora.db
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts. See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
+hooks = ruff
+ruff.type = exec
+ruff.executable = ruff
+ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/core/db/migrations/README b/core/db/migrations/README
new file mode 100644
index 00000000..98e4f9c4
--- /dev/null
+++ b/core/db/migrations/README
@@ -0,0 +1 @@
+Generic single-database configuration.
\ No newline at end of file
diff --git a/core/db/migrations/env.py b/core/db/migrations/env.py
new file mode 100644
index 00000000..aea84ff5
--- /dev/null
+++ b/core/db/migrations/env.py
@@ -0,0 +1,83 @@
+from logging.config import fileConfig
+
+from alembic import context
+from sqlalchemy import engine_from_config, pool
+
+from core.db.models import Base
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Interpret the config file for Python logging.
+# This line sets up loggers basically.
+if config.config_file_name is not None and not config.get_main_option("pythagora_runtime"):
+ fileConfig(config.config_file_name)
+
+# Set database URL from environment
+# config.set_main_option("sqlalchemy.url", getenv("DATABASE_URL"))
+
+# add your model's MetaData object here
+# for 'autogenerate' support
+target_metadata = Base.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+ """Run migrations in 'offline' mode.
+
+ This configures the context with just a URL
+ and not an Engine, though an Engine is acceptable
+ here as well. By skipping the Engine creation
+ we don't even need a DBAPI to be available.
+
+ Calls to context.execute() here emit the given string to the
+ script output.
+
+ """
+ url = config.get_main_option("sqlalchemy.url")
+ context.configure(
+ url=url,
+ target_metadata=target_metadata,
+ literal_binds=True,
+ dialect_opts={"paramstyle": "named"},
+ render_as_batch="sqlite://" in url,
+ )
+
+ with context.begin_transaction():
+ context.run_migrations()
+
+
+def run_migrations_online() -> None:
+ """Run migrations in 'online' mode.
+
+ In this scenario we need to create an Engine
+ and associate a connection with the context.
+
+ """
+ url = config.get_main_option("sqlalchemy.url")
+ connectable = engine_from_config(
+ config.get_section(config.config_ini_section, {}),
+ prefix="sqlalchemy.",
+ poolclass=pool.NullPool,
+ )
+
+ with connectable.connect() as connection:
+ context.configure(
+ connection=connection,
+ target_metadata=target_metadata,
+ render_as_batch="sqlite://" in url,
+ )
+
+ with context.begin_transaction():
+ context.run_migrations()
+
+
+if context.is_offline_mode():
+ run_migrations_offline()
+else:
+ run_migrations_online()
diff --git a/core/db/migrations/script.py.mako b/core/db/migrations/script.py.mako
new file mode 100644
index 00000000..fbc4b07d
--- /dev/null
+++ b/core/db/migrations/script.py.mako
@@ -0,0 +1,26 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+ ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+ ${downgrades if downgrades else "pass"}
diff --git a/core/db/migrations/versions/4f79e6952354_added_complexity_to_specification.py b/core/db/migrations/versions/4f79e6952354_added_complexity_to_specification.py
new file mode 100644
index 00000000..c089eb9d
--- /dev/null
+++ b/core/db/migrations/versions/4f79e6952354_added_complexity_to_specification.py
@@ -0,0 +1,34 @@
+"""added complexity to specification
+
+Revision ID: 4f79e6952354
+Revises: 5b04ea6afce5
+Create Date: 2024-05-16 18:01:49.024811
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "4f79e6952354"
+down_revision: Union[str, None] = "5b04ea6afce5"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ with op.batch_alter_table("specifications", schema=None) as batch_op:
+ batch_op.add_column(sa.Column("complexity", sa.String(), server_default="hard", nullable=False))
+
+ # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ with op.batch_alter_table("specifications", schema=None) as batch_op:
+ batch_op.drop_column("complexity")
+
+ # ### end Alembic commands ###
diff --git a/core/db/migrations/versions/5b04ea6afce5_add_agent_info_to_llm_request_log.py b/core/db/migrations/versions/5b04ea6afce5_add_agent_info_to_llm_request_log.py
new file mode 100644
index 00000000..3ca14643
--- /dev/null
+++ b/core/db/migrations/versions/5b04ea6afce5_add_agent_info_to_llm_request_log.py
@@ -0,0 +1,34 @@
+"""add agent info to llm request log
+
+Revision ID: 5b04ea6afce5
+Revises: fd206d3095d0
+Create Date: 2024-05-12 11:07:40.271217
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "5b04ea6afce5"
+down_revision: Union[str, None] = "fd206d3095d0"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ with op.batch_alter_table("llm_requests", schema=None) as batch_op:
+ batch_op.add_column(sa.Column("agent", sa.String(), nullable=True))
+
+ # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ with op.batch_alter_table("llm_requests", schema=None) as batch_op:
+ batch_op.drop_column("agent")
+
+ # ### end Alembic commands ###
diff --git a/core/db/migrations/versions/e7b54beadf8f_initial.py b/core/db/migrations/versions/e7b54beadf8f_initial.py
new file mode 100644
index 00000000..1d9db050
--- /dev/null
+++ b/core/db/migrations/versions/e7b54beadf8f_initial.py
@@ -0,0 +1,120 @@
+"""initial
+
+Revision ID: e7b54beadf8f
+Revises:
+Create Date: 2024-05-06 09:38:05.391674
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "e7b54beadf8f"
+down_revision: Union[str, None] = None
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.create_table(
+ "file_contents",
+ sa.Column("id", sa.String(), nullable=False),
+ sa.Column("content", sa.String(), nullable=False),
+ sa.PrimaryKeyConstraint("id", name=op.f("pk_file_contents")),
+ )
+ op.create_table(
+ "projects",
+ sa.Column("id", sa.Uuid(), nullable=False),
+ sa.Column("name", sa.String(), nullable=False),
+ sa.Column("created_at", sa.DateTime(), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False),
+ sa.Column("folder_name", sa.String(), nullable=False),
+ sa.PrimaryKeyConstraint("id", name=op.f("pk_projects")),
+ )
+ op.create_table(
+ "specifications",
+ sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+ sa.Column("description", sa.String(), nullable=False),
+ sa.Column("architecture", sa.String(), nullable=False),
+ sa.Column("system_dependencies", sa.JSON(), nullable=False),
+ sa.Column("package_dependencies", sa.JSON(), nullable=False),
+ sa.Column("template", sa.String(), nullable=True),
+ sa.PrimaryKeyConstraint("id", name=op.f("pk_specifications")),
+ )
+ op.create_table(
+ "branches",
+ sa.Column("id", sa.Uuid(), nullable=False),
+ sa.Column("project_id", sa.Uuid(), nullable=False),
+ sa.Column("created_at", sa.DateTime(), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False),
+ sa.Column("name", sa.String(), nullable=False),
+ sa.ForeignKeyConstraint(
+ ["project_id"], ["projects.id"], name=op.f("fk_branches_project_id_projects"), ondelete="CASCADE"
+ ),
+ sa.PrimaryKeyConstraint("id", name=op.f("pk_branches")),
+ )
+ op.create_table(
+ "project_states",
+ sa.Column("id", sa.Uuid(), nullable=False),
+ sa.Column("branch_id", sa.Uuid(), nullable=False),
+ sa.Column("prev_state_id", sa.Uuid(), nullable=True),
+ sa.Column("specification_id", sa.Integer(), nullable=False),
+ sa.Column("created_at", sa.DateTime(), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False),
+ sa.Column("step_index", sa.Integer(), server_default="1", nullable=False),
+ sa.Column("epics", sa.JSON(), nullable=False),
+ sa.Column("tasks", sa.JSON(), nullable=False),
+ sa.Column("steps", sa.JSON(), nullable=False),
+ sa.Column("iterations", sa.JSON(), nullable=False),
+ sa.Column("relevant_files", sa.JSON(), nullable=False),
+ sa.Column("modified_files", sa.JSON(), nullable=False),
+ sa.Column("run_command", sa.String(), nullable=True),
+ sa.ForeignKeyConstraint(
+ ["branch_id"], ["branches.id"], name=op.f("fk_project_states_branch_id_branches"), ondelete="CASCADE"
+ ),
+ sa.ForeignKeyConstraint(
+ ["prev_state_id"],
+ ["project_states.id"],
+ name=op.f("fk_project_states_prev_state_id_project_states"),
+ ondelete="CASCADE",
+ ),
+ sa.ForeignKeyConstraint(
+ ["specification_id"], ["specifications.id"], name=op.f("fk_project_states_specification_id_specifications")
+ ),
+ sa.PrimaryKeyConstraint("id", name=op.f("pk_project_states")),
+ sa.UniqueConstraint("branch_id", "step_index", name=op.f("uq_project_states_branch_id")),
+ sa.UniqueConstraint("prev_state_id", name=op.f("uq_project_states_prev_state_id")),
+ sqlite_autoincrement=True,
+ )
+ op.create_table(
+ "files",
+ sa.Column("id", sa.Integer(), nullable=False),
+ sa.Column("project_state_id", sa.Uuid(), nullable=False),
+ sa.Column("content_id", sa.String(), nullable=False),
+ sa.Column("path", sa.String(), nullable=False),
+ sa.Column("meta", sa.JSON(), server_default="{}", nullable=False),
+ sa.ForeignKeyConstraint(
+ ["content_id"], ["file_contents.id"], name=op.f("fk_files_content_id_file_contents"), ondelete="RESTRICT"
+ ),
+ sa.ForeignKeyConstraint(
+ ["project_state_id"],
+ ["project_states.id"],
+ name=op.f("fk_files_project_state_id_project_states"),
+ ondelete="CASCADE",
+ ),
+ sa.PrimaryKeyConstraint("id", name=op.f("pk_files")),
+ sa.UniqueConstraint("project_state_id", "path", name=op.f("uq_files_project_state_id")),
+ )
+ # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.drop_table("files")
+ op.drop_table("project_states")
+ op.drop_table("branches")
+ op.drop_table("specifications")
+ op.drop_table("projects")
+ op.drop_table("file_contents")
+ # ### end Alembic commands ###
diff --git a/core/db/migrations/versions/fd206d3095d0_store_request_input_exec_logs_to_db.py b/core/db/migrations/versions/fd206d3095d0_store_request_input_exec_logs_to_db.py
new file mode 100644
index 00000000..3c70df90
--- /dev/null
+++ b/core/db/migrations/versions/fd206d3095d0_store_request_input_exec_logs_to_db.py
@@ -0,0 +1,106 @@
+"""store request input exec logs to db
+
+Revision ID: fd206d3095d0
+Revises: e7b54beadf8f
+Create Date: 2024-05-09 08:25:10.698607
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "fd206d3095d0"
+down_revision: Union[str, None] = "e7b54beadf8f"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.create_table(
+ "exec_logs",
+ sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+ sa.Column("branch_id", sa.Uuid(), nullable=False),
+ sa.Column("project_state_id", sa.Uuid(), nullable=True),
+ sa.Column("started_at", sa.DateTime(), nullable=False),
+ sa.Column("duration", sa.Float(), nullable=False),
+ sa.Column("cmd", sa.String(), nullable=False),
+ sa.Column("cwd", sa.String(), nullable=False),
+ sa.Column("env", sa.JSON(), nullable=False),
+ sa.Column("timeout", sa.Float(), nullable=True),
+ sa.Column("status_code", sa.Integer(), nullable=True),
+ sa.Column("stdout", sa.String(), nullable=False),
+ sa.Column("stderr", sa.String(), nullable=False),
+ sa.Column("analysis", sa.String(), nullable=False),
+ sa.Column("success", sa.Boolean(), nullable=False),
+ sa.ForeignKeyConstraint(
+ ["branch_id"], ["branches.id"], name=op.f("fk_exec_logs_branch_id_branches"), ondelete="CASCADE"
+ ),
+ sa.ForeignKeyConstraint(
+ ["project_state_id"],
+ ["project_states.id"],
+ name=op.f("fk_exec_logs_project_state_id_project_states"),
+ ondelete="SET NULL",
+ ),
+ sa.PrimaryKeyConstraint("id", name=op.f("pk_exec_logs")),
+ )
+ op.create_table(
+ "llm_requests",
+ sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+ sa.Column("branch_id", sa.Uuid(), nullable=False),
+ sa.Column("project_state_id", sa.Uuid(), nullable=True),
+ sa.Column("started_at", sa.DateTime(), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False),
+ sa.Column("provider", sa.String(), nullable=False),
+ sa.Column("model", sa.String(), nullable=False),
+ sa.Column("temperature", sa.Float(), nullable=False),
+ sa.Column("messages", sa.JSON(), nullable=False),
+ sa.Column("response", sa.String(), nullable=True),
+ sa.Column("prompt_tokens", sa.Integer(), nullable=False),
+ sa.Column("completion_tokens", sa.Integer(), nullable=False),
+ sa.Column("duration", sa.Float(), nullable=False),
+ sa.Column("status", sa.String(), nullable=False),
+ sa.Column("error", sa.String(), nullable=True),
+ sa.ForeignKeyConstraint(
+ ["branch_id"], ["branches.id"], name=op.f("fk_llm_requests_branch_id_branches"), ondelete="CASCADE"
+ ),
+ sa.ForeignKeyConstraint(
+ ["project_state_id"],
+ ["project_states.id"],
+ name=op.f("fk_llm_requests_project_state_id_project_states"),
+ ondelete="SET NULL",
+ ),
+ sa.PrimaryKeyConstraint("id", name=op.f("pk_llm_requests")),
+ )
+ op.create_table(
+ "user_inputs",
+ sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+ sa.Column("branch_id", sa.Uuid(), nullable=False),
+ sa.Column("project_state_id", sa.Uuid(), nullable=True),
+ sa.Column("created_at", sa.DateTime(), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False),
+ sa.Column("question", sa.String(), nullable=False),
+ sa.Column("answer_text", sa.String(), nullable=True),
+ sa.Column("answer_button", sa.String(), nullable=True),
+ sa.Column("cancelled", sa.Boolean(), nullable=False),
+ sa.ForeignKeyConstraint(
+ ["branch_id"], ["branches.id"], name=op.f("fk_user_inputs_branch_id_branches"), ondelete="CASCADE"
+ ),
+ sa.ForeignKeyConstraint(
+ ["project_state_id"],
+ ["project_states.id"],
+ name=op.f("fk_user_inputs_project_state_id_project_states"),
+ ondelete="SET NULL",
+ ),
+ sa.PrimaryKeyConstraint("id", name=op.f("pk_user_inputs")),
+ )
+ # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.drop_table("user_inputs")
+ op.drop_table("llm_requests")
+ op.drop_table("exec_logs")
+ # ### end Alembic commands ###
diff --git a/core/db/models/__init__.py b/core/db/models/__init__.py
new file mode 100644
index 00000000..1b81fb88
--- /dev/null
+++ b/core/db/models/__init__.py
@@ -0,0 +1,29 @@
+# Pythagora database models
+#
+# Always import models from this module to ensure the SQLAlchemy registry
+# is correctly populated.
+
+from .base import Base
+from .branch import Branch
+from .exec_log import ExecLog
+from .file import File
+from .file_content import FileContent
+from .llm_request import LLMRequest
+from .project import Project
+from .project_state import ProjectState
+from .specification import Complexity, Specification
+from .user_input import UserInput
+
+__all__ = [
+ "Base",
+ "Branch",
+ "Complexity",
+ "ExecLog",
+ "File",
+ "FileContent",
+ "LLMRequest",
+ "Project",
+ "ProjectState",
+ "Specification",
+ "UserInput",
+]
diff --git a/core/db/models/base.py b/core/db/models/base.py
new file mode 100644
index 00000000..238fd4df
--- /dev/null
+++ b/core/db/models/base.py
@@ -0,0 +1,45 @@
+# DeclarativeBase enables declarative configuration of
+# database models within SQLAlchemy.
+#
+# It also sets up a registry for the classes that inherit from it,
+# so that SQLAlchemy understands how they map to database tables.
+
+from sqlalchemy import MetaData
+from sqlalchemy.ext.asyncio import AsyncAttrs
+from sqlalchemy.orm import DeclarativeBase
+from sqlalchemy.types import JSON
+
+
+class Base(AsyncAttrs, DeclarativeBase):
+ """Base class for all SQL database models."""
+
+ # Mapping of Python types to SQLAlchemy types.
+ type_annotation_map = {
+ list[dict]: JSON,
+ list[str]: JSON,
+ dict: JSON,
+ }
+
+ metadata = MetaData(
+ # Naming conventions for constraints, foreign keys, etc.
+ naming_convention={
+ "ix": "ix_%(column_0_label)s",
+ "uq": "uq_%(table_name)s_%(column_0_name)s",
+ "ck": "ck_%(table_name)s_`%(constraint_name)s`",
+ "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s",
+ "pk": "pk_%(table_name)s",
+ }
+ )
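+
+ # Under this convention, e.g., the foreign key from files.content_id to
+ # file_contents is named "fk_files_content_id_file_contents", matching the
+ # constraint names in the migration scripts.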
+
+ def __eq__(self, other) -> bool:
+ """
+ Two instances of the same model class are the same if their
+ IDs are the same.
+
+ This allows comparison of models bound to different sessions.
+ """
+ return isinstance(other, self.__class__) and self.id == other.id
+
+ def __repr__(self) -> str:
+ """Return a string representation of the model."""
+ return f"<{self.__class__.__name__}(id={self.id})>"
diff --git a/core/db/models/branch.py b/core/db/models/branch.py
new file mode 100644
index 00000000..76ab4c95
--- /dev/null
+++ b/core/db/models/branch.py
@@ -0,0 +1,89 @@
+from datetime import datetime
+from typing import TYPE_CHECKING, Optional, Union
+from uuid import UUID, uuid4
+
+from sqlalchemy import ForeignKey, inspect, select
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+from sqlalchemy.sql import func
+
+from core.db.models import Base
+
+if TYPE_CHECKING:
+ from sqlalchemy.ext.asyncio import AsyncSession
+
+ from core.db.models import ExecLog, LLMRequest, Project, ProjectState, UserInput
+
+
+class Branch(Base):
+ __tablename__ = "branches"
+
+ DEFAULT = "main"
+
+ # ID and parent FKs
+ id: Mapped[UUID] = mapped_column(primary_key=True, default=uuid4)
+ project_id: Mapped[UUID] = mapped_column(ForeignKey("projects.id", ondelete="CASCADE"))
+
+ # Attributes
+ created_at: Mapped[datetime] = mapped_column(server_default=func.now())
+ name: Mapped[str] = mapped_column(default=DEFAULT)
+
+ # Relationships
+ project: Mapped["Project"] = relationship(back_populates="branches", lazy="selectin")
+ states: Mapped[list["ProjectState"]] = relationship(back_populates="branch", cascade="all")
+ llm_requests: Mapped[list["LLMRequest"]] = relationship(back_populates="branch", cascade="all")
+ user_inputs: Mapped[list["UserInput"]] = relationship(back_populates="branch", cascade="all")
+ exec_logs: Mapped[list["ExecLog"]] = relationship(back_populates="branch", cascade="all")
+
+ @staticmethod
+ async def get_by_id(session: "AsyncSession", branch_id: Union[str, UUID]) -> Optional["Branch"]:
+ """
+ Get a branch by ID.
+
+ :param session: The SQLAlchemy session.
+ :param branch_id: The branch ID (as str or UUID value).
+ :return: The Branch object if found, None otherwise.
+ """
+ if not isinstance(branch_id, UUID):
+ branch_id = UUID(branch_id)
+
+ result = await session.execute(select(Branch).where(Branch.id == branch_id))
+ return result.scalar_one_or_none()
+
+ async def get_last_state(self) -> Optional["ProjectState"]:
+ """
+ Get the last project state of the branch.
+
+ :return: The last state of the branch, or None if there are no states.
+ """
+
+ from core.db.models import ProjectState
+
+ session = inspect(self).async_session
+ if session is None:
+ raise ValueError("Branch instance not associated with a DB session.")
+
+ result = await session.execute(
+ select(ProjectState)
+ .where(ProjectState.branch_id == self.id)
+ .order_by(ProjectState.step_index.desc())
+ .limit(1)
+ )
+ return result.scalar_one_or_none()
+
+ async def get_state_at_step(self, step_index: int) -> Optional["ProjectState"]:
+ """
+ Get the project state at the given step index for the branch.
+
+ :return: The indicated step within the branch, or None if there's no such step.
+ """
+
+ from core.db.models import ProjectState
+
+ session = inspect(self).async_session
+ if session is None:
+ raise ValueError("Branch instance not associated with a DB session.")
+
+ result = await session.execute(
+ select(ProjectState).where((ProjectState.branch_id == self.id) & (ProjectState.step_index == step_index))
+ )
+ return result.scalar_one_or_none()
diff --git a/core/db/models/exec_log.py b/core/db/models/exec_log.py
new file mode 100644
index 00000000..b24907de
--- /dev/null
+++ b/core/db/models/exec_log.py
@@ -0,0 +1,71 @@
+from datetime import datetime
+from typing import TYPE_CHECKING, Optional
+from uuid import UUID
+
+from sqlalchemy import ForeignKey, inspect
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from core.db.models import Base
+from core.proc.exec_log import ExecLog as ExecLogData
+
+if TYPE_CHECKING:
+ from core.db.models import Branch, ProjectState
+
+
+class ExecLog(Base):
+ __tablename__ = "exec_logs"
+
+ # ID and parent FKs
+ id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+ branch_id: Mapped[UUID] = mapped_column(ForeignKey("branches.id", ondelete="CASCADE"))
+ project_state_id: Mapped[Optional[UUID]] = mapped_column(ForeignKey("project_states.id", ondelete="SET NULL"))
+
+ # Attributes
+ started_at: Mapped[datetime] = mapped_column()
+ duration: Mapped[float] = mapped_column()
+ cmd: Mapped[str] = mapped_column()
+ cwd: Mapped[str] = mapped_column()
+ env: Mapped[dict] = mapped_column()
+ timeout: Mapped[Optional[float]] = mapped_column()
+ status_code: Mapped[Optional[int]] = mapped_column()
+ stdout: Mapped[str] = mapped_column()
+ stderr: Mapped[str] = mapped_column()
+ analysis: Mapped[str] = mapped_column()
+ success: Mapped[bool] = mapped_column()
+
+ # Relationships
+ branch: Mapped["Branch"] = relationship(back_populates="exec_logs")
+ project_state: Mapped["ProjectState"] = relationship(back_populates="exec_logs")
+
+ @classmethod
+ def from_exec_log(cls, project_state: "ProjectState", exec_log: ExecLogData) -> "ExecLog":
+ """
+ Store the command execution log in the database.
+
+ Note this just creates the ExecLog object. It is committed to the
+ database only when the DB session itself is committed.
+
+ :param project_state: Project state to associate the execution log with.
+ :param exec_log: The execution log data.
+ :return: Newly created ExecLog object (not yet committed).
+ """
+ session = inspect(project_state).async_session
+
+ obj = cls(
+ project_state=project_state,
+ branch=project_state.branch,
+ started_at=exec_log.started_at,
+ duration=exec_log.duration,
+ cmd=exec_log.cmd,
+ cwd=exec_log.cwd,
+ env=exec_log.env,
+ timeout=exec_log.timeout,
+ status_code=exec_log.status_code,
+ stdout=exec_log.stdout,
+ stderr=exec_log.stderr,
+ analysis=exec_log.analysis,
+ success=exec_log.success,
+ )
+ session.add(obj)
+ return obj
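+
+# Usage sketch (illustrative; assumes `state` is a session-bound ProjectState
+# and `log_data` is a core.proc.exec_log.ExecLog instance):
+#
+# row = ExecLog.from_exec_log(state, log_data)
+# await session.commit()  # from_exec_log only adds the row to the session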
diff --git a/core/db/models/file.py b/core/db/models/file.py
new file mode 100644
index 00000000..96d8891e
--- /dev/null
+++ b/core/db/models/file.py
@@ -0,0 +1,43 @@
+from typing import TYPE_CHECKING, Optional
+from uuid import UUID
+
+from sqlalchemy import ForeignKey, UniqueConstraint
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from core.db.models import Base
+
+if TYPE_CHECKING:
+ from core.db.models import FileContent, ProjectState
+
+
+class File(Base):
+ __tablename__ = "files"
+ __table_args__ = (UniqueConstraint("project_state_id", "path"),)
+
+ # ID and parent FKs
+ id: Mapped[int] = mapped_column(primary_key=True)
+ project_state_id: Mapped[UUID] = mapped_column(ForeignKey("project_states.id", ondelete="CASCADE"))
+ content_id: Mapped[str] = mapped_column(ForeignKey("file_contents.id", ondelete="RESTRICT"))
+
+ # Attributes
+ path: Mapped[str] = mapped_column()
+ meta: Mapped[dict] = mapped_column(default=dict, server_default="{}")
+
+ # Relationships
+ project_state: Mapped[Optional["ProjectState"]] = relationship(back_populates="files")
+ content: Mapped["FileContent"] = relationship(back_populates="files", lazy="selectin")
+
+ def clone(self) -> "File":
+ """
+ Clone the file object, to be used in a new project state.
+
+ The clone references the same file content object as the original.
+
+ :return: The cloned file object.
+ """
+ return File(
+ project_state=None,
+ content_id=self.content_id,
+ path=self.path,
+ meta=self.meta,
+ )
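+
+# Usage sketch (illustrative): a clone shares the content row with the
+# original, so copying a project state never duplicates file contents:
+#
+# copy = file.clone()
+# assert copy.content_id == file.content_id and copy.project_state is None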
diff --git a/core/db/models/file_content.py b/core/db/models/file_content.py
new file mode 100644
index 00000000..f2cc4cc9
--- /dev/null
+++ b/core/db/models/file_content.py
@@ -0,0 +1,47 @@
+from typing import TYPE_CHECKING
+
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from core.db.models import Base
+
+if TYPE_CHECKING:
+ from core.db.models import File
+
+
+class FileContent(Base):
+ __tablename__ = "file_contents"
+
+ # ID and parent FKs
+ id: Mapped[str] = mapped_column(primary_key=True)
+
+ # Attributes
+ content: Mapped[str] = mapped_column()
+
+ # Relationships
+ files: Mapped[list["File"]] = relationship(back_populates="content")
+
+ @classmethod
+ async def store(cls, session: AsyncSession, hash: str, content: str) -> "FileContent":
+ """
+ Store the file content in the database.
+
+ If the content is already stored, returns the reference to the existing
+ content object. Otherwise stores it to the database and returns the newly
+ created content object.
+
+ :param session: The database session.
+ :param hash: The hash of the file content, used as a unique ID.
+ :param content: The file content as unicode string.
+ :return: The file content object.
+ """
+ result = await session.execute(select(FileContent).where(FileContent.id == hash))
+ fc = result.scalar_one_or_none()
+ if fc is not None:
+ return fc
+
+ fc = cls(id=hash, content=content)
+ session.add(fc)
+
+ return fc
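+
+# Usage sketch (illustrative; assumes an open AsyncSession `session`). The
+# content hash doubles as the primary key, so identical contents are stored
+# only once:
+#
+# from hashlib import sha1
+# content = "print('hello')"
+# fc = await FileContent.store(session, sha1(content.encode("utf-8")).hexdigest(), content)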
diff --git a/core/db/models/llm_request.py b/core/db/models/llm_request.py
new file mode 100644
index 00000000..e171cfdd
--- /dev/null
+++ b/core/db/models/llm_request.py
@@ -0,0 +1,79 @@
+from datetime import datetime
+from typing import TYPE_CHECKING, Optional
+from uuid import UUID
+
+from sqlalchemy import ForeignKey, inspect
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+from sqlalchemy.sql import func
+
+from core.db.models import Base
+from core.llm.request_log import LLMRequestLog
+
+if TYPE_CHECKING:
+ from core.agents.base import BaseAgent
+ from core.db.models import Branch, ProjectState
+
+
+class LLMRequest(Base):
+ __tablename__ = "llm_requests"
+
+ # ID and parent FKs
+ id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+ branch_id: Mapped[UUID] = mapped_column(ForeignKey("branches.id", ondelete="CASCADE"))
+ project_state_id: Mapped[Optional[UUID]] = mapped_column(ForeignKey("project_states.id", ondelete="SET NULL"))
+
+ # Attributes
+ started_at: Mapped[datetime] = mapped_column(server_default=func.now())
+ agent: Mapped[Optional[str]] = mapped_column()
+ provider: Mapped[str] = mapped_column()
+ model: Mapped[str] = mapped_column()
+ temperature: Mapped[float] = mapped_column()
+ messages: Mapped[list[dict]] = mapped_column()
+ response: Mapped[Optional[str]] = mapped_column()
+ prompt_tokens: Mapped[int] = mapped_column()
+ completion_tokens: Mapped[int] = mapped_column()
+ duration: Mapped[float] = mapped_column()
+ status: Mapped[str] = mapped_column()
+ error: Mapped[Optional[str]] = mapped_column()
+
+ # Relationships
+ branch: Mapped["Branch"] = relationship(back_populates="llm_requests")
+ project_state: Mapped["ProjectState"] = relationship(back_populates="llm_requests")
+
+ @classmethod
+ def from_request_log(
+ cls,
+ project_state: "ProjectState",
+ agent: Optional["BaseAgent"],
+ request_log: LLMRequestLog,
+ ) -> "LLMRequest":
+ """
+ Store the request log in the database.
+
+ Note this just creates the request log object. It is committed to the
+ database only when the DB session itself is committed.
+
+ :param project_state: Project state to associate the request log with.
+ :param agent: Agent that made the request (if the caller was an agent).
+ :param request_log: Request log.
+ :return: Newly created LLM request log in the database.
+ """
+ session = inspect(project_state).async_session
+
+ obj = cls(
+ project_state=project_state,
+ branch=project_state.branch,
+ agent=agent.agent_type if agent else None,
+ provider=request_log.provider,
+ model=request_log.model,
+ temperature=request_log.temperature,
+ messages=request_log.messages,
+ response=request_log.response,
+ prompt_tokens=request_log.prompt_tokens,
+ completion_tokens=request_log.completion_tokens,
+ duration=request_log.duration,
+ status=request_log.status,
+ error=request_log.error,
+ )
+ session.add(obj)
+ return obj
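+
+# Usage sketch (illustrative; assumes `state` is a session-bound ProjectState,
+# `agent` an optional BaseAgent, and `request_log` an LLMRequestLog returned
+# by an LLM client call):
+#
+# LLMRequest.from_request_log(state, agent, request_log)
+# await session.commit()  # from_request_log only adds the row to the session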
diff --git a/core/db/models/project.py b/core/db/models/project.py
new file mode 100644
index 00000000..b093a4fb
--- /dev/null
+++ b/core/db/models/project.py
@@ -0,0 +1,124 @@
+import re
+from datetime import datetime
+from typing import TYPE_CHECKING, Optional, Union
+from unicodedata import normalize
+from uuid import UUID, uuid4
+
+from sqlalchemy import delete, inspect, select
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import Mapped, mapped_column, relationship, selectinload
+from sqlalchemy.sql import func
+
+from core.db.models import Base
+
+if TYPE_CHECKING:
+ from core.db.models import Branch
+
+
+class Project(Base):
+ __tablename__ = "projects"
+
+ # ID and parent FKs
+ id: Mapped[UUID] = mapped_column(primary_key=True, default=uuid4)
+
+ # Attributes
+ name: Mapped[str] = mapped_column()
+ created_at: Mapped[datetime] = mapped_column(server_default=func.now())
+ folder_name: Mapped[str] = mapped_column(
+ default=lambda context: Project.get_folder_from_project_name(context.get_current_parameters()["name"])
+ )
+
+ # Relationships
+ branches: Mapped[list["Branch"]] = relationship(back_populates="project", cascade="all")
+
+ @staticmethod
+ async def get_by_id(session: "AsyncSession", project_id: Union[str, UUID]) -> Optional["Project"]:
+ """
+ Get a project by ID.
+
+ :param session: The SQLAlchemy session.
+ :param project_id: The project ID (as str or UUID value).
+ :return: The Project object if found, None otherwise.
+ """
+ if not isinstance(project_id, UUID):
+ project_id = UUID(project_id)
+
+ result = await session.execute(select(Project).where(Project.id == project_id))
+ return result.scalar_one_or_none()
+
+ async def get_branch(self, name: Optional[str] = None) -> Optional["Branch"]:
+ """
+ Get a project branch by name.
+
+ :param name: The name of the branch (default "main").
+ :return: The Branch object if found, None otherwise.
+ """
+ from core.db.models import Branch
+
+ session = inspect(self).async_session
+ if session is None:
+ raise ValueError("Project instance not associated with a DB session.")
+
+ if name is None:
+ name = Branch.DEFAULT
+
+ result = await session.execute(select(Branch).where(Branch.project_id == self.id, Branch.name == name))
+ return result.scalar_one_or_none()
+
+ @staticmethod
+ async def get_all_projects(session: "AsyncSession") -> list["Project"]:
+ """
+ Get all projects.
+
+ This assumes the projects have at least one branch and one state.
+
+ :param session: The SQLAlchemy session.
+ :return: List of Project objects.
+ """
+ from core.db.models import Branch, ProjectState
+
+ latest_state_query = (
+ select(ProjectState.branch_id, func.max(ProjectState.id).label("max_id"))
+ .group_by(ProjectState.branch_id)
+ .subquery()
+ )
+
+ query = (
+ select(Project, Branch, ProjectState)
+ .join(Branch, Project.branches)
+ .join(ProjectState, Branch.states)
+ .join(latest_state_query, ProjectState.id == latest_state_query.columns.max_id)
+ .options(selectinload(Project.branches), selectinload(Branch.states))
+ .order_by(Project.name, Branch.name)
+ )
+
+ results = await session.execute(query)
+ return results.scalars().all()
+
+ @staticmethod
+ def get_folder_from_project_name(name: str):
+ """
+ Get the folder name from the project name.
+
+ :param name: Project name.
+ :return: Folder name.
+ """
+ # replace accented unicode characters with their base characters (eg. "šašavi" -> "sasavi")
+ name = normalize("NFKD", name).encode("ascii", "ignore").decode("utf-8")
+
+ # replace spaces/interpunction with a single dash
+ return re.sub(r"[^a-zA-Z0-9]+", "-", name).lower().strip("-")
+
+ @staticmethod
+ async def delete_by_id(session: "AsyncSession", project_id: UUID) -> int:
+ """
+ Delete a project by ID.
+
+ :param session: The SQLAlchemy session.
+ :param project_id: The project ID
+ :return: Number of rows deleted.
+ """
+
+ result = await session.execute(delete(Project).where(Project.id == project_id))
+ return result.rowcount
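+
+# Illustrative examples of the slugification in get_folder_from_project_name:
+#
+# Project.get_folder_from_project_name("Šašavi App!")   # -> "sasavi-app"
+# Project.get_folder_from_project_name("My Cool  App")  # -> "my-cool-app"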
diff --git a/core/db/models/project_state.py b/core/db/models/project_state.py
new file mode 100644
index 00000000..c9589b30
--- /dev/null
+++ b/core/db/models/project_state.py
@@ -0,0 +1,338 @@
+from copy import deepcopy
+from datetime import datetime
+from typing import TYPE_CHECKING, Optional
+from uuid import UUID, uuid4
+
+from sqlalchemy import ForeignKey, UniqueConstraint, delete, inspect
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+from sqlalchemy.orm.attributes import flag_modified
+from sqlalchemy.sql import func
+
+from core.db.models import Base
+from core.log import get_logger
+
+if TYPE_CHECKING:
+ from core.db.models import Branch, ExecLog, File, FileContent, LLMRequest, Specification, UserInput
+
+log = get_logger(__name__)
+
+
+class ProjectState(Base):
+ __tablename__ = "project_states"
+ __table_args__ = (
+ UniqueConstraint("prev_state_id"),
+ UniqueConstraint("branch_id", "step_index"),
+ {"sqlite_autoincrement": True},
+ )
+
+ # ID and parent FKs
+ id: Mapped[UUID] = mapped_column(primary_key=True, default=uuid4)
+ branch_id: Mapped[UUID] = mapped_column(ForeignKey("branches.id", ondelete="CASCADE"))
+ prev_state_id: Mapped[Optional[UUID]] = mapped_column(ForeignKey("project_states.id", ondelete="CASCADE"))
+ specification_id: Mapped[int] = mapped_column(ForeignKey("specifications.id"))
+
+ # Attributes
+ created_at: Mapped[datetime] = mapped_column(server_default=func.now())
+ step_index: Mapped[int] = mapped_column(default=1, server_default="1")
+ epics: Mapped[list[dict]] = mapped_column(default=list)
+ tasks: Mapped[list[dict]] = mapped_column(default=list)
+ steps: Mapped[list[dict]] = mapped_column(default=list)
+ iterations: Mapped[list[dict]] = mapped_column(default=list)
+ relevant_files: Mapped[list[str]] = mapped_column(default=list)
+ modified_files: Mapped[dict] = mapped_column(default=dict)
+ run_command: Mapped[Optional[str]] = mapped_column()
+
+ # Relationships
+ branch: Mapped["Branch"] = relationship(back_populates="states", lazy="selectin")
+ prev_state: Mapped[Optional["ProjectState"]] = relationship(
+ back_populates="next_state",
+ remote_side=[id],
+ single_parent=True,
+ )
+ next_state: Mapped[Optional["ProjectState"]] = relationship(back_populates="prev_state")
+ files: Mapped[list["File"]] = relationship(
+ back_populates="project_state",
+ lazy="selectin",
+ cascade="all,delete-orphan",
+ )
+ specification: Mapped["Specification"] = relationship(back_populates="project_states", lazy="selectin")
+ llm_requests: Mapped[list["LLMRequest"]] = relationship(back_populates="project_state", cascade="all")
+ user_inputs: Mapped[list["UserInput"]] = relationship(back_populates="project_state", cascade="all")
+ exec_logs: Mapped[list["ExecLog"]] = relationship(back_populates="project_state", cascade="all")
+
+ @property
+ def unfinished_steps(self) -> list[dict]:
+ """
+ Get the list of unfinished steps.
+
+ :return: List of unfinished steps.
+ """
+ return [step for step in self.steps if not step.get("completed")]
+
+ @property
+ def current_step(self) -> Optional[dict]:
+ """
+ Get the current step.
+
+ Current step is always the first step that's not finished yet.
+
+ :return: The current step, or None if there are no more unfinished steps.
+ """
+ li = self.unfinished_steps
+ return li[0] if li else None
+
+ @property
+ def unfinished_iterations(self) -> list[dict]:
+ """
+ Get the list of unfinished iterations.
+
+ :return: List of unfinished iterations.
+ """
+ return [iteration for iteration in self.iterations if not iteration.get("completed")]
+
+ @property
+ def current_iteration(self) -> Optional[dict]:
+ """
+ Get the current iteration.
+
+ Current iteration is always the first iteration that's not finished yet.
+
+ :return: The current iteration, or None if there are no unfinished iterations.
+ """
+ li = self.unfinished_iterations
+ return li[0] if li else None
+
+ @property
+ def unfinished_tasks(self) -> list[dict]:
+ """
+ Get the list of unfinished tasks.
+
+ :return: List of unfinished tasks.
+ """
+ return [task for task in self.tasks if not task.get("completed")]
+
+ @property
+ def current_task(self) -> Optional[dict]:
+ """
+ Get the current task.
+
+ Current task is always the first task that's not finished yet.
+
+ :return: The current task, or None if there are no unfinished tasks.
+ """
+ li = self.unfinished_tasks
+ return li[0] if li else None
+
+ @property
+ def unfinished_epics(self) -> list[dict]:
+ """
+ Get the list of unfinished epics.
+
+ :return: List of unfinished epics.
+ """
+ return [epic for epic in self.epics if not epic.get("completed")]
+
+ @property
+ def current_epic(self) -> Optional[dict]:
+ """
+ Get the current epic.
+
+ Current epic is always the first epic that's not finished yet.
+
+ :return: The current epic, or None if there are no unfinished epics.
+ """
+ li = self.unfinished_epics
+ return li[0] if li else None
+
+ @property
+ def relevant_file_objects(self):
+ """
+ Get the relevant files with their content.
+
+ :return: List of relevant File objects.
+ """
+ return [file for file in self.files if file.path in self.relevant_files]
+
+ @staticmethod
+ def create_initial_state(branch: "Branch") -> "ProjectState":
+ """
+ Create the initial project state for a new branch.
+
+ This does *not* commit the new state to the database.
+
+ No checks are made to ensure that the branch does not
+ already have a state.
+
+ :param branch: The branch to create the state for.
+ :return: The new ProjectState object.
+ """
+ from core.db.models import Specification
+
+ return ProjectState(
+ branch=branch,
+ specification=Specification(),
+ step_index=1,
+ )
+
+ async def create_next_state(self) -> "ProjectState":
+ """
+ Create the next project state for the branch.
+
+ This does NOT insert the new state and the associated objects (spec,
+ files, ...) to the database.
+
+ :return: The new ProjectState object.
+ """
+ if not self.id:
+ raise ValueError("Cannot create next state for unsaved state.")
+
+ if "next_state" in self.__dict__:
+ raise ValueError(f"Next state already exists for state with id={self.id}.")
+
+ new_state = ProjectState(
+ branch=self.branch,
+ prev_state=self,
+ step_index=self.step_index + 1,
+ specification=self.specification,
+ epics=deepcopy(self.epics),
+ tasks=deepcopy(self.tasks),
+ steps=deepcopy(self.steps),
+ iterations=deepcopy(self.iterations),
+ files=[],
+ relevant_files=deepcopy(self.relevant_files),
+ modified_files=deepcopy(self.modified_files),
+ )
+
+ session: AsyncSession = inspect(self).async_session
+ session.add(new_state)
+
+ for file in await self.awaitable_attrs.files:
+ clone = file.clone()
+ new_state.files.append(clone)
+
+ return new_state
+
+ def complete_step(self):
+ if not self.unfinished_steps:
+ raise ValueError("There are no unfinished steps to complete")
+ if "next_state" in self.__dict__:
+ raise ValueError("Current state is read-only (already has a next state).")
+
+ log.debug(f"Completing step {self.unfinished_steps[0]['type']}")
+ self.unfinished_steps[0]["completed"] = True
+ flag_modified(self, "steps")
+
+ def complete_task(self):
+ if not self.unfinished_tasks:
+ raise ValueError("There are no unfinished tasks to complete")
+ if "next_state" in self.__dict__:
+ raise ValueError("Current state is read-only (already has a next state).")
+
+ log.debug(f"Completing task {self.unfinished_tasks[0]['description']}")
+ self.unfinished_tasks[0]["completed"] = True
+ self.steps = []
+ self.iterations = []
+ self.relevant_files = []
+ self.modified_files = {}
+ flag_modified(self, "tasks")
+
+ if not self.unfinished_tasks and self.unfinished_epics:
+ self.complete_epic()
+
+ def complete_epic(self):
+ if not self.unfinished_epics:
+ raise ValueError("There are no unfinished epics to complete")
+ if "next_state" in self.__dict__:
+ raise ValueError("Current state is read-only (already has a next state).")
+
+ log.debug(f"Completing epic {self.unfinished_epics[0]['name']}")
+ self.unfinished_epics[0]["completed"] = True
+ flag_modified(self, "epics")
+
+ def complete_iteration(self):
+ if not self.unfinished_iterations:
+ raise ValueError("There are no unfinished iterations to complete")
+ if "next_state" in self.__dict__:
+ raise ValueError("Current state is read-only (already has a next state).")
+
+ log.debug(f"Completing iteration {self.unfinished_iterations[0]}")
+ self.unfinished_iterations[0]["completed"] = True
+ self.flag_iterations_as_modified()
+
+ def flag_iterations_as_modified(self):
+ """
+ Flag the iterations field as having been modified.
+
+ Used by agents that modify the mutable `iterations` field, to tell the
+ database that it was modified and should be saved (as SQLAlchemy can't
+ detect changes in mutable fields by itself).
+ """
+ flag_modified(self, "iterations")
+
+ def get_file_by_path(self, path: str) -> Optional["File"]:
+ """
+ Get a file from the current project state, by the file path.
+
+ :param path: The file path.
+ :return: The file object, or None if not found.
+ """
+ for file in self.files:
+ if file.path == path:
+ return file
+
+ return None
+
+ def save_file(self, path: str, content: "FileContent", external: bool = False) -> "File":
+ """
+ Save a file to the project state.
+
+ This either creates a new file pointing at the given content,
+ or updates the content of an existing file. This method
+ doesn't actually commit the file to the database, just attaches
+ it to the project state.
+
+ If the file was created by Pythagora (not externally by the user or a
+ template import), it is also marked as relevant for the current task.
+
+ :param path: The file path.
+ :param content: The file content.
+ :param external: Whether the file was added externally (e.g. by a user).
+ :return: The (unsaved) file object.
+ """
+ from core.db.models import File
+
+ if "next_state" in self.__dict__:
+ raise ValueError("Current state is read-only (already has a next state).")
+
+ file = self.get_file_by_path(path)
+ if file:
+ original_content = file.content.content
+ file.content = content
+ else:
+ original_content = ""
+ file = File(path=path, content=content)
+ self.files.append(file)
+
+ if path not in self.modified_files and not external:
+ self.modified_files[path] = original_content
+ if path not in self.relevant_files:
+ self.relevant_files.append(path)
+
+ return file
+
+ async def delete_after(self):
+ """
+ Delete all states in the branch after this one.
+ """
+
+ session: AsyncSession = inspect(self).async_session
+
+ log.debug(f"Deleting all project states in branch {self.branch_id} after {self.id}")
+ await session.execute(
+ delete(ProjectState).where(
+ ProjectState.branch_id == self.branch_id,
+ ProjectState.step_index > self.step_index,
+ )
+ )
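+
+# Lifecycle sketch (illustrative; assumes a session-bound Branch `branch` and
+# an open AsyncSession `session`). States form an append-only chain: complete
+# work on the current state, then move to the next one:
+#
+# state = ProjectState.create_initial_state(branch)
+# session.add(state)
+# await session.commit()
+# next_state = await state.create_next_state()
+# # `state` is now read-only; further changes go to `next_state`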
diff --git a/core/db/models/specification.py b/core/db/models/specification.py
new file mode 100644
index 00000000..5f98789e
--- /dev/null
+++ b/core/db/models/specification.py
@@ -0,0 +1,48 @@
+from typing import TYPE_CHECKING, Optional
+
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from core.db.models import Base
+
+if TYPE_CHECKING:
+ from core.db.models import ProjectState
+
+
+class Complexity:
+ """Estimate of the project or feature complexity."""
+
+ SIMPLE = "simple"
+ MODERATE = "moderate"
+ HARD = "hard"
+
+
+class Specification(Base):
+ __tablename__ = "specifications"
+
+ # ID and parent FKs
+ id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+
+ # Attributes
+ description: Mapped[str] = mapped_column(default="")
+ architecture: Mapped[str] = mapped_column(default="")
+ system_dependencies: Mapped[list[dict]] = mapped_column(default=list)
+ package_dependencies: Mapped[list[dict]] = mapped_column(default=list)
+ template: Mapped[Optional[str]] = mapped_column()
+ complexity: Mapped[str] = mapped_column(server_default=Complexity.HARD)
+
+ # Relationships
+ project_states: Mapped[list["ProjectState"]] = relationship(back_populates="specification")
+
+ def clone(self) -> "Specification":
+ """
+ Clone the specification.
+ """
+ clone = Specification(
+ description=self.description,
+ architecture=self.architecture,
+ system_dependencies=self.system_dependencies,
+ package_dependencies=self.package_dependencies,
+ template=self.template,
+ complexity=self.complexity,
+ )
+ return clone
diff --git a/core/db/models/user_input.py b/core/db/models/user_input.py
new file mode 100644
index 00000000..f75a8fe9
--- /dev/null
+++ b/core/db/models/user_input.py
@@ -0,0 +1,59 @@
+from datetime import datetime
+from typing import TYPE_CHECKING, Optional
+from uuid import UUID
+
+from sqlalchemy import ForeignKey, inspect
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+from sqlalchemy.sql import func
+
+from core.db.models import Base
+from core.ui.base import UserInput as UserInputData
+
+if TYPE_CHECKING:
+ from core.db.models import Branch, ProjectState
+
+
+class UserInput(Base):
+ __tablename__ = "user_inputs"
+
+ # ID and parent FKs
+ id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+ branch_id: Mapped[UUID] = mapped_column(ForeignKey("branches.id", ondelete="CASCADE"))
+ project_state_id: Mapped[Optional[UUID]] = mapped_column(ForeignKey("project_states.id", ondelete="SET NULL"))
+
+ # Attributes
+ created_at: Mapped[datetime] = mapped_column(server_default=func.now())
+ question: Mapped[str] = mapped_column()
+ answer_text: Mapped[Optional[str]] = mapped_column()
+ answer_button: Mapped[Optional[str]] = mapped_column()
+ cancelled: Mapped[bool] = mapped_column()
+
+ # Relationships
+ branch: Mapped["Branch"] = relationship(back_populates="user_inputs")
+ project_state: Mapped["ProjectState"] = relationship(back_populates="user_inputs")
+
+ @classmethod
+ def from_user_input(cls, project_state: "ProjectState", question: str, user_input: UserInputData) -> "UserInput":
+ """
+ Store the user input in the database.
+
+ Note this just creates the UserInput object. It is committed to the
+ database only when the DB session itself is committed.
+
+ :param project_state: Project state to associate the user input with.
+ :param question: Question the user was asked.
+ :param user_input: User input.
+ :return: Newly created User input in the database.
+ """
+ session = inspect(project_state).async_session
+
+ obj = cls(
+ project_state=project_state,
+ branch=project_state.branch,
+ question=question,
+ answer_text=user_input.text,
+ answer_button=user_input.button,
+ cancelled=user_input.cancelled,
+ )
+ session.add(obj)
+ return obj
diff --git a/core/db/session.py b/core/db/session.py
new file mode 100644
index 00000000..f55460d0
--- /dev/null
+++ b/core/db/session.py
@@ -0,0 +1,75 @@
+from sqlalchemy import event
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
+
+from core.config import DBConfig
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+class SessionManager:
+ """
+ Async-aware context manager for database session.
+
+ Usage:
+
+ >>> config = DBConfig(url="sqlite+aiosqlite:///test.db")
+ >>> async with SessionManager(config) as session:
+ ... # Do something with the session
+ """
+
+ def __init__(self, config: DBConfig):
+ """
+ Initialize the session manager with the given configuration.
+
+ :param config: Database configuration.
+ """
+ self.config = config
+ self.engine = create_async_engine(
+ self.config.url, echo=config.debug_sql, echo_pool="debug" if config.debug_sql else None
+ )
+ self.SessionClass = async_sessionmaker(self.engine, expire_on_commit=False)
+ self.session = None
+ self.recursion_depth = 0
+
+ event.listen(self.engine.sync_engine, "connect", self._on_connect)
+
+ def _on_connect(self, dbapi_connection, _):
+ """Connection event handler"""
+ log.debug(f"Connected to database {self.config.url}")
+
+ if self.config.url.startswith("sqlite"):
+ # Note that SQLite uses NullPool by default, meaning every session creates a
+ # database "connection". This is fine and preferred for SQLite because
+ # it's a local file. PostgreSQL and other databases use a real connection
+ # pool by default.
+ dbapi_connection.execute("pragma foreign_keys=on")
+
+ async def start(self) -> AsyncSession:
+ if self.session is not None:
+ self.recursion_depth += 1
+ log.warning(f"Re-entering database session (depth: {self.recursion_depth}), potential bug", stack_info=True)
+ return self.session
+
+ self.session = self.SessionClass()
+ return self.session
+
+ async def close(self):
+ if self.session is None:
+ log.warning("Closing database session that was never opened", stack_info=True)
+ return
+ if self.recursion_depth > 0:
+ self.recursion_depth -= 1
+ return
+
+ await self.session.close()
+ self.session = None
+
+ async def __aenter__(self) -> AsyncSession:
+ return await self.start()
+
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
+ return await self.close()
+
+
+__all__ = ["SessionManager"]
diff --git a/core/db/setup.py b/core/db/setup.py
new file mode 100644
index 00000000..357a10fa
--- /dev/null
+++ b/core/db/setup.py
@@ -0,0 +1,49 @@
+from os.path import dirname, join
+
+from alembic import command
+from alembic.config import Config
+
+from core.config import DBConfig
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+def _async_to_sync_db_scheme(url: str) -> str:
+ """
+ Convert an async database URL to a synchronous one.
+
+ This is needed because Alembic does not support async database
+ connections.
+
+ :param url: Asynchronous database URL.
+ :return: Synchronous database URL.
+ """
+ if url.startswith("postgresql+asyncpg://"):
+ return url.replace("postgresql+asyncpg://", "postgresql://")
+ elif url.startswith("sqlite+aiosqlite://"):
+ return url.replace("sqlite+aiosqlite://", "sqlite://")
+ return url
+
+
+def run_migrations(config: DBConfig):
+ """
+ Run database migrations using Alembic.
+
+ This needs to happen synchronously, before the asyncio
+ mainloop is started, and before any database access.
+
+ :param config: Database configuration.
+ """
+ url = _async_to_sync_db_scheme(config.url)
+ ini_location = join(dirname(__file__), "alembic.ini")
+
+ log.debug(f"Running database migrations for {url} (config: {ini_location})")
+
+ alembic_cfg = Config(ini_location)
+ alembic_cfg.set_main_option("sqlalchemy.url", url)
+ alembic_cfg.set_main_option("pythagora_runtime", "true")
+ command.upgrade(alembic_cfg, "head")
+
+
+__all__ = ["run_migrations"]
diff --git a/core/db/v0importer.py b/core/db/v0importer.py
new file mode 100644
index 00000000..6b158bdf
--- /dev/null
+++ b/core/db/v0importer.py
@@ -0,0 +1,246 @@
+from json import loads
+from os.path import exists
+from pathlib import Path
+from uuid import UUID, uuid4
+
+import aiosqlite
+
+from core.db.models import Branch, Project, ProjectState
+from core.db.session import SessionManager
+from core.log import get_logger
+from core.state.state_manager import StateManager
+
+log = get_logger(__name__)
+
+
+class LegacyDatabaseImporter:
+ def __init__(self, session_manager: SessionManager, dbpath: str):
+ self.session_manager = session_manager
+ self.state_manager = StateManager(self.session_manager, None)
+ self.dbpath = dbpath
+ self.conn = None
+
+ if not exists(dbpath):
+ raise FileNotFoundError(f"File not found: {dbpath}")
+
+ async def import_database(self):
+ info = await self.load_legacy_database()
+ await self.save_to_new_database(info)
+
+ async def load_legacy_database(self):
+ async with aiosqlite.connect(self.dbpath) as conn:
+ self.conn = conn
+ is_valid = await self.verify_schema()
+ if not is_valid:
+ raise ValueError(f"Database {self.dbpath} doesn't look like a GPT-Pilot database")
+
+ apps = await self.get_apps()
+ info = {}
+ for app_id in apps:
+ app_info = await self.get_app_info(app_id)
+ info[app_id] = {
+ "name": apps[app_id],
+ **app_info,
+ }
+
+ return info
+
+ async def verify_schema(self) -> bool:
+ tables = set()
+ async with self.conn.execute("select name from sqlite_master where type = 'table'") as cursor:
+ async for row in cursor:
+ tables.add(row[0])
+
+ return "app" in tables and "development_steps" in tables
+
+ async def get_apps(self) -> dict[str, str]:
+ apps = {}
+ async with self.conn.execute("select id, name, status from app") as cursor:
+ async for id, name, status in cursor:
+ if status == "coding":
+ apps[id] = name
+ return apps
+
+ async def get_app_info(self, app_id: str) -> dict:
+ app_info = {
+ "initial_prompt": None,
+ "architecture": None,
+ "tasks": [],
+ }
+
+ async with self.conn.execute("select architecture from architecture where app_id = ?", (app_id,)) as cursor:
+ row = await cursor.fetchone()
+ if row:
+ app_info["architecture"] = loads(row[0])
+
+ async with self.conn.execute("select prompt from project_description where app_id = ?", (app_id,)) as cursor:
+ row = await cursor.fetchone()
+ if row:
+ app_info["initial_prompt"] = row[0]
+
+ async with self.conn.execute(
+ "select id, prompt_path, prompt_data, messages, llm_response from development_steps "
+ "where app_id = ? order by created_at asc",
+ (app_id,),
+ ) as cursor:
+ async for row in cursor:
+ dev_step_id, prompt_path, prompt_data, messages, llm_response = row
+ if prompt_path == "development/task/breakdown.prompt":
+ task_info = await self.get_task_info(dev_step_id, prompt_data, llm_response)
+ app_info["tasks"].append(task_info)
+
+ return app_info
+
+ async def get_task_info(self, dev_step_id, prompt_data_json: str, llm_response: dict) -> dict:
+ prompt_data = loads(prompt_data_json)
+ current_feature = prompt_data.get("current_feature")
+ previous_features = prompt_data.get("previous_features") or []
+ tasks = prompt_data["development_tasks"]
+ current_task_index = prompt_data["current_task_index"]
+ current_task = tasks[current_task_index]
+ instructions = llm_response
+ files = await self.get_task_files(dev_step_id)
+ return {
+ "current_feature": current_feature,
+ "previous_features": previous_features,
+ "tasks": tasks,
+ "current_task_index": current_task_index,
+ "current_task": current_task,
+ "instructions": instructions,
+ "files": files,
+ }
+
+ async def get_task_files(self, dev_step_id: int):
+ files = {}
+
+ async with self.conn.execute(
+ "select content, path, name, description from file_snapshot "
+ "inner join file on file_snapshot.file_id = file.id "
+ "where file_snapshot.development_step_id = ?",
+ (dev_step_id,),
+ ) as cursor:
+ async for row in cursor:
+ content, path, name, description = row
+ file_path = Path(path + "/" + name).as_posix() if path else name
+ try:
+ if isinstance(content, bytes):
+ content = content.decode("utf-8")
+ except: # noqa
+ # skip binary file
+ continue
+ files[file_path] = {
+ "description": description or None,
+ "content": content,
+ }
+
+ return files
+
+ async def save_to_new_database(self, info: dict):
+ async with self.session_manager as session:
+ projects = await Project.get_all_projects(session)
+
+ for project in projects:
+ imported_app = info.pop(project.id.hex, None)
+ if imported_app:
+ log.info(f"Project {project.name} already exists in the new database, skipping")
+
+ for app_id, app_info in info.items():
+ await self.save_app(app_id, app_info)
+
+ async def save_app(self, app_id: str, app_info: dict):
+ log.info(f"Importing app {app_info['name']} (id={app_id}) ...")
+
+ async with self.session_manager as session:
+ project = Project(id=UUID(app_id), name=app_info["name"])
+ branch = Branch(project=project)
+ state = ProjectState.create_initial_state(branch)
+
+ spec = state.specification
+ spec.description = app_info["initial_prompt"]
+ spec.architecture = app_info["architecture"]["architecture"]
+ spec.system_dependencies = app_info["architecture"]["system_dependencies"]
+ spec.package_dependencies = app_info["architecture"]["package_dependencies"]
+ spec.template = app_info["architecture"].get("template")
+
+ session.add(project)
+ await session.commit()
+
+ project = await self.state_manager.load_project(project_id=app_id)
+
+ # It is much harder to import all tasks and keep features/tasks lists in sync, so
+ # we only support importing the latest task.
+ if app_info["tasks"]:
+ await self.save_latest_task(app_info["tasks"][-1])
+
+ # This just closes the session and removes the last (incomplete) state.
+ # Everything else should already be safely committed.
+ await self.state_manager.rollback()
+
+ async def save_latest_task(self, task: dict):
+ sm = self.state_manager
+ state = sm.current_state
+
+ state.epics = [
+ {
+ "id": uuid4().hex,
+ "name": "Initial Project",
+ "description": state.specification.description,
+ "summary": None,
+ "completed": bool(task["previous_features"]) or (task["current_feature"] is not None),
+ "complexity": "hard",
+ }
+ ]
+
+ for i, feature in enumerate(task["previous_features"]):
+ state.epics += [
+ {
+ "id": uuid4().hex,
+ "name": f"Feature #{i + 1}",
+ "description": feature["summary"], # FIXME: is this good enough
+ "summary": None,
+ "completed": True,
+ "complexity": "hard",
+ }
+ ]
+
+ if task["current_feature"]:
+ state.epics = state.epics + [
+ {
+ "id": uuid4().hex,
+ "name": f"Feature #{len(state.epics)}",
+ "description": task["current_feature"],
+ "summary": None,
+ "completed": False,
+ "complexity": "hard",
+ }
+ ]
+
+ current_task_index = task["current_task_index"]
+ state.tasks = [
+ {
+ "id": uuid4().hex,
+ "description": task_info["description"],
+ "instructions": None,
+ "completed": current_task_index > i,
+ }
+ for i, task_info in enumerate(task["tasks"])
+ ]
+ state.tasks[current_task_index]["instructions"] = task["instructions"]
+ await sm.current_session.commit()
+
+ # Reload project at the initialized state to reinitialize the next state
+ await self.state_manager.load_project(project_id=state.branch.project.id, step_index=state.step_index)
+
+ await self.save_task_files(task["files"])
+ await self.state_manager.commit()
+
+ async def save_task_files(self, files: dict):
+ for path, file_info in files.items():
+ await self.state_manager.save_file(
+ path,
+ file_info["content"],
+ metadata={
+ "description": file_info["description"],
+ "references": [],
+ },
+ )
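+
+# Usage sketch (illustrative; the session manager and database path are
+# hypothetical):
+#
+# importer = LegacyDatabaseImporter(session_manager, "path/to/legacy/gpt-pilot.db")
+# await importer.import_database()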
diff --git a/core/disk/__init__.py b/core/disk/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/core/disk/ignore.py b/core/disk/ignore.py
new file mode 100644
index 00000000..ea25d944
--- /dev/null
+++ b/core/disk/ignore.py
@@ -0,0 +1,125 @@
+import os.path
+from fnmatch import fnmatch
+from typing import Optional
+
+
+class IgnoreMatcher:
+ """
+ A class to match paths against a list of ignore patterns or
+ file attributes (size, type).
+ """
+
+ def __init__(
+ self,
+ root_path: str,
+ ignore_paths: list[str],
+ *,
+ ignore_size_threshold: Optional[int] = None,
+ ):
+ """
+ Initialize the IgnoreMatcher object.
+
+ Ignore paths are matched against the file name and the full path,
+ and may include shell-like wildcards ("*" for any number of characters,
+ "?" for a single character). Paths are normalized, so "/" works on both
+ Unix and Windows, and Windows matching is case insensitive.
+
+ :param root_path: Root path to use when checking files on disk.
+ :param ignore_paths: List of patterns to ignore.
+ :param ignore_size_threshold: Files larger than this size will be ignored.
+ """
+ self.root_path = root_path
+ self.ignore_paths = ignore_paths
+ self.ignore_size_threshold = ignore_size_threshold
+
+ def ignore(self, path: str) -> bool:
+ """
+ Check if the given path matches any of the ignore patterns.
+
+ :param path: (Relative) path to the file or directory to check
+ :return: True if the path matches any of the ignore patterns, False otherwise
+ """
+
+ full_path = os.path.normpath(os.path.join(self.root_path, path))
+
+ if self._is_in_ignore_list(path):
+ return True
+
+ if self._is_large_file(full_path):
+ return True
+
+ # Binary files are always ignored
+ if self._is_binary(full_path):
+ return True
+
+ return False
+
+ def _is_in_ignore_list(self, path: str) -> bool:
+ """
+ Check if the given path matches any of the ignore patterns.
+
+ Both the (relative) file path and the file (base) name are matched.
+
+ :param path: The path to the file or directory to check
+ :return: True if the path matches any of the ignore patterns, False otherwise.
+ """
+ name = os.path.basename(path)
+ for pattern in self.ignore_paths:
+ if fnmatch(name, pattern) or fnmatch(path, pattern):
+ return True
+ return False
+
+ def _is_large_file(self, full_path: str) -> bool:
+ """
+ Check if the given file is larger than the threshold.
+
+ This also returns True if the file doesn't exist or is not a regular file
+ (eg. it's a symlink), since we want to ignore those kinds of files as well.
+
+ :param path: Full path to the file to check.
+ :return: True if the file is larger than the threshold, False otherwise.
+ """
+ if self.ignore_size_threshold is None:
+ return False
+
+ # We don't handle directories here
+ if os.path.isdir(full_path):
+ return False
+
+ if not os.path.isfile(full_path):
+ return True
+
+ try:
+ return bool(os.path.getsize(full_path) > self.ignore_size_threshold)
+ except: # noqa
+ return True
+
+ def _is_binary(self, full_path: str) -> bool:
+ """
+ Check if the given file is binary and should be ignored.
+
+ This also returns True if the file doesn't exist or is not a regular file
+ (eg. it's a symlink), or can't be opened, since we want to ignore those too.
+
+ :param path: Full path to the file to check.
+ :return: True if the file should be ignored, False otherwise.
+ """
+
+ # We don't handle directories here
+ if os.path.isdir(full_path):
+ return False
+
+ if not os.path.isfile(full_path):
+ return True
+
+ try:
+ with open(full_path, "r", encoding="utf-8") as f:
+ f.read(128 * 1024)
+ return False
+ except: # noqa
+ # If we can't open the file for any reason (eg. PermissionError), it's
+ # best to ignore it anyway
+ return True
+
+
+__all__ = ["IgnoreMatcher"]
diff --git a/core/disk/vfs.py b/core/disk/vfs.py
new file mode 100644
index 00000000..6dbad3fc
--- /dev/null
+++ b/core/disk/vfs.py
@@ -0,0 +1,188 @@
+import os
+import os.path
+from hashlib import sha1
+from pathlib import Path
+
+from core.disk.ignore import IgnoreMatcher
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+class VirtualFileSystem:
+ def save(self, path: str, content: str):
+ """
+ Save content to a file. Use for both new and updated files.
+
+ :param path: Path to the file, relative to project root.
+ :param content: Content to save.
+ """
+ raise NotImplementedError()
+
+ def read(self, path: str) -> str:
+ """
+ Read file contents.
+
+ :param path: Path to the file, relative to project root.
+ :return: File contents.
+ """
+ raise NotImplementedError()
+
+ def remove(self, path: str):
+ """
+ Remove a file.
+
+ If file doesn't exist or is a directory, or if the file is ignored,
+ do nothing.
+
+ :param path: Path to the file, relative to project root.
+ """
+ raise NotImplementedError()
+
+ def get_full_path(self, path: str) -> str:
+ """
+ Get the full path to a file.
+
+ This should be used to check the full path of the file on whichever
+ file system it locally is stored. For example, getting a full path
+ to a file and then passing it to an external program via run_command
+ should work.
+
+ :param path: Path to the file, relative to project root.
+ :return: Full path to the file.
+ """
+ raise NotImplementedError()
+
+ def _filter_by_prefix(self, file_list: list[str], prefix: str) -> list[str]:
+ # We use "/" internally on all platforms, including win32
+ if not prefix.endswith("/"):
+ prefix = prefix + "/"
+ return [f for f in file_list if f.startswith(prefix)]
+
+ def _get_file_list(self) -> list[str]:
+ raise NotImplementedError()
+
+ def list(self, prefix: str = None) -> list[str]:
+ """
+ Return a list of files in the project.
+
+ File paths are relative to the project root.
+
+ :param prefix: Optional prefix to filter files for.
+ :return: List of file paths.
+ """
+ retval = sorted(self._get_file_list())
+ if prefix:
+ retval = self._filter_by_prefix(retval, prefix)
+ return retval
+
+ def hash(self, path: str) -> str:
+ content = self.read(path)
+ return self.hash_string(content)
+
+ @staticmethod
+ def hash_string(content: str) -> str:
+ return sha1(content.encode("utf-8")).hexdigest()
+
+
+class MemoryVFS(VirtualFileSystem):
+ files: dict[str, str]
+
+ def __init__(self):
+ self.files = {}
+
+ def save(self, path: str, content: str):
+ self.files[path] = content
+
+ def read(self, path: str) -> str:
+ try:
+ return self.files[path]
+ except KeyError:
+ raise ValueError(f"File not found: {path}")
+
+ def remove(self, path: str):
+ if path in self.files:
+ del self.files[path]
+
+ def get_full_path(self, path: str) -> str:
+ # We use "/" internally on all platforms, including win32
+ return "/" + path
+
+ def _get_file_list(self) -> list[str]:
+ return list(self.files.keys())
+
+
+class LocalDiskVFS(VirtualFileSystem):
+ def __init__(
+ self,
+ root: str,
+ create: bool = True,
+ allow_existing: bool = True,
+ ignore_matcher: IgnoreMatcher = None,
+ ):
+ if not os.path.isdir(root):
+ if create:
+ os.makedirs(root)
+ else:
+ raise ValueError(f"Root directory does not exist: {root}")
+ else:
+ if not allow_existing:
+ raise FileExistsError(f"Root directory already exists: {root}")
+
+ if ignore_matcher is None:
+ ignore_matcher = IgnoreMatcher(root, [])
+
+ self.root = root
+ self.ignore_matcher = ignore_matcher
+
+ def get_full_path(self, path: str) -> str:
+ return os.path.normpath(os.path.join(self.root, path))
+
+ def save(self, path: str, content: str):
+ full_path = self.get_full_path(path)
+ os.makedirs(os.path.dirname(full_path), exist_ok=True)
+ with open(full_path, "w", encoding="utf-8") as f:
+ f.write(content)
+ log.debug(f"Saved file {path} ({len(content)} bytes) to {full_path}")
+
+ def read(self, path: str) -> str:
+ full_path = self.get_full_path(path)
+ if not os.path.isfile(full_path):
+ raise ValueError(f"File not found: {path}")
+
+ # TODO: do we want error handling here?
+ with open(full_path, "r", encoding="utf-8") as f:
+ return f.read()
+
+ def remove(self, path: str):
+ if self.ignore_matcher.ignore(path):
+ return
+
+ full_path = self.get_full_path(path)
+ if os.path.isfile(full_path):
+ try:
+ os.remove(full_path)
+ log.debug(f"Removed file {path} from {full_path}")
+ except Exception as err: # noqa
+ log.error(f"Failed to remove file {path}: {err}", exc_info=True)
+
+ def _get_file_list(self) -> list[str]:
+ files = []
+ for dpath, dirnames, filenames in os.walk(self.root):
+ # Modify in place to prevent recursing into ignored directories
+ dirnames[:] = [
+ d
+ for d in dirnames
+ if not self.ignore_matcher.ignore(os.path.relpath(os.path.join(dpath, d), self.root))
+ ]
+
+ for filename in filenames:
+ path = os.path.relpath(os.path.join(dpath, filename), self.root)
+ if not self.ignore_matcher.ignore(path):
+ # We use "/" internally on all platforms, including win32
+ files.append(Path(path).as_posix())
+
+ return files
+
+
+__all__ = ["VirtualFileSystem", "MemoryVFS", "LocalDiskVFS"]
diff --git a/core/llm/__init__.py b/core/llm/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/core/llm/anthropic_client.py b/core/llm/anthropic_client.py
new file mode 100644
index 00000000..e2a41e9d
--- /dev/null
+++ b/core/llm/anthropic_client.py
@@ -0,0 +1,123 @@
+import datetime
+import zoneinfo
+from typing import Optional
+
+from anthropic import AsyncAnthropic, RateLimitError
+from httpx import Timeout
+
+from core.config import LLMProvider
+from core.llm.convo import Convo
+from core.log import get_logger
+
+from .base import BaseLLMClient
+
+log = get_logger(__name__)
+
+# Maximum number of tokens supported by Anthropic Claude 3
+MAX_TOKENS = 4096
+
+
+class AnthropicClient(BaseLLMClient):
+ provider = LLMProvider.ANTHROPIC
+
+ def _init_client(self):
+ self.client = AsyncAnthropic(
+ api_key=self.config.api_key,
+ base_url=self.config.base_url,
+ timeout=Timeout(
+ max(self.config.connect_timeout, self.config.read_timeout),
+ connect=self.config.connect_timeout,
+ read=self.config.read_timeout,
+ ),
+ )
+
+ def _adapt_messages(self, convo: Convo) -> list[dict[str, str]]:
+ """
+ Adapt the conversation messages to the format expected by the Anthropic Claude model.
+
+ Claude only recognizes "user" and "assistant" roles, and requires them to be switched
+ for each message (ie. no consecutive messages from the same role).
+
+ :param convo: Conversation to adapt.
+ :return: Adapted conversation messages.
+ """
+ messages = []
+ for msg in convo.messages:
+ if msg["role"] == "function":
+ raise ValueError("Anthropic Claude doesn't support function calling")
+
+ role = "user" if msg["role"] in ["user", "system"] else "assistant"
+ if messages and messages[-1]["role"] == role:
+ messages[-1]["content"] += "\n\n" + msg["content"]
+ else:
+ messages.append(
+ {
+ "role": role,
+ "content": msg["content"],
+ }
+ )
+ return messages
+
+ async def _make_request(
+ self,
+ convo: Convo,
+ temperature: Optional[float] = None,
+ json_mode: bool = False,
+ ) -> tuple[str, int, int]:
+ messages = self._adapt_messages(convo)
+ completion_kwargs = {
+ "max_tokens": MAX_TOKENS,
+ "model": self.config.model,
+ "messages": messages,
+ "temperature": self.config.temperature if temperature is None else temperature,
+ }
+ if json_mode:
+ completion_kwargs["response_format"] = {"type": "json_object"}
+
+ response = []
+ async with self.client.messages.stream(**completion_kwargs) as stream:
+ async for content in stream.text_stream:
+ response.append(content)
+ if self.stream_handler:
+ await self.stream_handler(content)
+
+ # Token usage is reported on the final message
+ final_message = await stream.get_final_message()
+
+ response_str = "".join(response)
+
+ # Tell the stream handler we're done
+ if self.stream_handler:
+ await self.stream_handler(None)
+
+ return response_str, final_message.usage.input_tokens, final_message.usage.output_tokens
+
+ def rate_limit_sleep(self, err: RateLimitError) -> Optional[datetime.timedelta]:
+ """
+ Anthropic rate limits docs:
+ https://docs.anthropic.com/en/api/rate-limits#response-headers
+ Limit reset times are in RFC 3339 format.
+ """
+ headers = err.response.headers
+ if "anthropic-ratelimit-tokens-remaining" not in headers:
+ return None
+
+ # header values are strings, so parse before comparing to 0
+ remaining_tokens = int(headers["anthropic-ratelimit-tokens-remaining"])
+ if remaining_tokens == 0:
+ relevant_dt = headers["anthropic-ratelimit-tokens-reset"]
+ else:
+ relevant_dt = headers["anthropic-ratelimit-requests-reset"]
+
+ try:
+ reset_time = datetime.datetime.fromisoformat(relevant_dt)
+ except ValueError:
+ return datetime.timedelta(seconds=5)
+
+ now = datetime.datetime.now(tz=zoneinfo.ZoneInfo("UTC"))
+ return reset_time - now
+
+
+__all__ = ["AnthropicClient"]
diff --git a/core/llm/base.py b/core/llm/base.py
new file mode 100644
index 00000000..a62f70d0
--- /dev/null
+++ b/core/llm/base.py
@@ -0,0 +1,306 @@
+import asyncio
+import datetime
+import json
+from enum import Enum
+from time import time
+from typing import Any, Callable, Optional, Tuple
+
+import httpx
+
+from core.config import LLMConfig, LLMProvider
+from core.llm.convo import Convo
+from core.llm.request_log import LLMRequestLog, LLMRequestStatus
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+
+class LLMError(str, Enum):
+ KEY_EXPIRED = "key_expired"
+ RATE_LIMITED = "rate_limited"
+
+
+class APIError(Exception):
+ def __init__(self, message: str):
+ self.message = message
+
+
+class BaseLLMClient:
+ """
+ Base asynchronous streaming client for language models.
+
+ Example usage:
+
+ >>> async def stream_handler(content: str):
+ ... print(content)
+ ...
+ >>> def parser(content: str) -> dict:
+ ... return json.loads(content)
+ ...
+ >>> client_class = BaseLLMClient.for_provider(provider)
+ >>> client = client_class(config, stream_handler=stream_handler)
+ >>> response, request_log = await client(convo, parser=parser)
+ """
+
+ provider: LLMProvider
+
+ def __init__(
+ self,
+ config: LLMConfig,
+ *,
+ stream_handler: Optional[Callable] = None,
+ error_handler: Optional[Callable] = None,
+ ):
+ """
+ Initialize the client with the given configuration.
+
+ :param config: Configuration for the client.
+ :param stream_handler: Optional handler for streamed responses.
+ """
+ self.config = config
+ self.stream_handler = stream_handler
+ self.error_handler = error_handler
+ self._init_client()
+
+ def _init_client(self):
+ raise NotImplementedError()
+
+ async def _make_request(
+ self,
+ convo: Convo,
+ temperature: Optional[float] = None,
+ json_mode: bool = False,
+ ) -> tuple[str, int, int]:
+ """
+ Call the LLM provider with the given conversation.
+
+ Low-level method that streams the response chunks.
+ Use `__call__` instead of this method.
+
+ :param convo: Conversation to send to the LLM.
+ :param json_mode: If True, the response is expected to be JSON.
+ :return: Tuple containing the full response content, number of input tokens, and number of output tokens.
+ """
+ raise NotImplementedError()
+
+ async def _adapt_messages(self, convo: Convo) -> list[dict[str, str]]:
+ """
+ Adapt the conversation messages to the format expected by the LLM.
+
+ Claude only recognizes the "user" and "assistant" roles.
+
+ :param convo: Conversation to adapt.
+ :return: Adapted conversation messages.
+ """
+ messages = []
+ for msg in convo.messages:
+ if msg["role"] == "function":
+ raise ValueError("Anthropic Claude doesn't support function calling")
+
+ role = "user" if msg["role"] in ["user", "system"] else "assistant"
+ if messages and messages[-1]["role"] == role:
+ messages[-1]["content"] += "\n\n" + msg["content"]
+ else:
+ messages.append(
+ {
+ "role": role,
+ "content": msg.content,
+ }
+ )
+ return messages
+
+ async def __call__(
+ self,
+ convo: Convo,
+ *,
+ temperature: Optional[float] = None,
+ parser: Optional[Callable] = None,
+ max_retries: int = 3,
+ json_mode: bool = False,
+ ) -> Tuple[Any, LLMRequestLog]:
+ """
+ Invoke the LLM with the given conversation.
+
+ Stream handler, if provided, should be an async function
+ that takes a single argument, the response content (str).
+ It will be called for each response chunk.
+
+ Parser, if provided, should be a function that takes the
+ response content (str) and returns the parsed response.
+ On parse error, the parser should raise a ValueError with
+ a descriptive error message that will be sent back to the LLM
+ to retry, up to max_retries.
+
+ :param convo: Conversation to send to the LLM.
+ :param parser: Optional parser for the response.
+ :param max_retries: Maximum number of retries for parsing the response.
+ :param json_mode: If True, the response is expected to be JSON.
+ :return: Tuple of the (parsed) response and request log entry.
+ """
+ import anthropic
+ import groq
+ import openai
+
+ if temperature is None:
+ temperature = self.config.temperature
+
+ convo = convo.fork()
+ request_log = LLMRequestLog(
+ provider=self.provider,
+ model=self.config.model,
+ temperature=temperature,
+ )
+
+ prompt_length_kb = len(json.dumps(convo.messages).encode("utf-8")) / 1024
+ log.debug(
+ f"Calling {self.provider.value} model {self.config.model} (temp={temperature}), prompt length: {prompt_length_kb:.1f} KB"
+ )
+ t0 = time()
+
+ for _ in range(max_retries):
+ request_log.messages = convo.messages[:]
+ request_log.response = None
+ request_log.error = None
+ response = None
+
+ try:
+ response, prompt_tokens, completion_tokens = await self._make_request(
+ convo,
+ temperature=temperature,
+ json_mode=json_mode,
+ )
+ except (openai.APIConnectionError, anthropic.APIConnectionError, groq.APIConnectionError) as err:
+ log.warning(f"API connection error: {err}", exc_info=True)
+ request_log.error = str(f"API connection error: {err}")
+ request_log.status = LLMRequestStatus.ERROR
+ continue
+ except httpx.ReadTimeout as err:
+ log.warning(f"Read timeout (set to {self.config.read_timeout}s): {err}", exc_info=True)
+ request_log.error = str(f"Read timeout: {err}")
+ request_log.status = LLMRequestStatus.ERROR
+ continue
+ except httpx.ReadError as err:
+ log.warning(f"Read error: {err}", exc_info=True)
+ request_log.error = str(f"Read error: {err}")
+ request_log.status = LLMRequestStatus.ERROR
+ continue
+ except (openai.RateLimitError, anthropic.RateLimitError, groq.RateLimitError) as err:
+ log.warning(f"Rate limit error: {err}", exc_info=True)
+ request_log.error = str(f"Rate limit error: {err}")
+ request_log.status = LLMRequestStatus.ERROR
+ wait_time = self.rate_limit_sleep(err)
+ if wait_time:
+ message = f"We've hit {self.config.provider.value} rate limit. Sleeping for {wait_time.seconds} seconds..."
+ await self.error_handler(LLMError.RATE_LIMITED, message)
+ await asyncio.sleep(wait_time.seconds)
+ continue
+ else:
+ # RateLimitError that shouldn't be retried, eg. insufficient funds
+ err_msg = err.response.json().get("error", {}).get("message", "Rate limiting error.")
+ raise APIError(err_msg) from err
+ except (openai.NotFoundError, anthropic.NotFoundError, groq.NotFoundError) as err:
+ err_msg = err.response.json().get("error", {}).get("message", f"Model not found: {self.config.model}")
+ raise APIError(err_msg) from err
+ except (openai.AuthenticationError, anthropic.AuthenticationError, groq.AuthenticationError) as err:
+ log.warning(f"Key expired: {err}", exc_info=True)
+ err_msg = err.response.json().get("error", {}).get("message", "Incorrect API key")
+ if "[BricksLLM]" in err_msg:
+ # We only want to show the key expired message if it's from Bricks
+ await self.error_handler(LLMError.KEY_EXPIRED)
+
+ raise APIError(err_msg) from err
+ except (openai.APIStatusError, anthropic.APIStatusError, groq.APIStatusError) as err:
+ # Token limit exceeded (in original gpt-pilot handled as
+ # TokenLimitError) is thrown as 400 (OpenAI, Anthropic) or 413 (Groq).
+ # All providers throw an exception that is caught here.
+ # OpenAI and Groq return a `code` field in the error JSON that lets
+ # us confirm that we've breached the token limit, but Anthropic doesn't,
+ # so we can't be certain that's the problem in Anthropic's case.
+ # Here we try to detect that and tell the user what happened.
+ err_code = err.response.json().get("error", {}).get("code", "")
+ if err_code in ("request_too_large", "context_length_exceeded", "string_above_max_length"):
+ # Handle OpenAI and Groq token limit exceeded
+ # OpenAI will return `string_above_max_length` for prompts longer than 1M characters
+ message = "".join(
+ [
+ "We sent too large request to the LLM, resulting in an error. ",
+ "This is usually caused by including framework files in an LLM request. ",
+ "Here's how you can get GPT Pilot to ignore those extra files: ",
+ "https://bit.ly/faq-token-limit-error",
+ ]
+ )
+ raise APIError(message) from err
+
+ log.warning(f"API error: {err}", exc_info=True)
+ request_log.error = str(f"API error: {err}")
+ request_log.status = LLMRequestStatus.ERROR
+ return None, request_log
+
+ request_log.response = response
+
+ request_log.prompt_tokens += prompt_tokens
+ request_log.completion_tokens += completion_tokens
+ if parser:
+ try:
+ response = parser(response)
+ break
+ except ValueError as err:
+ log.debug(f"Error parsing GPT response: {err}, asking LLM to retry", exc_info=True)
+ convo.assistant(response)
+ convo.user(f"Error parsing response: {err}. Please output your response EXACTLY as requested.")
+ continue
+ else:
+ break
+ else:
+ log.warning(f"Failed to parse response after {max_retries} retries")
+ response = None
+ request_log.status = LLMRequestStatus.ERROR
+
+ t1 = time()
+ request_log.duration = t1 - t0
+
+ log.debug(
+ f"Total {self.provider.value} response time {request_log.duration:.2f}s, {request_log.prompt_tokens} prompt tokens, {request_log.completion_tokens} completion tokens used"
+ )
+
+ return response, request_log
+
+ @staticmethod
+ def for_provider(provider: LLMProvider) -> type["BaseLLMClient"]:
+ """
+ Return LLM client for the specified provider.
+
+ :param provider: Provider to return the client for.
+ :return: Client class for the specified provider.
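+
+ Illustrative usage (the client constructor signature is an assumption):
+
+     client_class = BaseLLMClient.for_provider(LLMProvider.OPENAI)
+     client = client_class(config)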
+ """
+ from .anthropic_client import AnthropicClient
+ from .groq_client import GroqClient
+ from .openai_client import OpenAIClient
+
+ if provider == LLMProvider.OPENAI:
+ return OpenAIClient
+ elif provider == LLMProvider.ANTHROPIC:
+ return AnthropicClient
+ elif provider == LLMProvider.GROQ:
+ return GroqClient
+ else:
+ raise ValueError(f"Unsupported LLM provider: {provider.value}")
+
+ def rate_limit_sleep(self, err: Exception) -> Optional[datetime.timedelta]:
+ """
+ Return how long we need to sleep because of rate limiting.
+
+ These are computed from the response headers that each LLM returns.
+ For details, check the implementation for the specific LLM. If there
+ are no rate limiting headers, we assume that the request should not
+ be retried and return None (this will be the case for insufficient
+ quota/funds in the account).
+
+ :param err: RateLimitError that was raised by the LLM client.
+ :return: optional timedelta to wait before trying again
+ """
+
+ raise NotImplementedError()
+
+
+__all__ = ["BaseLLMClient"]
diff --git a/core/llm/convo.py b/core/llm/convo.py
new file mode 100644
index 00000000..bf2a9fee
--- /dev/null
+++ b/core/llm/convo.py
@@ -0,0 +1,163 @@
+from copy import deepcopy
+from typing import Iterator, Optional
+
+
+class Convo:
+ """
+ A conversation between a user and a Large Language Model (LLM) assistant.
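+
+ Example (illustrative):
+
+     convo = Convo("You are a helpful assistant.")
+     convo.user("What is 2 + 2?")
+     branch = convo.fork().assistant("4")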
+ """
+
+ ROLES = ["system", "user", "assistant", "function"]
+
+ messages: list[dict[str, str]]
+
+ def __init__(self, content: Optional[str] = None):
+ """
+ Initialize a new conversation.
+
+ :param content: Initial system message (optional).
+ """
+ self.messages = []
+ if content is not None:
+ self.system(content)
+
+ @staticmethod
+ def _dedent(text: str) -> str:
+ """
+ Remove common leading whitespace from every line of text.
+
+ :param text: Text to dedent.
+ :return: Dedented text.
+ """
+ indent = len(text)
+ lines = text.splitlines()
+ for line in lines:
+ if line.strip():
+ indent = min(indent, len(line) - len(line.lstrip()))
+ dedented_lines = [line[indent:].rstrip() for line in lines]
+ return "\n".join(line for line in dedented_lines)
+
+ def add(self, role: str, content: str, name: Optional[str] = None) -> "Convo":
+ """
+ Add a message to the conversation.
+
+ In most cases, you should use the convenience methods instead.
+
+ :param role: Role of the message (system, user, assistant, function).
+ :param content: Content of the message.
+ :param name: Name of the message sender (optional).
+ :return: The convo object.
+ """
+
+ if role not in self.ROLES:
+ raise ValueError(f"Unknown role: {role}")
+ if not content:
+ raise ValueError("Empty message content")
+ if not isinstance(content, (str, dict)):
+ raise TypeError(f"Invalid message content: {type(content).__name__}")
+
+ message = {
+ "role": role,
+ "content": self._dedent(content) if isinstance(content, str) else content,
+ }
+ if name is not None:
+ message["name"] = name
+
+ self.messages.append(message)
+ return self
+
+ def system(self, content: str, name: Optional[str] = None) -> "Convo":
+ """
+ Add a system message to the conversation.
+
+ System messages can use `name` for showing example conversations
+ between an example user and an example assistant.
+
+ :param content: Content of the message.
+ :param name: Name of the message sender (optional).
+ :return: The convo object.
+ """
+ return self.add("system", content, name)
+
+ def user(self, content: str, name: Optional[str] = None) -> "Convo":
+ """
+ Add a user message to the conversation.
+
+ :param content: Content of the message.
+ :param name: User name (optional).
+ :return: The convo object.
+ """
+ return self.add("user", content, name)
+
+ def assistant(self, content: str, name: Optional[str] = None) -> "Convo":
+ """
+ Add an assistant message to the conversation.
+
+ :param content: Content of the message.
+ :param name: Assistant name (optional).
+ :return: The convo object.
+ """
+ return self.add("assistant", content, name)
+
+ def function(self, content: str, name: Optional[str] = None) -> "Convo":
+ """
+ Add a function (tool) response to the conversation.
+
+ :param content: Content of the message.
+ :param name: Function/tool name (optional).
+ :return: The convo object.
+ """
+ return self.add("function", content, name)
+
+ def fork(self) -> "Convo":
+ """
+ Create an identical copy of the conversation.
+
+ This performs a deep copy of all the message
+ contents, so you can safely modify both the
+ parent and the child conversation.
+
+ :return: A copy of the conversation.
+ """
+ child = Convo()
+ child.messages = deepcopy(self.messages)
+ return child
+
+ def after(self, parent: "Convo") -> "Convo":
+ """
+ Create a chat with only messages after the last common
+ message (that appears in both parent conversation and
+ this one).
+
+ :param parent: Parent conversation.
+ :return: A new conversation with only new messages.
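+
+ Example (illustrative):
+
+     parent = Convo("system message")
+     child = parent.fork().user("new question")
+     child.after(parent).messages  # contains only the "new question" message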
+ """
+ index = 0
+ while index < min(len(self.messages), len(parent.messages)) and self.messages[index] == parent.messages[index]:
+ index += 1
+
+ child = Convo()
+ child.messages = [deepcopy(msg) for msg in self.messages[index:]]
+ return child
+
+ def last(self) -> Optional[dict[str, str]]:
+ """
+ Get the last message in the conversation.
+
+ :return: The last message, or None if the conversation is empty.
+ """
+ return self.messages[-1] if self.messages else None
+
+ def __iter__(self) -> Iterator[dict[str, str]]:
+ """
+ Iterate over the messages in the conversation.
+
+ :return: An iterator over the messages.
+ """
+ return iter(self.messages)
+
+ def __repr__(self) -> str:
+ return f""
+
+
+__all__ = ["Convo"]
diff --git a/core/llm/groq_client.py b/core/llm/groq_client.py
new file mode 100644
index 00000000..2021abdc
--- /dev/null
+++ b/core/llm/groq_client.py
@@ -0,0 +1,93 @@
+import datetime
+from typing import Optional
+
+import tiktoken
+from groq import AsyncGroq, RateLimitError
+from httpx import Timeout
+
+from core.config import LLMProvider
+from core.llm.base import BaseLLMClient
+from core.llm.convo import Convo
+from core.log import get_logger
+
+log = get_logger(__name__)
+tokenizer = tiktoken.get_encoding("cl100k_base")
+
+
+class GroqClient(BaseLLMClient):
+ provider = LLMProvider.GROQ
+
+ def _init_client(self):
+ self.client = AsyncGroq(
+ api_key=self.config.api_key,
+ base_url=self.config.base_url,
+ timeout=Timeout(
+ max(self.config.connect_timeout, self.config.read_timeout),
+ connect=self.config.connect_timeout,
+ read=self.config.read_timeout,
+ ),
+ )
+
+ async def _make_request(
+ self,
+ convo: Convo,
+ temperature: Optional[float] = None,
+ json_mode: bool = False,
+ ) -> tuple[str, int, int]:
+ completion_kwargs = {
+ "model": self.config.model,
+ "messages": convo.messages,
+ "temperature": self.config.temperature if temperature is None else temperature,
+ "stream": True,
+ }
+ if json_mode:
+ completion_kwargs["response_format"] = {"type": "json_object"}
+
+ stream = await self.client.chat.completions.create(**completion_kwargs)
+ response = []
+ prompt_tokens = 0
+ completion_tokens = 0
+
+ async for chunk in stream:
+ if not chunk.choices:
+ continue
+
+ content = chunk.choices[0].delta.content
+ if not content:
+ continue
+
+ response.append(content)
+ if self.stream_handler:
+ await self.stream_handler(content)
+
+ response_str = "".join(response)
+
+ # Tell the stream handler we're done
+ if self.stream_handler:
+ await self.stream_handler(None)
+
+ if prompt_tokens == 0 and completion_tokens == 0:
+ # FIXME: Here we estimate Groq tokens using the same method as for OpenAI.
+ # See https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
+ prompt_tokens = sum(3 + len(tokenizer.encode(msg["content"])) for msg in convo.messages)
+ completion_tokens = len(tokenizer.encode(response_str))
+
+ return response_str, prompt_tokens, completion_tokens
+
+ def rate_limit_sleep(self, err: RateLimitError) -> Optional[datetime.timedelta]:
+ """
+ Groq rate limits docs: https://console.groq.com/docs/rate-limits
+
+ Groq includes `retry-after` header when 429 RateLimitError is
+ thrown, so we use that instead of calculating our own backoff time.
+ """
+
+ headers = err.response.headers
+ if "retry-after" not in headers:
+ return None
+
+ retry_after = int(err.response.headers["retry-after"])
+ return datetime.timedelta(seconds=retry_after)
+
+
+__all__ = ["GroqClient"]
diff --git a/core/llm/openai_client.py b/core/llm/openai_client.py
new file mode 100644
index 00000000..a58f92d6
--- /dev/null
+++ b/core/llm/openai_client.py
@@ -0,0 +1,116 @@
+import datetime
+import re
+from typing import Optional
+
+import tiktoken
+from httpx import Timeout
+from openai import AsyncOpenAI, RateLimitError
+
+from core.config import LLMProvider
+from core.llm.base import BaseLLMClient
+from core.llm.convo import Convo
+from core.log import get_logger
+
+log = get_logger(__name__)
+tokenizer = tiktoken.get_encoding("cl100k_base")
+
+
+class OpenAIClient(BaseLLMClient):
+ provider = LLMProvider.OPENAI
+
+ def _init_client(self):
+ self.client = AsyncOpenAI(
+ api_key=self.config.api_key,
+ base_url=self.config.base_url,
+ timeout=Timeout(
+ max(self.config.connect_timeout, self.config.read_timeout),
+ connect=self.config.connect_timeout,
+ read=self.config.read_timeout,
+ ),
+ )
+
+ async def _make_request(
+ self,
+ convo: Convo,
+ temperature: Optional[float] = None,
+ json_mode: bool = False,
+ ) -> tuple[str, int, int]:
+ completion_kwargs = {
+ "model": self.config.model,
+ "messages": convo.messages,
+ "temperature": self.config.temperature if temperature is None else temperature,
+ "stream": True,
+ "stream_options": {
+ "include_usage": True,
+ },
+ }
+ if json_mode:
+ completion_kwargs["response_format"] = {"type": "json_object"}
+
+ stream = await self.client.chat.completions.create(**completion_kwargs)
+ response = []
+ prompt_tokens = 0
+ completion_tokens = 0
+
+ async for chunk in stream:
+ if chunk.usage:
+ prompt_tokens += chunk.usage.prompt_tokens
+ completion_tokens += chunk.usage.completion_tokens
+
+ if not chunk.choices:
+ continue
+
+ content = chunk.choices[0].delta.content
+ if not content:
+ continue
+
+ response.append(content)
+ if self.stream_handler:
+ await self.stream_handler(content)
+
+ response_str = "".join(response)
+
+ # Tell the stream handler we're done
+ if self.stream_handler:
+ await self.stream_handler(None)
+
+ if prompt_tokens == 0 and completion_tokens == 0:
+ # See https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
+ prompt_tokens = sum(3 + len(tokenizer.encode(msg["content"])) for msg in convo.messages)
+ completion_tokens = len(tokenizer.encode(response_str))
+ log.warning(
+ "OpenAI response did not include token counts, estimating with tiktoken: "
+ f"{prompt_tokens} input tokens, {completion_tokens} output tokens"
+ )
+
+ return response_str, prompt_tokens, completion_tokens
+
+ def rate_limit_sleep(self, err: RateLimitError) -> Optional[datetime.timedelta]:
+ """
+ OpenAI rate limits docs:
+ https://platform.openai.com/docs/guides/rate-limits/error-mitigation
+ Limit reset times are in "2h32m54s" format.
+ """
+
+ headers = err.response.headers
+ if "x-ratelimit-remaining-tokens" not in headers:
+ return None
+
+ remaining_tokens = headers["x-ratelimit-remaining-tokens"]
+ time_regex = r"(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s)?"
+ # Header values are strings, so convert before comparing
+ if int(remaining_tokens) == 0:
+ match = re.search(time_regex, headers["x-ratelimit-reset-tokens"])
+ else:
+ match = re.search(time_regex, headers["x-ratelimit-reset-requests"])
+
+ if match:
+ # Each component is optional in the regex, so default missing groups to 0
+ seconds = int(match.group(1) or 0) * 3600 + int(match.group(2) or 0) * 60 + int(match.group(3) or 0)
+ else:
+ # Not sure how this would happen, we would have to get a RateLimitError,
+ # but nothing (or invalid entry) in the `reset` field. Using a sane default.
+ seconds = 5
+
+ return datetime.timedelta(seconds=seconds)
+
+
+__all__ = ["OpenAIClient"]
diff --git a/core/llm/parser.py b/core/llm/parser.py
new file mode 100644
index 00000000..86cfcb6f
--- /dev/null
+++ b/core/llm/parser.py
@@ -0,0 +1,161 @@
+import json
+import re
+from enum import Enum
+from typing import Optional, Union
+
+from pydantic import BaseModel, ValidationError
+
+
+class MultiCodeBlockParser:
+ """
+ Parse multiple Markdown code blocks from a string.
+
+ Expects zero or more blocks, and ignores any text
+ outside of the code blocks.
+
+ Example usage:
+
+ >>> parser = MultiCodeBlockParser()
+ >>> text = '''
+ ... text outside block
+ ...
+ ... ```python
+ ... first block
+ ... ```
+ ... some text between blocks
+ ... ```js
+ ... more
+ ... code
+ ... ```
+ ... some text after blocks
+ ... '''
+ >>> assert parser(text) == ["first block", "more\ncode"]
+
+ If no code blocks are found, an empty list is returned.
+ """
+
+ def __init__(self):
+ # FIXME: the ``` fence should be the only content on its line
+ self.pattern = re.compile(r"```([a-z0-9]+\n)?(.*?)```\s*", re.DOTALL)
+
+ def __call__(self, text: str) -> list[str]:
+ blocks = []
+ for block in self.pattern.findall(text):
+ blocks.append(block[1].strip())
+ return blocks
+
+
+class CodeBlockParser(MultiCodeBlockParser):
+ """
+ Parse a Markdown code block from a string.
+
+ Expects exactly one code block, and ignores
+ any text before or after it.
+
+ Usage:
+ >>> parser = CodeBlockParser()
+ >>> text = "text\n```py\ncodeblock\n'''\nmore text"
+ >>> assert parser(text) == "codeblock"
+
+ This is a special case of MultiCodeBlockParser,
+ checking that there's exactly one block.
+ """
+
+ def __call__(self, text: str) -> str:
+ blocks = super().__call__(text)
+ # FIXME: if there is more than one code block, the output probably contains ```,
+ # so re-parse this with that in mind
+ if len(blocks) != 1:
+ raise ValueError(f"Expected a single code block, got {len(blocks)}")
+ return blocks[0]
+
+
+class OptionalCodeBlockParser:
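+ """
+ Extract the contents of a Markdown code block if the text is wrapped
+ in one; otherwise return the (stripped) text unchanged.
+
+ Example (illustrative):
+
+     parser = OptionalCodeBlockParser()
+     parser("```py\nx = 1\n```")  # -> "x = 1"
+     parser("plain text")  # -> "plain text"
+ """
+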
+ def __call__(self, text: str) -> str:
+ text = text.strip()
+ if text.startswith("```") and text.endswith("\n```"):
+ # Remove the first and last line. Note the first line may include syntax
+ # highlighting, so we can't just remove the first 3 characters.
+ text = "\n".join(text.splitlines()[1:-1]).strip()
+ return text
+
+
+class JSONParser:
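+ """
+ Parse a JSON response, optionally validating it against a Pydantic model.
+
+ Example (illustrative):
+
+     class Answer(BaseModel):
+         value: int
+
+     parser = JSONParser(spec=Answer)
+     answer = parser('{"value": 42}')  # -> Answer(value=42)
+ """
+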
+ def __init__(self, spec: Optional[type[BaseModel]] = None, strict: bool = True):
+ self.spec = spec
+ self.strict = strict or (spec is not None)
+
+ @property
+ def schema(self):
+ return self.spec.model_json_schema() if self.spec else None
+
+ @staticmethod
+ def errors_to_markdown(errors: list) -> str:
+ error_txt = []
+ for error in errors:
+ loc = ".".join(str(loc) for loc in error["loc"])
+ etype = error["type"]
+ msg = error["msg"]
+ error_txt.append(f"- `{loc}`: {etype} ({msg})")
+ return "\n".join(error_txt)
+
+ def __call__(self, text: str) -> Union[BaseModel, dict, None]:
+ text = text.strip()
+ if text.startswith("```"):
+ try:
+ text = CodeBlockParser()(text)
+ except ValueError:
+ if self.strict:
+ raise
+ else:
+ return None
+
+ try:
+ data = json.loads(text.strip())
+ except json.JSONDecodeError as e:
+ if self.strict:
+ raise ValueError(f"JSON is not valid: {e}") from e
+ else:
+ return None
+ if self.spec is None:
+ return data
+
+ try:
+ model = self.spec(**data)
+ except ValidationError as err:
+ errtxt = self.errors_to_markdown(err.errors())
+ raise ValueError(f"Invalid JSON format:\n{errtxt}") from err
+ except Exception as err:
+ raise ValueError(f"Error parsing JSON: {err}") from err
+
+ return model
+
+
+class EnumParser:
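+ """
+ Parse a response that must match one of the values of an Enum.
+
+ Example (illustrative):
+
+     class Color(str, Enum):
+         RED = "red"
+         BLUE = "blue"
+
+     parser = EnumParser(Color)
+     parser("Red")  # -> Color.RED (matching is case-insensitive by default)
+ """
+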
+ def __init__(self, spec: type[Enum], ignore_case: bool = True):
+ self.spec = spec
+ self.ignore_case = ignore_case
+
+ def __call__(self, text: str) -> Enum:
+ text = text.strip()
+ if self.ignore_case:
+ text = text.lower()
+ try:
+ return self.spec(text)
+ except ValueError as e:
+ options = ", ".join([str(v) for v in self.spec])
+ raise ValueError(f"Invalid option '{text}'; valid options: {options}") from e
+
+
+class StringParser:
+ def __call__(self, text: str) -> str:
+ # Strip any leading and trailing whitespace
+ text = text.strip()
+
+ # Check and remove quotes at the start and end if they match
+ if text.startswith(("'", '"')) and text.endswith(("'", '"')) and len(text) > 1:
+ # Remove the first and last character if they are both quotes
+ if text[0] == text[-1]:
+ text = text[1:-1]
+
+ return text
diff --git a/core/llm/prompt.py b/core/llm/prompt.py
new file mode 100644
index 00000000..03f938e0
--- /dev/null
+++ b/core/llm/prompt.py
@@ -0,0 +1,48 @@
+from os.path import isdir
+from typing import Any, Optional
+
+from jinja2 import BaseLoader, Environment, FileSystemLoader, StrictUndefined, TemplateNotFound
+
+
+class FormatTemplate:
+ def __call__(self, template: str, **kwargs: dict[str, Any]) -> str:
+ return template.format(**kwargs)
+
+
+class BaseJinjaTemplate:
+ def __init__(self, loader: Optional[BaseLoader]):
+ self.env = Environment(
+ loader=loader,
+ autoescape=False,
+ lstrip_blocks=True,
+ trim_blocks=True,
+ keep_trailing_newline=True,
+ undefined=StrictUndefined,
+ )
+
+
+class JinjaStringTemplate(BaseJinjaTemplate):
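+ """
+ Render a Jinja2 template passed in as a string.
+
+ Example (illustrative):
+
+     render = JinjaStringTemplate()
+     render("Hello {{ name }}!", name="World")  # -> "Hello World!"
+ """
+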
+ def __init__(self):
+ super().__init__(None)
+
+ def __call__(self, template: str, **kwargs: dict[str, Any]) -> str:
+ tpl = self.env.from_string(template)
+ return tpl.render(**kwargs)
+
+
+class JinjaFileTemplate(BaseJinjaTemplate):
+ def __init__(self, template_dirs: list[str]):
+ for td in template_dirs:
+ if not isdir(td):
+ raise ValueError(f"Template directory does not exist: {td}")
+ super().__init__(FileSystemLoader(template_dirs))
+
+ def __call__(self, template: str, **kwargs: dict[str, Any]) -> str:
+ try:
+ tpl = self.env.get_template(template)
+ except TemplateNotFound as err:
+ raise ValueError(f"Template not found: {template}") from err
+ return tpl.render(**kwargs)
+
+
+__all__ = ["FormatTemplate", "JinjaStringTemplate", "JinjaFileTemplate"]
diff --git a/core/llm/request_log.py b/core/llm/request_log.py
new file mode 100644
index 00000000..69703fde
--- /dev/null
+++ b/core/llm/request_log.py
@@ -0,0 +1,28 @@
+from datetime import datetime
+from enum import Enum
+
+from pydantic import BaseModel, Field
+
+from core.config import LLMProvider
+
+
+class LLMRequestStatus(str, Enum):
+ SUCCESS = "success"
+ ERROR = "error"
+
+
+class LLMRequestLog(BaseModel):
+ provider: LLMProvider
+ model: str
+ temperature: float
+ messages: list[dict[str, str]] = Field(default_factory=list)
+ response: str = ""
+ prompt_tokens: int = 0
+ completion_tokens: int = 0
+ started_at: datetime = Field(default_factory=datetime.now)
+ duration: float = 0.0
+ status: LLMRequestStatus = LLMRequestStatus.SUCCESS
+ error: str = ""
+
+
+__all__ = ["LLMRequestLog", "LLMRequestStatus"]
diff --git a/core/log/__init__.py b/core/log/__init__.py
new file mode 100644
index 00000000..73164051
--- /dev/null
+++ b/core/log/__init__.py
@@ -0,0 +1,50 @@
+from logging import FileHandler, Formatter, Logger, StreamHandler, getLogger
+
+from core.config import LogConfig
+
+
+def setup(config: LogConfig, force: bool = False):
+ """
+ Set up logging based on the current configuration.
+
+ The method is idempotent unless `force` is set to True,
+ in which case it will reconfigure the logging.
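+
+ Illustrative usage (assumes LogConfig provides sensible defaults):
+
+     setup(LogConfig(), force=True)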
+ """
+
+ root = getLogger()
+ logger = getLogger("pythagora")
+ # Only clear/remove existing log handlers if we're forcing a new setup
+ if not force and (root.handlers or logger.handlers):
+ return
+
+ while force and root.handlers:
+ root.removeHandler(root.handlers[0])
+
+ while force and logger.handlers:
+ logger.removeHandler(logger.handlers[0])
+
+ level = config.level
+ formatter = Formatter(config.format)
+
+ if config.output:
+ handler = FileHandler(config.output, encoding="utf-8")
+ else:
+ handler = StreamHandler()
+
+ handler.setFormatter(formatter)
+ handler.setLevel(level)
+
+ logger.setLevel(level)
+ logger.addHandler(handler)
+
+
+def get_logger(name: str) -> Logger:
+ """
+ Get a logger for the given (module) name.
+
+ :return: Logger instance
+ """
+ return getLogger(name)
+
+
+__all__ = ["setup", "get_logger"]
diff --git a/core/proc/__init__.py b/core/proc/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/core/proc/exec_log.py b/core/proc/exec_log.py
new file mode 100644
index 00000000..258dd317
--- /dev/null
+++ b/core/proc/exec_log.py
@@ -0,0 +1,21 @@
+from datetime import datetime
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+
+class ExecLog(BaseModel):
+ started_at: datetime = Field(default_factory=datetime.now)
+ duration: float = Field(description="The duration of the command/process run in seconds")
+ cmd: str = Field(description="The full command (as executed in the shell)")
+ cwd: str = Field(description="The working directory for the command (relative to project root)")
+ env: dict = Field(description="The environment variables for the command")
+ timeout: Optional[float] = Field(description="The command timeout in seconds (or None if no timeout)")
+ status_code: Optional[int] = Field(description="The command return code, or None if there was a timeout")
+ stdout: str = Field(description="The command standard output")
+ stderr: str = Field(description="The command standard error")
+ analysis: str = Field(description="The result analysis as performed by the LLM")
+ success: bool = Field(description="Whether the command was successful")
+
+
+__all__ = ["ExecLog"]
diff --git a/core/proc/process_manager.py b/core/proc/process_manager.py
new file mode 100644
index 00000000..990724d1
--- /dev/null
+++ b/core/proc/process_manager.py
@@ -0,0 +1,278 @@
+import asyncio
+import signal
+import sys
+import time
+from dataclasses import dataclass
+from os import getenv
+from os.path import abspath, join
+from typing import Callable, Optional
+from uuid import UUID, uuid4
+
+import psutil
+
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+NONBLOCK_READ_TIMEOUT = 0.01
+BUSY_WAIT_INTERVAL = 0.1
+WATCHER_IDLE_INTERVAL = 1.0
+MAX_COMMAND_TIMEOUT = 180
+
+
+@dataclass
+class LocalProcess:
+ id: UUID
+ cmd: str
+ cwd: str
+ env: dict[str, str]
+ stdout: str
+ stderr: str
+ _process: asyncio.subprocess.Process
+
+ def __hash__(self) -> int:
+ return hash(self.id)
+
+ @staticmethod
+ async def start(
+ cmd: str,
+ *,
+ cwd: str = ".",
+ env: dict[str, str],
+ bg: bool = False,
+ ) -> "LocalProcess":
+ log.debug(f"Starting process: {cmd} (cwd={cwd}, env={env})")
+ _process = await asyncio.create_subprocess_shell(
+ cmd,
+ cwd=cwd,
+ env=env,
+ start_new_session=bg,
+ stdin=asyncio.subprocess.PIPE,
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE,
+ )
+ if bg:
+ _process.stdin.close()
+
+ return LocalProcess(
+ id=uuid4(),
+ cmd=cmd,
+ cwd=cwd,
+ env=env,
+ stdout="",
+ stderr="",
+ _process=_process,
+ )
+
+ async def wait(self, timeout: Optional[float] = None) -> int:
+ try:
+ future = self._process.wait()
+ if timeout:
+ future = asyncio.wait_for(future, timeout)
+ retcode = await future
+ except asyncio.TimeoutError:
+ log.debug(f"Process {self.cmd} still running after {timeout}s, terminating")
+ await self.terminate()
+ # FIXME: this may still hang if we don't manage to kill the process.
+ retcode = await self._process.wait()
+
+ await self.read_output()
+ return retcode
+
+ @staticmethod
+ async def _nonblock_read(reader: asyncio.StreamReader, timeout: float) -> str:
+ """
+ Reads data from a stream reader without blocking (for long).
+
+ This wraps the read in a (short) timeout to avoid blocking the event loop for too long.
+
+ :param reader: Async stream reader to read from.
+ :param timeout: Timeout for the read operation (should not be too long).
+ :return: Data read from the stream reader, or empty string.
+ """
+ try:
+ data = await asyncio.wait_for(reader.read(), timeout)
+ return data.decode("utf-8", errors="ignore")
+ except asyncio.TimeoutError:
+ return ""
+
+ async def read_output(self, timeout: float = NONBLOCK_READ_TIMEOUT) -> tuple[str, str]:
+ new_stdout = await self._nonblock_read(self._process.stdout, timeout)
+ new_stderr = await self._nonblock_read(self._process.stderr, timeout)
+ self.stdout += new_stdout
+ self.stderr += new_stderr
+ return (new_stdout, new_stderr)
+
+ async def _terminate_process_tree(self, sig: int):
+ # This is a recursive function that terminates the entire process tree
+ # of the current process. It first terminates all child processes, then
+ # terminates itself.
+ shell_process = psutil.Process(self._process.pid)
+ processes = shell_process.children(recursive=True)
+ processes.append(shell_process)
+ for proc in processes:
+ try:
+ proc.send_signal(sig)
+ except psutil.NoSuchProcess:
+ pass
+
+ psutil.wait_procs(processes, timeout=1)
+
+ async def terminate(self, kill: bool = True):
+ if kill and sys.platform != "win32":
+ await self._terminate_process_tree(signal.SIGKILL)
+ else:
+ # Windows doesn't have SIGKILL
+ await self._terminate_process_tree(signal.SIGTERM)
+
+ @property
+ def is_running(self) -> bool:
+ try:
+ return psutil.Process(self._process.pid).is_running()
+ except psutil.NoSuchProcess:
+ return False
+
+ @property
+ def pid(self) -> int:
+ return self._process.pid
+
+
+class ProcessManager:
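+ """
+ Manage local (background and foreground) processes for a project.
+
+ Illustrative usage (must run inside an asyncio event loop):
+
+     manager = ProcessManager(root_dir="/path/to/project")
+     status_code, stdout, stderr = await manager.run_command("echo hello")
+ """
+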
+ def __init__(
+ self,
+ *,
+ root_dir: str,
+ env: Optional[dict[str, str]] = None,
+ output_handler: Optional[Callable] = None,
+ exit_handler: Optional[Callable] = None,
+ ):
+ if env is None:
+ env = {
+ "PATH": getenv("PATH"),
+ }
+ self.processes: dict[UUID, LocalProcess] = {}
+ self.default_env = env
+ self.root_dir = root_dir
+ self.watcher_should_run = True
+ self.watcher_task = asyncio.create_task(self.watcher())
+ self.output_handler = output_handler
+ self.exit_handler = exit_handler
+
+ async def stop_watcher(self):
+ """
+ Stop the process watcher.
+
+ This should only be done when the ProcessManager is no longer needed.
+ """
+ if not self.watcher_should_run:
+ raise ValueError("Process watcher is not running")
+
+ self.watcher_should_run = False
+ await self.watcher_task
+
+ async def watcher(self):
+ """
+ Watch over the processes and manage their output and lifecycle.
+
+ This is a separate coroutine running independently of the caller
+ coroutine.
+ """
+ # IDs of processes whose output has been fully read after they finished
+ complete_processes = set()
+
+ while self.watcher_should_run:
+ procs = [p for p in self.processes.values() if p.id not in complete_processes]
+ if len(procs) == 0:
+ await asyncio.sleep(WATCHER_IDLE_INTERVAL)
+ continue
+
+ for process in procs:
+ out, err = await process.read_output()
+ if self.output_handler and (out or err):
+ await self.output_handler(out, err)
+
+ if not process.is_running:
+ # We're not removing the completed process from self.processes,
+ # to give the rest of the system time to read its output
+ complete_processes.add(process.id)
+ if self.exit_handler:
+ await self.exit_handler(process)
+
+ # Sleep a bit to avoid busy-waiting
+ await asyncio.sleep(BUSY_WAIT_INTERVAL)
+
+ async def start_process(
+ self,
+ cmd: str,
+ *,
+ cwd: str = ".",
+ env: Optional[dict[str, str]] = None,
+ bg: bool = True,
+ ) -> LocalProcess:
+ env = {**self.default_env, **(env or {})}
+ abs_cwd = abspath(join(self.root_dir, cwd))
+ process = await LocalProcess.start(cmd, cwd=abs_cwd, env=env, bg=bg)
+ if bg:
+ self.processes[process.id] = process
+ return process
+
+ async def run_command(
+ self,
+ cmd: str,
+ *,
+ cwd: str = ".",
+ env: Optional[dict[str, str]] = None,
+ timeout: float = MAX_COMMAND_TIMEOUT,
+ ) -> tuple[Optional[int], str, str]:
+ """
+ Run command and wait for it to finish.
+
+ Status code is an integer representing the process exit code, or
+ None if the process timed out and was terminated.
+
+ :param cmd: Command to run.
+ :param cwd: Working directory.
+ :param env: Environment variables.
+ :param timeout: Timeout in seconds.
+ :return: Tuple of (status code, stdout, stderr).
+ """
+ timeout = min(timeout, MAX_COMMAND_TIMEOUT)
+ terminated = False
+ process = await self.start_process(cmd, cwd=cwd, env=env, bg=False)
+
+ t0 = time.time()
+ while process.is_running and (time.time() - t0) < timeout:
+ out, err = await process.read_output(BUSY_WAIT_INTERVAL)
+ if self.output_handler and (out or err):
+ await self.output_handler(out, err)
+
+ if process.is_running:
+ log.debug(f"Process {cmd} still running after {timeout}s, terminating")
+ await process.terminate()
+ terminated = True
+ else:
+ await process.wait()
+
+ out, err = await process.read_output()
+ if self.output_handler and (out or err):
+ await self.output_handler(out, err)
+
+ if terminated:
+ status_code = None
+ else:
+ status_code = process._process.returncode or 0
+
+ return (status_code, process.stdout, process.stderr)
+
+ def list_running_processes(self):
+ return [p for p in self.processes.values() if p.is_running]
+
+ async def terminate_process(self, process_id: UUID) -> tuple[str, str]:
+ if process_id not in self.processes:
+ raise ValueError(f"Process {process_id} not found")
+
+ process = self.processes[process_id]
+ await process.terminate(kill=False)
+ del self.processes[process_id]
+
+ return (process.stdout, process.stderr)
diff --git a/core/prompts/architect/technologies.prompt b/core/prompts/architect/technologies.prompt
new file mode 100644
index 00000000..6b9479ca
--- /dev/null
+++ b/core/prompts/architect/technologies.prompt
@@ -0,0 +1,68 @@
+You're designing the architecture and technical specifications for a new project.
+
+If the project requirements call for a specific technology, use that. Otherwise, if working on a web app, prefer Node.js for the backend (with Express if a web server is needed, and MongoDB if a database is needed), and Bootstrap for the front-end. You MUST NOT use Docker, Kubernetes, microservices or single-page app frameworks like React, Next.js, Angular, Vue or Svelte unless the project details explicitly require it.
+
+Here are the details for the new project:
+-----------------------------
+{% include "partials/project_details.prompt" %}
+{% include "partials/features_list.prompt" %}
+-----------------------------
+
+Based on these details, think step by step to design the architecture for the project and choose technologies to use in building it.
+
+1. First, design and describe the project architecture in general terms
+2. Then, list any system dependencies that should be installed on the system prior to the start of development. For each system dependency, output a {{ os }} command to check whether it's installed.
+3. Finally, list any other 3rd party packages or libraries that will be used (these will be installed later using a package manager in the project repository/environment).
+4. {% if templates %}Optionally, choose a project starter template.{% else %}(for this project there are no available starter/boilerplate templates, so there's no template to choose){% endif %}
+
+{% if templates %}
+You have an option to use a project template that implements standard boilerplate/scaffolding so you can start faster and be more productive. To be considered, a template must be compatible with the architecture and technologies you've chosen (it doesn't need to implement everything that will be used in the project, just a useful subset). If multiple templates can be considered, pick the one that's the best match.
+
+If no project templates are a good match, don't pick any! It's better to start from scratch than to use a template that is not a good fit for the project and then spend time reworking it to fit the requirements.
+
+Here are the available project templates:
+{% for name, tpl in templates.items() %}
+### {{ name }}
+{{ tpl.description }}
+
+Contains:
+{{ tpl.summary }}
+{% endfor %}
+{% endif %}
+
+*IMPORTANT*: You must follow these rules while creating your project:
+
+* You must only list *system* dependencies, i.e. the ones that need to be installed (typically as admin) to set up the programming language, database, etc. Any packages that will need to be installed via language/platform-specific package managers are *not* system dependencies.
+* If there are several popular options (such as Nginx or Apache for a web server), pick the one that would be more suitable for the app in question.
+* DO NOT include text editors, IDEs, shells, OpenSSL, CLI tools such as git, AWS, or Stripe clients, or other utilities in your list. Only include direct dependencies required to build and run the project.
+* If a dependency (such as a database) has a cloud alternative or can be installed on another computer (i.e. isn't required on this computer), you must mark it as `required_locally: false`
+
+Output only your response in JSON format like in this example, without other commentary:
+```json
+{
+ "architecture": "Detailed description of the architecture of the application",
+ "system_dependencies": [
+ {
+ "name": "Node.js",
+ "description": "JavaScript runtime for building apps. This is required to be able to run the app you're building.",
+ "test": "node --version",
+ "required_locally": true
+ },
+ {
+ "name": "MongoDB",
+ "description": "NoSQL database. If you don't want to install MongoDB locally, you can use a cloud version such as MongoDB Atlas.",
+ "test": "mongosh --version",
+ "required_locally": false
+ },
+ ...
+ ],
+ "package_dependencies": [
+ {
+ "name": "express",
+ "description": "Express web server for Node"
+ },
+ ...
+ ],
+ "template": "name of the project template to use" // or null if you decide not to use a project template
+}
+```
diff --git a/core/prompts/code-monkey/breakdown.prompt b/core/prompts/code-monkey/breakdown.prompt
new file mode 100644
index 00000000..e5bf532f
--- /dev/null
+++ b/core/prompts/code-monkey/breakdown.prompt
@@ -0,0 +1,2 @@
+{# This is the same template as for Developer's breakdown because Code Monkey is reusing it in a conversation #}
+{% extends "developer/breakdown.prompt" %}
diff --git a/core/prompts/code-monkey/describe_file.prompt b/core/prompts/code-monkey/describe_file.prompt
new file mode 100644
index 00000000..36ddc63f
--- /dev/null
+++ b/core/prompts/code-monkey/describe_file.prompt
@@ -0,0 +1,26 @@
+Your task is to explain the functionality implemented by a particular source code file.
+
+Given a file path and file contents, your output should contain:
+
+* a detailed explanation of what the file is about;
+* a list of all other files referenced (imported) from this file. Note that some languages, frameworks or libraries assume the file extension and don't use it explicitly. For example, "import foo" in Python references "foo.py" without specifying the extension. In your response, use the complete file name including the implied extension (for example "foo.py", not just "foo").
+
+Please analyze file `{{ path }}`, which contains the following content:
+```
+{{ content }}
+```
+
+Output the result in a JSON format with the following structure, as in this example:
+
+Example:
+{
+ "summary": "Describe in detail the functionality being defind o implemented in this file. Be as detailed as possible",
+ "references": [
+ "some/file.py",
+ "some/other/file.js"
+ ],
+}
+
+**IMPORTANT** In references, only include references to files that are local to the project. Do not include standard libraries or well-known external dependencies.
+
+Your response must be a valid JSON document, following the example format. Do not add any extra explanation or commentary outside the JSON document.
diff --git a/core/prompts/code-monkey/implement_changes.prompt b/core/prompts/code-monkey/implement_changes.prompt
new file mode 100644
index 00000000..b3c5593b
--- /dev/null
+++ b/core/prompts/code-monkey/implement_changes.prompt
@@ -0,0 +1,56 @@
+{% if rework_feedback is defined %}
+You previously made changes to file `{{ file_name }}`, according to the instructions described in the previous message.
+The reviewer accepted some of your changes, and the file now looks like this:
+```
+{{ file_content }}
+```
+{% elif file_content %}
+I need to modify file `{{ file_name }}` that currently looks like this:
+```
+{{ file_content }}
+```
+{% else %}
+I need to create a new file `{{ file_name }}`:
+{% endif %}
+
+**IMPORTANT**
+{% if rework_feedback is defined %}
+But not all changes were accepted, and the reviewer provided feedback on the changes that you must rework:
+{{ rework_feedback }}
+Please update the file accordingly and output the full new version of the file.
+{% else %}
+I want you to implement changes described in previous message, that starts with `{{ " ".join(instructions.split()[:5]) }}` and ends with `{{ " ".join(instructions.split()[-5:]) }}`.
+{% endif %}
+Make sure you don't make any mistakes, especially ones that could affect the rest of the project. Your changes will {% if rework_feedback is defined %}again {% endif %}be reviewed by a very detailed reviewer. Because of that, it is extremely important that you are STRICTLY following ALL the following rules while implementing changes:
+
+**IMPORTANT** Output format
+You must output the COMPLETE NEW VERSION of this file in following format:
+-----------------------format----------------------------
+```
+the full contents of the updated file, without skipping over any content
+```
+------------------------end_of_format---------------------------
+
+**IMPORTANT** Comprehensive Codebase Insight
+It's crucial to grasp the full scope of the codebase related to your tasks to avert mistakes. Check the initial conversation message for a list of files. Pay a lot of attention to files that are directly included in the file you are currently modifying or that are importing your file.
+Consider these examples to guide your approach and thought process:
+-----------------------start_of_examples----------------------------
+- UI components or templates: Instead of placing scripts directly on specific pages, integrating them in the `<head>` section or as reusable partials enhances application-wide consistency and reusability.
+- Database operations: Be careful not to execute an action, like password hashing, both in a routing function and a model's pre('save') hook, which could lead to redundancy and errors.
+- Adding backend logic: Prior to creating new functions, verify if an equivalent function exists in the codebase that you could import and use, preventing unnecessary code duplication and keeping the project efficient.
+-----------------------end_of_examples----------------------------
+
+**IMPORTANT** Coding principles
+To write high-quality code, organize it logically with clear, meaningful names for variables, functions, and classes. Aim for simplicity and adhere to the DRY (Don't Repeat Yourself) principle to avoid code duplication. Ensure your codebase is structured and modular for easy navigation and updates.
+
+**IMPORTANT** If the instructions have comments like `// ..add code here...` or `# placeholder for code`, instead of copying the comment, interpret the instructions and output the relevant code.
+
+**IMPORTANT** Your reply MUST NOT omit any code in the new implementation or substitute anything with comments like `// .. rest of the code goes here ..` or `# insert existing code here`, because I will overwrite the existing file with the content you provide. Output ONLY the content for this file, without additional explanation, suggestions or notes. Your output MUST start with ``` and MUST end with ``` and include only the complete file contents.
+
+**IMPORTANT** For hardcoded configuration values that the user needs to change, mark the line that needs user configuration with `INPUT_REQUIRED {config_description}` comment, where `config_description` is a description of the value that needs to be set by the user. Use appropriate syntax for comments in the file you're saving (for example `// INPUT_REQUIRED {config_description}` in JavaScript). NEVER ask the user to write code or provide implementation, even if the instructions suggest it! If the file type doesn't support comments (eg JSON), don't add any.
+
+**IMPORTANT**: Logging
+Whenever you write code, make sure to log code execution so that when a developer looks at the CLI output, they can understand what is happening on the server. If the description above mentions the exact code that needs to be added but doesn't contain enough logs, you need to add the log statements inside that code yourself.
+
+**IMPORTANT**: Error handling
+Whenever you write code, make sure to add error handling for all edge cases you can think of because this app will be used in production so there shouldn't be any crashes. Whenever you log the error, you **MUST** log the entire error message and trace and not only the error message. If the description above mentions the exact code that needs to be added but doesn't contain enough error handlers, you need to add the error handlers inside that code yourself.
diff --git a/core/prompts/code-monkey/review_feedback.prompt b/core/prompts/code-monkey/review_feedback.prompt
new file mode 100644
index 00000000..cfb252e9
--- /dev/null
+++ b/core/prompts/code-monkey/review_feedback.prompt
@@ -0,0 +1,17 @@
+Your changes have been reviewed.
+{% if content != original_content %}
+The reviewer approved and applied some of your changes, but requested you rework the others.
+
+Here's the file with the approved changes already applied:
+```
+{{ content }}
+```
+
+Here's the reviewer's feedback:
+{% else %}
+The reviewer requested that you rework your changes, here's the feedback:
+{% endif %}
+
+{{ rework_feedback }}
+
+Based on this feedback and the original instructions, think carefully, make the correct changes, and output the entire file again. Remember, output ONLY the content for this file, without additional explanation, suggestions or notes. Your output MUST start with ``` and MUST end with ``` and include only the complete file contents.
diff --git a/core/prompts/code-monkey/system.prompt b/core/prompts/code-monkey/system.prompt
new file mode 100644
index 00000000..3efb6569
--- /dev/null
+++ b/core/prompts/code-monkey/system.prompt
@@ -0,0 +1,3 @@
+You are a full stack software developer that works in a software development agency.
+You write modular, clean, maintainable, production-ready code.
+Your job is to implement tasks that your tech lead assigns you.
diff --git a/core/prompts/code-reviewer/breakdown.prompt b/core/prompts/code-reviewer/breakdown.prompt
new file mode 100644
index 00000000..f575d5d8
--- /dev/null
+++ b/core/prompts/code-reviewer/breakdown.prompt
@@ -0,0 +1,2 @@
+{# This is the same template as for Developer's breakdown because Code Reviewer is reusing it in a conversation #}
+{% extends "developer/breakdown.prompt" %}
diff --git a/core/prompts/code-reviewer/review_changes.prompt b/core/prompts/code-reviewer/review_changes.prompt
new file mode 100644
index 00000000..82802a05
--- /dev/null
+++ b/core/prompts/code-reviewer/review_changes.prompt
@@ -0,0 +1,29 @@
+A developer on your team has been working on the task described in previous message. Based on those instructions, the developer has made changes to file `{{ file_name }}`.
+
+Here is the original content of this file:
+```
+{{ old_content }}
+```
+
+Here is the diff of the changes:
+
+{% for hunk in hunks %}## Hunk {{ loop.index }}
+```diff
+{{ hunk }}
+```
+{% endfor %}
+
+As you can see, there {% if hunks|length == 1 %}is only one hunk in this diff, and it{% else %}are {{ hunks|length }} hunks in this diff, and each{% endif %} starts with the `@@` header line.
+
+When reviewing the code changes, apply these principles to decide on each hunk:
+- Apply: Approve and integrate the hunk into our core codebase if it accurately delivers the intended functionality or enhancement, aligning with our project objectives. This action confirms the change is beneficial and meets our quality standards.
+- Ignore: Use this option sparingly, only when you're certain the entire hunk is incorrect or will introduce errors (logical, syntax, etc.) that could negatively impact the project. Ignoring means the hunk will be completely removed. This should be reserved for cases where the inclusion of the code is definitively more harmful than its absence. Emphasize careful consideration before choosing 'Ignore.' It's crucial for situations where the hunk's removal is the only option to prevent significant issues. Otherwise, 'Rework' might be the better choice to ensure the code's integrity and functionality.
+- Rework: Suggest this option if the concept behind the change is valid and necessary but is implemented in a way that introduces problems. This indicates a need for a revision of the hunk to refine its integration without fully discarding the underlying idea.
+
+When deciding what should be done with the hunk you are currently reviewing, pick an option that most reviewers of your skill would choose. Your decisions have to be consistent.
+
+Keep in mind you're just reviewing the current file. You don't need to consider whether other files are created, dependent packages installed, etc. Focus only on reviewing the changes in this file based on the instructions in the previous message.
+
+Note that the developer may add, modify or delete logging (including `gpt_pilot_debugging_log`) or error handling that's not explicitly asked for, but is a part of good development practice. Unless these logging and error handling additions break something, your decision to apply, ignore or rework the hunk should not be based on this. Base your decision only on functional changes - comments or logging are less important. Importantly, don't ask for a rework just because of logging or error handling changes. Also, take into account this is a junior developer and while the approach they take may not be the best practice, if it's not *wrong*, let it pass. Ask for rework only if the change is clearly bad and would break something.
+
+The developer who wrote this is sometimes sloppy and could have deleted some parts of the code that contain important functionality and should not be deleted. Pay special attention to that in your review.
diff --git a/core/prompts/code-reviewer/system.prompt b/core/prompts/code-reviewer/system.prompt
new file mode 100644
index 00000000..17d4635a
--- /dev/null
+++ b/core/prompts/code-reviewer/system.prompt
@@ -0,0 +1,2 @@
+You are a world class full stack software developer. You write modular, clean, maintainable, production-ready code.
+Your job is to review changes implemented by your junior team members.
diff --git a/core/prompts/developer/breakdown.prompt b/core/prompts/developer/breakdown.prompt
new file mode 100644
index 00000000..31732563
--- /dev/null
+++ b/core/prompts/developer/breakdown.prompt
@@ -0,0 +1,34 @@
+You are working on an app called "{{ state.branch.project.name }}" and you need to write code for the entire {% if state.epics|length > 1 %}feature{% else %}app{% endif %} based on the tasks that the tech lead gives you. So that you understand better what you're working on, you're given other specs for "{{ state.branch.project.name }}" as well.
+
+{% include "partials/project_details.prompt" %}
+{% include "partials/features_list.prompt" %}
+{% include "partials/files_list.prompt" %}
+
+We've broken the development of this {% if state.epics|length > 1 %}feature{% else %}app{% endif %} down to these tasks:
+```
+{% for task in state.tasks %}
+{{ loop.index }}. {{ task.description }}{% if task.get("completed") %} (completed){% endif %}
+{% endfor %}
+```
+
+You are currently working on task #{{ current_task_index + 1 }} with the following description:
+```
+{{ task.description }}
+```
+{% if current_task_index != 0 %}All previous tasks are finished and you don't have to work on them.{% endif %}
+
+Now, tell me all the code that needs to be written to implement ONLY this task and have it fully working, and all the commands that need to be run to implement this task.
+
+**IMPORTANT**
+{%- if state.epics|length == 1 %}
+Remember, I created an empty folder where I will start writing the files that you tell me are needed for this app.
+{% endif %}
+{% include "partials/relative_paths.prompt" %}
+DO NOT specify commands to create any folders or files, they will be created automatically - just specify the relative path to each file that needs to be written.
+
+{% include "partials/file_naming.prompt" %}
+{% include "partials/execution_order.prompt" %}
+{% include "partials/human_intervention_explanation.prompt" %}
+{% include "partials/file_size_limit.prompt" %}
+
+Never use the port 5000 to run the app, it's reserved.
diff --git a/core/prompts/developer/filter_files.prompt b/core/prompts/developer/filter_files.prompt
new file mode 100644
index 00000000..bb12cd52
--- /dev/null
+++ b/core/prompts/developer/filter_files.prompt
@@ -0,0 +1,16 @@
+We're starting work on a new task for a project we're working on.
+
+{% include "partials/project_details.prompt" %}
+{% include "partials/files_list.prompt" %}
+{% include "partials/relative_paths.prompt" %}
+
+We've broken the development of the project down to these tasks:
+```
+{% for task in state.tasks %}
+{{ loop.index }}. {{ task.description }}{% if task.get("completed") %} (completed){% endif %}
+{% endfor %}
+```
+
+The next task we need to work on is: {{ current_task.description }}
+
+Before we dive into solving this task, we need to determine which files from the above list are relevant to this task. Output the relevant files in a JSON list.
diff --git a/core/prompts/developer/iteration.prompt b/core/prompts/developer/iteration.prompt
new file mode 100644
index 00000000..97d7d10c
--- /dev/null
+++ b/core/prompts/developer/iteration.prompt
@@ -0,0 +1 @@
+{% extends "troubleshooter/iteration.prompt" %}
\ No newline at end of file
diff --git a/core/prompts/developer/parse_task.prompt b/core/prompts/developer/parse_task.prompt
new file mode 100644
index 00000000..5acd9570
--- /dev/null
+++ b/core/prompts/developer/parse_task.prompt
@@ -0,0 +1,43 @@
+Ok, now, take your response and convert it to a list of actionable steps that will be executed by a machine.
+Analyze the entire message, think step by step and make sure that you don't omit any information
+when converting this message to steps.
+
+Each step can be either:
+
+* `command` - command to run (must be able to run on a {{ os }} machine, assume current working directory is project root folder)
+* `save_file` - create or update ONE file
+* `human_intervention` - if you need the human to do something, use this type of step and explain in details what you want the human to do. NEVER use `human_intervention` for testing, as testing will be done separately by a dedicated QA after all the steps are done. Also you MUST NOT use `human_intervention` to ask the human to write or review code.
+
+**IMPORTANT**: If multiple changes are required for the same file, you must provide a single `save_file` step for that file.
+
+{% include "partials/file_naming.prompt" %}
+{% include "partials/relative_paths.prompt" %}
+{% include "partials/execution_order.prompt" %}
+{% include "partials/human_intervention_explanation.prompt" %}
+
+**IMPORTANT**: Remember, NEVER output human intervention steps to do manual tests or coding tasks, even if the previous message asks for it! The testing will be done *after* these steps and you MUST NOT include testing in these steps.
+
+Examples:
+------------------------example_1---------------------------
+```
+{
+ "tasks": [
+ {
+ "type": "save_file",
+ "save_file": {
+ "path": "server.js"
+ }
+ },
+ {
+ "type": "command",
+ "command": {
+ "command": "mv index.js public/index.js"",
+ "timeout": 5,
+ "success_message": "",
+ "command_id": "move_index_file"
+ }
+ }
+ ]
+}
+```
+------------------------end_of_example_1---------------------------
diff --git a/core/prompts/developer/system.prompt b/core/prompts/developer/system.prompt
new file mode 100644
index 00000000..3cc4d4af
--- /dev/null
+++ b/core/prompts/developer/system.prompt
@@ -0,0 +1,5 @@
+You are a world class full stack software developer working in a team.
+
+You write modular, well-organized code split across files that are not too big, so that the codebase is maintainable. You include proper error handling and logging for your clean, readable, production-level quality code.
+
+Your job is to implement tasks assigned by your tech lead, following task implementation instructions.
diff --git a/core/prompts/error-handler/debug.prompt b/core/prompts/error-handler/debug.prompt
new file mode 100644
index 00000000..28143c54
--- /dev/null
+++ b/core/prompts/error-handler/debug.prompt
@@ -0,0 +1,58 @@
+A coding task has been implemented for the new project we're working on.
+
+{% include "partials/project_details.prompt" %}
+{% include "partials/files_list.prompt" %}
+
+We've broken the development of the project down to these tasks:
+```
+{% for task in state.tasks %}
+{{ loop.index }}. {{ task.description }}{% if task.get("completed") %} (completed){% endif %}
+{% endfor %}
+```
+
+The current task is: {{ current_task.description }}
+
+Here are the detailed instructions for the current task:
+```
+{{ current_task.instructions }}
+```
+{# FIXME: the above stands in place of a previous (task breakdown) convo, and is duplicated in define_user_review_goal, review_task and debug prompts #}
+
+{% if task_steps and step_index is not none -%}
+The current task has been split into multiple steps, and each step is one of the following:
+* `command` - command to run
+* `save_file` - create or update a file
+* `human_intervention` - if the human needs to do something
+
+{# FIXME: this is copypasted from ran_command #}
+Here is the list of all steps in this task (steps that were already completed are marked as COMPLETED, future steps that will be executed once debugging is done are marked as FUTURE, and the current step is marked as CURRENT STEP):
+{% for step in task_steps %}
+* {% if loop.index0 < step_index %}(COMPLETED){% elif loop.index0 > step_index %}(FUTURE){% else %}(**CURRENT STEP**){% endif %} {{ step.type }}: `{% if step.type == 'command' %}{{ step.command.command }}{% elif step.type == 'save_file' %}{{ step.save_file.path }}{% endif %}`
+{% endfor %}
+
+When trying to see if the command ran successfully, take into consideration steps that were previously executed and steps that will be executed after the current step. It can happen that a command seems to have failed but will be fixed by the next steps. In that case you should consider the command to be successfully executed.
+{%- endif %}
+
+I ran the command `{{ cmd }}`, and it {% if status_code is none %}timed out{% else %}exited with status code {{ status_code }}{% endif %}.
+{% if stdout %}
+Command stdout:
+```
+{{ stdout }}
+```
+{% endif %}
+{% if stderr %}
+Command stderr:
+```
+{{ stderr }}
+```
+{% endif %}
+{# end copypasted #}
+
+{{ analysis }}
+
+Based on the above, I want you to propose a step-by-step plan to solve the problem and continue with the current task. I will take your plan and replace the current steps with it, so make sure it contains everything needed to complete this task AND THIS TASK ONLY.
+
+{% include "partials/file_naming.prompt" %}
+{% include "partials/execution_order.prompt" %}
+{% include "partials/human_intervention_explanation.prompt" %}
+{% include "partials/file_size_limit.prompt" %}
diff --git a/core/prompts/executor/ran_command.prompt b/core/prompts/executor/ran_command.prompt
new file mode 100644
index 00000000..da40cb36
--- /dev/null
+++ b/core/prompts/executor/ran_command.prompt
@@ -0,0 +1,56 @@
+A coding task has been implemented for the new project we're working on.
+
+{% include "partials/project_details.prompt" %}
+{% include "partials/files_list.prompt" %}
+
+We've broken the development of the project down to these tasks:
+```
+{% for task in state.tasks %}
+{{ loop.index }}. {{ task.description }}{% if task.get("completed") %} (completed){% endif %}
+{% endfor %}
+```
+
+The current task is: {{ current_task.description }}
+
+Here are the detailed instructions for the current task:
+```
+{{ current_task.instructions }}
+```
+{# FIXME: the above stands in place of a previous (task breakdown) convo, and is duplicated in define_user_review_goal and debug prompts #}
+
+{% if task_steps and step_index is not none -%}
+The current task has been split into multiple steps, and each step is one of the following:
+* `command` - command to run
+* `save_file` - create or update a file
+* `human_intervention` - if the human needs to do something
+
+Here is the list of all steps in this task (steps that were already completed are marked as COMPLETED, future steps that will be executed once debugging is done are marked as FUTURE, and the current step is marked as CURRENT STEP):
+{% for step in task_steps %}
+* {% if loop.index0 < step_index %}(COMPLETED){% elif loop.index0 > step_index %}(FUTURE){% else %}(**CURRENT STEP**){% endif %} {{ step.type }}: `{% if step.type == 'command' %}{{ step.command.command }}{% elif step.type == 'save_file' %}{{ step.save_file.path }}{% endif %}`
+{% endfor %}
+
+When determining whether the command ran successfully, take into consideration the steps that were previously executed and the steps that will be executed after the current step. It can happen that a command seems to have failed but will be fixed by the next steps. In that case, you should consider that command to have been successfully executed.
+{%- endif %}
+
+I ran the command `{{ cmd }}`, and it {% if status_code is none %}timed out{% else %}exited with status code {{ status_code }}{% endif %}.
+{% if stdout %}
+Command stdout:
+```
+{{ stdout }}
+```
+{% endif %}
+{% if stderr %}
+Command stderr:
+```
+{{ stderr }}
+```
+{% endif %}
+
+Think about the output and result of this command in the context of the current task and the current step. Provide a detailed analysis of the output and determine whether the command was successfully executed.
+Output your response in the following JSON format:
+```
+{
+ "analysis": "Detailed analysis of the command results. In this error the command was successfully executed because...",
+ "success": true
+}
+```
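+
+For reference, a response in this shape can be consumed programmatically. Here is a minimal illustrative sketch (Python standard library only; the helper name is hypothetical and not part of this codebase):
+```
+import json
+
+def parse_command_result(response_text: str) -> tuple[str, bool]:
+    # The model may wrap the JSON in a fenced code block, so take the
+    # outermost braces and decode just that span.
+    start, end = response_text.index("{"), response_text.rindex("}") + 1
+    data = json.loads(response_text[start:end])
+    return data["analysis"], bool(data["success"])
+```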
diff --git a/core/prompts/partials/execution_order.prompt b/core/prompts/partials/execution_order.prompt
new file mode 100644
index 00000000..cdf46f50
--- /dev/null
+++ b/core/prompts/partials/execution_order.prompt
@@ -0,0 +1 @@
+All the steps will be executed in the order in which you give them, so it is very important that you think about all the steps before you start listing them. For example, you should never code something before you install its dependencies, and you should never try to access a file before it exists in the project.
diff --git a/core/prompts/partials/features_list.prompt b/core/prompts/partials/features_list.prompt
new file mode 100644
index 00000000..ef4925e4
--- /dev/null
+++ b/core/prompts/partials/features_list.prompt
@@ -0,0 +1,16 @@
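+{# Note: state.epics[0] is the initial app build; every later epic is a feature, so epics[1:] below are the features implemented on top of it. #}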
+{% if state.epics|length > 2 %}
+
+Here is the list of features that were previously implemented on top of the initial high-level description of "{{ state.branch.project.name }}":
+```
+{% for feature in state.epics[1:] %}
+- {{ loop.index0 }}. {{ feature.summary }}
+{% endfor %}
+```
+{% endif %}
+{% if state.epics|length > 1 %}
+
+Here is the feature that you are implementing right now:
+```
+{{ state.unfinished_epics[0].description }}
+```
+{% endif %}
diff --git a/core/prompts/partials/file_naming.prompt b/core/prompts/partials/file_naming.prompt
new file mode 100644
index 00000000..0822ffb6
--- /dev/null
+++ b/core/prompts/partials/file_naming.prompt
@@ -0,0 +1 @@
+**IMPORTANT**: When creating and naming new files, ensure the file naming (camelCase, kebab-case, underscore_case, etc) is consistent with the best practices and coding style of the language.
\ No newline at end of file
diff --git a/core/prompts/partials/file_size_limit.prompt b/core/prompts/partials/file_size_limit.prompt
new file mode 100644
index 00000000..69a7f88e
--- /dev/null
+++ b/core/prompts/partials/file_size_limit.prompt
@@ -0,0 +1,2 @@
+**IMPORTANT**
+When deciding which file new code should go into, always try to keep files as small as possible and split code across several smaller files rather than putting it all in one big file.
diff --git a/core/prompts/partials/files_list.prompt b/core/prompts/partials/files_list.prompt
new file mode 100644
index 00000000..3dbc7093
--- /dev/null
+++ b/core/prompts/partials/files_list.prompt
@@ -0,0 +1,26 @@
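+{# If state.relevant_files is set, list every file path but include full contents only for the relevant files; otherwise, include the full contents of all project files. #}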
+{% if state.relevant_files %}
+These files are currently implemented in the project:
+{% for file in state.files %}
+* `{{ file.path }}`{% if file.meta.get("description") %}: {{ file.meta.description }}{% endif %}
+{% endfor %}
+
+Here are the complete contents of files relevant to this task:
+---START_OF_FILES---
+{% for file in state.relevant_file_objects %}
+File **`{{ file.path }}`** ({{file.content.content.splitlines()|length}} lines of code):
+```
+{{ file.content.content }}```
+
+{% endfor %}
+---END_OF_FILES---
+{% elif state.files %}
+These files are currently implemented in the project:
+---START_OF_FILES---
+{% for file in state.files %}
+**`{{ file.path }}`** ({{file.content.content.splitlines()|length}} lines of code):
+```
+{{ file.content.content }}```
+
+{% endfor %}
+---END_OF_FILES---
+{% endif %}
diff --git a/core/prompts/partials/human_intervention_explanation.prompt b/core/prompts/partials/human_intervention_explanation.prompt
new file mode 100644
index 00000000..64e775bf
--- /dev/null
+++ b/core/prompts/partials/human_intervention_explanation.prompt
@@ -0,0 +1,38 @@
+**IMPORTANT**
+You must not tell me to run a command in the database or anything OS-related - the only exception is when dependencies need to be installed. If there is a need to run an OS-related command, specifically tell me that this should be labeled as "Human Intervention" and explain what the human needs to do.
+Avoid using "Human Intervention" if possible. You should NOT use "Human Intervention" for anything other than steps that you can't execute. Also, you must not use "Human Intervention" to ask the user to test that the application works, because this will be done separately after all the steps are finished - no need to ask the user now.
+
+Here are a few examples of when and how to use "Human Intervention":
+------------------------start_of_example_1---------------------------
+Here is an example of a good response for a situation where it seems like a 3rd party API, in this case Facebook, is not working:
+
+* "Human Intervention"
+"1. Check latest Facebook API documentation for updates on endpoints, parameters, or authentication.
+2. Verify Facebook API key/authentication and request format to ensure they are current and correctly implemented.
+3. Use REST client tools like Postman or cURL to directly test the Facebook API endpoints.
+4. Check the Facebook API's status page for any reported downtime or service issues.
+5. Try calling the Facebook API from a different environment to isolate the issue."
+------------------------end_of_example_1---------------------------
+
+------------------------start_of_example_2---------------------------
+Here is an example of a good response for a situation where the user needs to enable some settings in their Gmail account:
+
+* "Human Intervention"
+"To enable sending emails from your Node.js app via your Gmail, account, you need to do the following:
+1. Log in to your Gmail account.
+2. Go to 'Manage your Google Account' > Security.
+3. Scroll down to 'Less secure app access' and turn it on.
+4. Under 'Signing in to Google', select 'App Passwords'. (You may need to sign in again)
+5. At the bottom, click 'Select app' and choose the app you're using.
+6. Click 'Generate'.
+Then, use your Gmail address and the password generated in step #6 and put them into the .env file."
+------------------------end_of_example_2---------------------------
+
+------------------------start_of_example_3---------------------------
+Here is an example for when there are issues with writing to the MongoDB database:
+
+* "Human Intervention"
+"1. Verify the MongoDB credentials provided have write permissions, not just read-only access.
+2. Confirm correct database and collection names are used when connecting to database.
+3. Update credentials if necessary to include insert document permissions."
+------------------------end_of_example_3---------------------------
diff --git a/core/prompts/partials/project_details.prompt b/core/prompts/partials/project_details.prompt
new file mode 100644
index 00000000..3efac36d
--- /dev/null
+++ b/core/prompts/partials/project_details.prompt
@@ -0,0 +1,22 @@
+Here is a high level description of "{{ state.branch.project.name }}":
+```
+{{ state.specification.description }}
+```
+
+{% if state.specification.architecture %}
+Here is a short description of the project architecture:
+{{ state.specification.architecture }}
+{% endif %}
+{% if state.specification.system_dependencies %}
+
+Here are the technologies that should be used for this project:
+{% for tech in state.specification.system_dependencies %}
+* {{ tech.name }} - {{ tech.description }}
+{% endfor %}
+{% endif %}
+{% if state.specification.package_dependencies %}
+
+Here are the packages that should be used for this project:
+{% for tech in state.specification.package_dependencies %}
+* {{ tech.name }} - {{ tech.description }}
+{% endfor %}
+{% endif %}
diff --git a/core/prompts/partials/project_tasks.prompt b/core/prompts/partials/project_tasks.prompt
new file mode 100644
index 00000000..f4f60a75
--- /dev/null
+++ b/core/prompts/partials/project_tasks.prompt
@@ -0,0 +1,67 @@
+Before we go into the coding part, I want you to split the development process of creating this {{ task_type }} into smaller tasks so that it is easier to develop, debug and make the {{ task_type }} work.
+
+Each task needs to be related only to the development of this {{ task_type }} and nothing else - once the {{ task_type }} is fully working, that is it. There shouldn't be a task for researching, deployment, writing documentation, testing or anything that is not writing the actual code.
+
+**IMPORTANT**
+As an experienced tech lead, you always follow the rules on how to create tasks. Dividing a project into tasks is an extremely important job and you have to do it very carefully.
+
+Now, based on the project details provided{% if task_type == 'feature' %} and new feature description{% endif %}, think task by task and create the entire development plan{% if task_type == 'feature' %} for new feature{% elif task_type == 'app' %}. {% if state.files %}Continue from the existing code listed above{% else %}Start from the project setup{% endif %} and specify each task until the moment when the entire app should be fully working{% if state.files %}. You should not reimplement what's already done - just continue from the implementation already there{% endif %}{% endif %} while strictly following these rules:
+
+Rule #1
+There should never be a task that is only testing or ensuring something works; every task must have coding involved. Keep this in mind for every task, but it is extremely important for the last task of the project. Testing whether the {{ task_type }} works will be done as part of each task.
+
+Rule #2
+This rule applies to the complexity of tasks.
+You have to make sure the project is not split into tasks that are too small or simple for no reason, but also that tasks are not so big or complex that they become hard to develop, debug and review.
+Keep in mind that the project already has the workspace folder created and only system dependencies installed. You don't have to create tasks for that.
+Here are examples of poorly created tasks:
+
+**too simple tasks**
+- Set up a Node.js project and install all necessary dependencies.
+- Establish a MongoDB database connection using Mongoose with the IP '127.0.0.1'.
+
+**too complex tasks**
+- Set up Node.js project with /home, /profile, /register and /login routes that will have user authentication, connection to MongoDB with user schemas, mailing of new users and frontend with nice design.
+
+You must avoid creating tasks that are too simple or too complex. Aim to create tasks of medium complexity. Here are examples of tasks that are good:
+
+**good tasks**
+- Set up a Node.js project, install all necessary dependencies and set up an express server with a simple route to `/ping` that returns the status 200.
+- Establish a MongoDB database connection and implement the message schema using Mongoose for persistent storage of messages.
+
+Rule #3
+This rule applies to the number of tasks you will create.
+Every {{ task_type }} should have a different number of tasks depending on its complexity. Think task by task and create the minimum number of tasks that are relevant for this specific {{ task_type }}.
+{% if task_type == 'feature' %} If the feature is small, it is ok to have only 1 task.{% endif %}
+Here are some examples of apps with different complexity that can give you guidance on how many tasks you should create:
+
+Example #1:
+app description: "I want to create an app that will just say 'Hello World' when I open it on my localhost:3000."
+number of tasks: 1
+
+Example #2:
+app description: "Create a node.js app that enables users to register and log into the app. On frontend it should have /home (shows user data), /register and /login. It should use sessions to keep user logged in."
+number of tasks: 2-4
+
+Example #3:
+app description: "A cool online shoe store, with a sleek look. In terms of data models, there are shoes, categories and user profiles. For web pages: product listing, details, shopping cart. It must look cool and jazzy."
+number of tasks: 5-15
+
+Rule #4
+This rule applies to writing task 'description'.
+Every task must have a clear and very detailed 'description' (a minimum of 4 sentences, but it can be more). It must be clear enough that even developers who just moved to this project can execute the task without additional questions. It is not enough to just write something like "Create a route for /home". You have to describe what needs to be done in that route, what data needs to be returned, what the status code should be, etc. Give as many details as possible and make sure no information is missing that could be needed for this task.
+Here is an example of a good and a bad task description:
+
+**bad task**
+{
+ "description": "Create a route for /dashboard"
+}
+
+**good task**
+{
+ "description": "In 'route.js' add a route for /dashboard that returns the status 200. Route should be accessible only for logged in users. In 'middlewares.js' there should be a check if user is logged in using session. If user is not logged in, it should redirect to /login. If user is logged in, it should return the user data. User data should be fetched from database in 'users' collection using the user id from session."
+}
+
+Rule #5
+When creating and naming new files, ensure the file naming (camelCase, kebab-case, underscore_case, etc) is consistent with the best practices and coding style of the language.
+Pay attention to file paths: if the command or argument is a file or folder from the project, use paths relative to the project root (for example, use `somedir/somefile` instead of `/somedir/somefile`).
diff --git a/core/prompts/partials/relative_paths.prompt b/core/prompts/partials/relative_paths.prompt
new file mode 100644
index 00000000..3c42412a
--- /dev/null
+++ b/core/prompts/partials/relative_paths.prompt
@@ -0,0 +1 @@
+**IMPORTANT**: Pay attention to file paths: if the command or argument is a file or folder from the project, use paths relative to the project root (for example, use `somedir/somefile` instead of `/path/to/project/somedir/somefile`).
diff --git a/core/prompts/problem-solver/get_alternative_solutions.prompt b/core/prompts/problem-solver/get_alternative_solutions.prompt
new file mode 100644
index 00000000..72cc23ef
--- /dev/null
+++ b/core/prompts/problem-solver/get_alternative_solutions.prompt
@@ -0,0 +1,57 @@
+You are working on an app called "{{ state.branch.project.name }}" and you need to write code for the entire {% if state.epics|length > 1 %}feature{% else %}app{% endif %} based on the tasks that the tech lead gives you. So that you understand better what you're working on, you're given other specs for "{{ state.branch.project.name }}" as well.
+
+{% include "partials/project_details.prompt" %}
+{% include "partials/features_list.prompt" %}
+
+We've broken the development of this {% if state.epics|length > 1 %}feature{% else %}app{% endif %} down to these tasks:
+```
+{% for task in state.tasks %}
+{{ loop.index }}. {{ task.description }}{% if task.get("completed") %} (completed){% endif %}
+{% endfor %}
+```
+
+{% if state.current_task %}
+You are currently working on, and have to focus only on, this task:
+```
+{{ state.current_task.description }}
+```
+
+{% endif %}
+A part of the app is already finished.
+{% include "partials/files_list.prompt" %}
+
+You are trying to solve an issue that your colleague is reporting.
+{% if previous_solutions|length > 0 %}
+You tried {{ previous_solutions|length }} times to solve it, but without success. In the last few attempts, your colleague gave you this report:
+{% for solution in previous_solutions[-3:] %}
+----------------------------start_of_report_{{ loop.index }}----------------------------
+{{ solution.user_feedback }}
+----------------------------end_of_report_{{ loop.index }}----------------------------
+
+Then, you gave the following proposal (proposal_{{ loop.index }}) of what needs to be done to fix the issue:
+----------------------------start_of_proposal_{{ loop.index }}----------------------------
+{{ solution.description }}
+----------------------------end_of_proposal_{{ loop.index }}----------------------------
+
+{% if not loop.last %}
+Then, upon implementing these changes, your colleague came back with the following report:
+{% endif %}
+{% endfor %}
+{% endif %}
+
+{% if user_input != '' %}
+Your colleague who is testing the app "{{ name }}" sent you this report now:
+```
+{{ user_input }}
+```
+
+You tried to solve this problem before, but your colleague is telling you that you got into a loop where all your attempts end up the same way - with an error.
+{%- endif -%}
+
+It seems that the solutions you're proposing aren't working.
+
+Now, think step by step about 5 alternative solutions to get this code to work that are most likely to solve this issue.
+
+Every proposed solution needs to be concrete and not vague (eg. it cannot be "Review and change the app's functionality") and based on the code changes. A solution can be complex if it's related to the same part of the code (eg. "Try changing the input variables X, Y and Z to a method N").
+
+Order them by their probability of fixing the problem, highest first. A developer will then go through this list item by item, try to implement each one, and check if it solved the issue, continuing until the end of the list.
diff --git a/core/prompts/problem-solver/iteration.prompt b/core/prompts/problem-solver/iteration.prompt
new file mode 100644
index 00000000..97d7d10c
--- /dev/null
+++ b/core/prompts/problem-solver/iteration.prompt
@@ -0,0 +1 @@
+{% extends "troubleshooter/iteration.prompt" %}
\ No newline at end of file
diff --git a/core/prompts/problem-solver/system.prompt b/core/prompts/problem-solver/system.prompt
new file mode 100644
index 00000000..e69de29b
diff --git a/core/prompts/spec-writer/ask_questions.prompt b/core/prompts/spec-writer/ask_questions.prompt
new file mode 100644
index 00000000..6703fdca
--- /dev/null
+++ b/core/prompts/spec-writer/ask_questions.prompt
@@ -0,0 +1,76 @@
+Your task is to talk to a new client and develop a detailed specification for a new application the client wants to build. This specification will serve as an input to an AI software developer and thus must be very detailed, contain all the project functionality and precisely define behaviour, 3rd-party integrations (if any), etc.
+
+The AI developer prefers working on web apps using the Node/Express/MongoDB/Mongoose/EJS stack, with vanilla JS and Bootstrap on the frontend, unless the client has different requirements.
+Try to avoid the use of Docker, Kubernetes, microservices and single-page app frameworks like React, Next.js, Angular, Vue or Svelte unless the brief explicitly requires it.
+
+In your work, follow these important rules:
+* In your communication with the client, be straightforward, concise, and focused on the task.
+* Ask questions ONE BY ONE. This is very important, as the client is easily confused. If you were to ask multiple questions, the user would probably miss some of them, so remember to always ask the questions one by one.
+* Ask specific questions, taking into account what you already know about the project. For example, don't ask "what features do you need?" or "describe your idea"; instead ask "what is the most important feature?"
+* Pay special attention to any documentation or information that the project might require (such as accessing a custom API, etc). Be sure to ask the user to provide information and examples that the developers will need to build the proof-of-concept. You will need to output all of this in the final specification.
+* This is a prototype project, so it is important to have a small and well-defined scope. If the scope seems to grow too large (beyond a week or two of work for one developer), ask the user if they can simplify the project.
+* Do not address non-functional requirements (performance, deployment, security, budget, timelines, etc...). We are only concerned with functional and technical specification here.
+* Do not address deployment or hosting, including DevOps tasks to set up a CI/CD pipeline
+* Don't address or envision any future development (post proof-of-concept); the scope of your task is only to spec the PoC/prototype.
+* If the user provided specific information on how to access 3rd party API or how exactly to implement something, you MUST include that in the specification. Remember, the AI developer will only have access to the specification you write.
+
+Ensure that you have all the information about:
+* overall description and goals for the app
+* all the features of the application
+* functional specification
+ * how the user will use the app
+ * enumerate all the parts of the application (eg. pages of the application, background processing if any, etc); for each part, explain *in detail* how it should work from the perspective of the user
+ * identify any constraints, business rules, user flows or other important info that affect how the application works or how it is used
+* technical specification
+ * what kind of an application this is and what platform/technologies will be used
+ * the architecture of the application (what happens on backend, frontend, mobile, background tasks, integration with 3rd party services, etc)
+ * detailed description of each component of the application architecture
+* integration specification
+ * any 3rd party apps, services, APIs that will be used (eg. for auth, payments, etc..)
+ * if a custom API is used, precise definitions, with examples, how to use the custom API or do the custom integration
+
+If you identify any missing information or need clarification on any vague or ambiguous parts of the brief, ask the client about it.
+
+Important note: don't ask trivial questions for obvious or unimportant parts of the app, for example:
+* Bad questions example 1:
+ * Client brief: I want to build a hello world web app
+ * Bad questions:
+ * What title do you want for the web page that displays "Hello World"?
+ * What color and font size would you like for the "Hello World" text to be displayed in?
+ * Should the "Hello World" message be static text served directly from the server, or would you like it implemented via JavaScript on the client side?
+ * Explanation: There's no need to micromanage the developer(s) and designer(s), the client would've specified these details if they were important.
+
+If you ask such trivial questions, the client will think you're stupid and will leave. DON'T DO THAT
+
+Think carefully about what a developer must know to be able to build the app. The specification must address all of this information, otherwise the AI software developer will not be able to build the app.
+
+When you gather all the information from the client, output the complete specification. Remember, the specification should define the functional aspects (features - what it does, what the user should be able to do), the technical details (architecture, technologies preferred by the user, etc), and the integration details (pay special attention to describing these in detail). Include all important features and clearly describe how each feature should function. IMPORTANT: Do not add any preamble (eg. "Here's the specification....") or conclusion/commentary (eg. "Let me know if you have further questions")!
+
+Here's an EXAMPLE initial prompt:
+---start-of-example-output---
+Online forum similar to Hacker News (news.ycombinator.com), with a simple and clean interface, where people can post links or text posts, and other people can upvote, downvote and comment on. Reading is open to anonymous users, but users must register to post, upvote, downvote or comment. Use simple username+password authentication. The forum should be implemented in Node.js with Express framework, using MongoDB and Mongoose ORM.
+
+The UI should use EJS view engine, Bootstrap for styling and plain vanilla JavaScript. Design should be simple and look like Hacker News, with a top bar for navigation, using a blue color scheme instead of the orange color in HN. The footer in each page should just be "Built using GPT Pilot".
+
+Each story has a title (one-line text), a link (optional, URL to an external article being shared on AI News), and text (text to show in the post). Link and text are mutually exclusive - if the submitter tries to use both, show them an error.
+
+Use the following algorithm to rank top stories, and comments within a story: "score = upvotes - downvotes + comments - sqrt(age)" , where "upvotes" and "downvotes" are the number of upvotes and downvotes the story or comment has, "comments" is the number of comments for a story (total), or the number of sub-comments (for a comment), and "age" is how old is the story, in minutes, and "sqrt" is the square root function.
+
+Implement the following pages:
+
+* / - shows the top 20 posted stories, ranked using the scoring algorithm, with a "More" link that shows the next 20 (pagination using "p" query parameter), and so on
+* /newest - shows the latest 20 posted stories, ranked chronologically (newest first), with a "More" link that shows the next 20 (pagination using "p" query parameter), and so on
+* /submit - shows a form to submit a new story, upon submitting the user should get redirected to /newest
+* /login - shows a login form (username, password, "login" button, and a link to register page for new users)
+* /register - shows a register form (username, password, "register" button, and a link to login page for existing users)
+* /item - shows the story (use "id" query parameter to pass the story ID to this route)
+* /comment - shows the form to send a comment (just a textarea and "submit" button) - upon commenting, the person should get redirected to the story they commented on
+
+The / and /newest pages should show the story title (link to the external article if "link" is set, otherwise link to the story's /item page), number of points (points = upvotes - downvotes), poster username (no link), how old the story is ("x minutes ago", "y hours ago" or "z days ago"), and "xyz comments" (link to the /item page of the story). This is basically the same way HN shows it.
+
+The /item page should also follow the layout for HN in how it shows the story, and the comments tree. Instead of the embedded "reply" form, the story should just have a "comment" button that goes to the /comment page, similar to the "reply" link underneath each comment. Both should link to the /comment page.
+---end-of-example-output---
+
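+To make the ranking formula in the example above concrete, here is a minimal illustrative sketch of the arithmetic (the `story_score` helper is hypothetical and shown in Python purely for clarity; the example app itself is Node.js):
+```
+import math
+
+def story_score(upvotes: int, downvotes: int, comments: int, age_minutes: float) -> float:
+    # score = upvotes - downvotes + comments - sqrt(age), with age in minutes
+    return upvotes - downvotes + comments - math.sqrt(age_minutes)
+```
+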
+Remember, this is important: the AI developer will not have access to the client's initial description or the transcript of your conversation. The developer will only see the specification you output at the end. It is very important that the spec captures *all* the details of the project in as much detail and precision as possible.
+
+Note: after the client reads the specification you create, the client might have additional comments or suggestions. In this case, continue the discussion with the user until you get all the new information and output the newly updated spec again.
diff --git a/core/prompts/spec-writer/prompt_complexity.prompt b/core/prompts/spec-writer/prompt_complexity.prompt
new file mode 100644
index 00000000..53331436
--- /dev/null
+++ b/core/prompts/spec-writer/prompt_complexity.prompt
@@ -0,0 +1,8 @@
+```
+{{ prompt }}
+```
+
+The above is a user prompt for an application/software tool the user is trying to develop. Determine the complexity of the user's request. Do NOT respond with thoughts, reasoning, explanations or anything similar; return ONLY a string representation of the complexity level. Use the following scale:
+"hard" for high complexity
+"moderate" for moderate complexity
+"simple" for low complexity
diff --git a/core/prompts/spec-writer/review_spec.prompt b/core/prompts/spec-writer/review_spec.prompt
new file mode 100644
index 00000000..8f8e5f5e
--- /dev/null
+++ b/core/prompts/spec-writer/review_spec.prompt
@@ -0,0 +1,22 @@
+Your team has taken the client brief and turned it into a project specification.
+
+Your job is to check the specification and identify all the information that is contained in the client brief, but missing from the specification.
+
+This might include:
+* details on how the app should work
+* information which 3rd party packages or APIs to use or avoid
+* concrete examples of API requests/responses, library usage, or other external documentation
+
+Here is the client brief:
+---CLIENT-BRIEF-START---
+{{ state.specification.description }}
+---CLIENT-BRIEF-END---
+
+Here is the specification your team came up with:
+---SPEC-START---
+{{ spec }}
+---SPEC-END---
+
+In your response, output all the information that is present in the client brief but missing from the spec, so it can be appended.
+
+Note: don't output suggestions for your team to take back to the drawing board. Instead, just output the missing information and the team will append it to the generated spec. If there is no missing information, just output an empty response ('').
diff --git a/core/prompts/spec-writer/system.prompt b/core/prompts/spec-writer/system.prompt
new file mode 100644
index 00000000..675c7cb6
--- /dev/null
+++ b/core/prompts/spec-writer/system.prompt
@@ -0,0 +1 @@
+You are a product owner working in a software development agency.
diff --git a/core/prompts/task-reviewer/review_task.prompt b/core/prompts/task-reviewer/review_task.prompt
new file mode 100644
index 00000000..1cdb2344
--- /dev/null
+++ b/core/prompts/task-reviewer/review_task.prompt
@@ -0,0 +1,58 @@
+You are working on an app called "{{ state.branch.project.name }}" and your job is to review the changes made.
+
+{% include "partials/project_details.prompt" %}
+{% include "partials/features_list.prompt" %}
+
+The development process of this app was split into smaller tasks. Here is the list of all tasks:
+```
+{% for task in state.tasks %}
+{{ loop.index }}. {{ task.description }}
+{% endfor %}
+```
+
+You are currently working on task "{{ current_task.description }}" and you have to focus only on that task.
+
+A part of the app is already finished.
+{% include "partials/files_list.prompt" %}
+
+{% if all_feedbacks -%}
+While working on this task, your colleague who is testing the app "{{ state.branch.project.name }}" sent you some additional information on what doesn't work as intended or what should be added. Here are all the inputs they sent you:
+```
+{% for feedback in all_feedbacks %}
+{{ loop.index }}. {{ feedback }}
+{% endfor %}
+```
+
+After you got each of these additional inputs, you tried to fix it as part of this task. {% endif %}Files that were modified during the implementation of this task are:
+{% for path, content in files_after_modification %}
+* `{{ path }}`
+{% endfor %}
+
+Now I will show you how those files looked before this task implementation started. If a file is listed as changed but is not in this list, that means it was created during this task. Here are the files before the implementation of this task:
+
+---start_of_files_at_start_of_task---
+{% for path, content in files_before_modification.items() %}{% if content %}
+* File `{{ path }}`:
+```
+{{ content }}```
+
+{% endif %}{% endfor %}
+---end_of_files_at_start_of_task---
+
+
+**IMPORTANT**
+You have to review this task implementation. You are known to be very strict with your reviews and very good at noticing bugs but you don't mind minor changes like refactoring, adding or removing logs and so on. You think twice through all information given before giving any conclusions.
+
+Each task goes through multiple reviews and you have to focus only on your part of the review.
+In this review, your goal is to check:
+1. If there are some functionalities that were removed but are still needed.
+2. If new files or functions are created but never called or used.
+3. If there is some "dead code" that should be removed.
+4. If there is some duplicate code resulting from refactoring or moving code into separate classes or files.
+
+If everything is ok, respond only with "DONE" and nothing else. Do NOT respond with thoughts, reasoning, explanations or anything similar; if everything is ok, respond just with "DONE".
+
+If you find any of these 4 mistakes, describe in detail what has to be changed.
+
+{% include "partials/relative_paths.prompt" %}
+{% include "partials/execution_order.prompt" %}
diff --git a/core/prompts/task-reviewer/system.prompt b/core/prompts/task-reviewer/system.prompt
new file mode 100644
index 00000000..ab215173
--- /dev/null
+++ b/core/prompts/task-reviewer/system.prompt
@@ -0,0 +1,7 @@
+You are a world class full stack software developer working in a team.
+
+You write modular, well-organized code split across files that are not too big, so that the codebase is maintainable. You include proper error handling and logging for your clean, readable, production-level quality code.
+
+When reviewing other people's code, you are strict with your reviews and very good at noticing bugs but you don't mind minor changes like refactoring, adding or removing logs and so on. You think twice through all information given before giving any conclusions.
+
+Your job is to review tasks implemented by your team, following the task implementation instructions.
diff --git a/core/prompts/tech-lead/plan.prompt b/core/prompts/tech-lead/plan.prompt
new file mode 100644
index 00000000..84a5cade
--- /dev/null
+++ b/core/prompts/tech-lead/plan.prompt
@@ -0,0 +1,25 @@
+You are working in a software development agency, and a project manager and a software architect approach you, telling you that you're assigned to {% if task_type == 'feature' %}add a new feature to an existing project{% else %}work on a new project{% endif %}.
+You are working on an app called "{{ state.branch.project.name }}" and you need to create a detailed development plan so that developers can start developing the app.
+
+{% include "partials/project_details.prompt" %}
+{% include "partials/features_list.prompt" %}
+{% if existing_summary %}
+
+The developers have already used a project scaffolding tool that creates the initial boilerplate for the project:
+{{ existing_summary }}
+{% endif %}
+
+{% include "partials/files_list.prompt" %}
+
+{% if task_type == 'feature' %}
+Finally, here is the description of the new feature that needs to be added to the app "{{ state.branch.project.name }}":
+```
+{{ epic.description }}
+```
+{% endif %}
+
+{% if epic.complexity and epic.complexity == 'simple' %}
+This is a very low complexity {{ task_type }} and because of that, you have to create ONLY one task that is sufficient to fully implement it.
+{% else %}
+{% include "partials/project_tasks.prompt" %}
+{% endif %}
diff --git a/core/prompts/tech-lead/system.prompt b/core/prompts/tech-lead/system.prompt
new file mode 100644
index 00000000..8a43e031
--- /dev/null
+++ b/core/prompts/tech-lead/system.prompt
@@ -0,0 +1,4 @@
+You are an experienced tech lead in a software development agency.
+Your main task is to break down the project into smaller tasks that developers will do.
+You must specify each task as clearly as possible.
+Each task must have a description of what needs to be implemented.
diff --git a/core/prompts/tech-lead/update_plan.prompt b/core/prompts/tech-lead/update_plan.prompt
new file mode 100644
index 00000000..ca8ff1fa
--- /dev/null
+++ b/core/prompts/tech-lead/update_plan.prompt
@@ -0,0 +1,59 @@
+You are working on an app called "{{ state.branch.project.name }}".
+
+{% include "partials/project_details.prompt" %}
+
+A development plan for that {{ task_type }} was created, and the {{ task_type }} was then broken down into smaller tasks to make development easier.
+
+Here are the tasks that are finished so far:
+```
+{% for task in finished_tasks %}
+- Task #{{ loop.index }}
+Description: {{ task }}
+
+{% endfor %}
+```
+
+Here are the tasks that still have to be implemented:
+```
+{% for task in state.unfinished_tasks %}
+- Task #{{ finished_tasks|length + loop.index }}
+Description: {{ task }}
+
+{% endfor %}
+```
+
+{% if finished_tasks %}
+This is the last task you were working on:
+```
+{{ finished_tasks[-1].description }}
+```
+{% endif %}
+
+While working on that last task, you were iterating based on user feedback for this {{ task_type }}. Here is a list of all the iterations:
+```
+{% for iteration in state.iterations %}
+- Iteration #{{ loop.index }}:
+
+User feedback: {{ iteration.user_feedback }}
+Developer solution: {{ iteration.description }}
+{% endfor %}
+```
+
+{% if modified_files|length > 0 %}
+Here are the files that were modified during the implementation of this task:
+---start_of_current_files---
+{% for file in modified_files %}
+**{{ file.path }}** ({{ file.content.content.splitlines()|length }} lines of code):
+```
+{{ file.content.content }}
+```
+{% endfor %}
+---end_of_current_files---
+{% endif %}
+
+
+You need to think step by step about what was done in the last task and update the development plan if needed. All iterations that were mentioned were executed and finished successfully, and that needs to be reflected in the updated development plan.
+As output, you have to give 2 things:
+1. Reword/update the current task ("updated_current_task"), if needed, based on what is implemented so far. Consider the current task description, all iterations that were implemented during this task, and all changes that were made to the code.
+
+2. Give me the updated list of tasks that still have to be implemented. Take into consideration all tasks in the current development plan, previous tasks that were finished, and everything that was implemented in this task. There should be the minimum possible number of tasks still required to finish the app. You must list only tasks that need implementation and were not done in the scope of previous tasks or during iterations on the current task. Do not create new tasks; only remove tasks from the list of tasks that still have to be implemented if they were implemented during the current task.
diff --git a/core/prompts/tech-writer/create_readme.prompt b/core/prompts/tech-writer/create_readme.prompt
new file mode 100644
index 00000000..4d9e8d95
--- /dev/null
+++ b/core/prompts/tech-writer/create_readme.prompt
@@ -0,0 +1,37 @@
+You are working on a project called "{{ state.branch.project.name }}" and you need to create detailed documentation for the current state of the project. Your first task is to create the README.md file.
+
+{% include "partials/project_details.prompt" %}
+{% include "partials/features_list.prompt" %}
+{% include "partials/files_list.prompt" %}
+
+DO NOT specify commands to create any folders or files; they will be created automatically - just specify the relative path to the file that needs to be written.
+
+{% include "partials/relative_paths.prompt" %}
+
+Now, based on the project details provided, think step by step and create the README.md file for this project. The file should have the following format:
+
+# Project name
+
+Short description (a few sentences) of the project based on the project details.
+
+## Overview
+
+Description of the architecture and technologies used in the project, and the project structure.
+
+## Features
+
+Description of what the app can do and how it can be used.
+
+## Getting started
+
+### Requirements
+
+Required technologies/setup needed on the computer to run the project.
+
+### Quickstart
+
+How to set up the project and run it
+
+### License
+
+The project is proprietary (not open source), so just output the standard "Copyright (c) 2024." template here.
diff --git a/core/prompts/tech-writer/system.prompt b/core/prompts/tech-writer/system.prompt
new file mode 100644
index 00000000..3bf0e485
--- /dev/null
+++ b/core/prompts/tech-writer/system.prompt
@@ -0,0 +1 @@
+You are a technical writer and, as such, you excel in clear, concise communication, skillfully breaking down complex technical concepts for a variety of audiences. Your proficiency in research and attention to detail ensures accuracy and consistency in your work. You adeptly organize complex information in a user-friendly manner, understanding and anticipating the needs of your audience. Your collaborative skills enhance your ability to work effectively with diverse teams. In your role, you not only create documentation but also efficiently manage documentation projects, always prioritizing clarity and usefulness for the end-user.
diff --git a/core/prompts/troubleshooter/breakdown.prompt b/core/prompts/troubleshooter/breakdown.prompt
new file mode 100644
index 00000000..6ca24015
--- /dev/null
+++ b/core/prompts/troubleshooter/breakdown.prompt
@@ -0,0 +1,2 @@
+{# This is the same template as for Developer's breakdown because Troubleshooter is reusing it in a conversation #}
+{% extends "developer/breakdown.prompt" %}
diff --git a/core/prompts/troubleshooter/bug_report.prompt b/core/prompts/troubleshooter/bug_report.prompt
new file mode 100644
index 00000000..ac273572
--- /dev/null
+++ b/core/prompts/troubleshooter/bug_report.prompt
@@ -0,0 +1,43 @@
+You're working on a new app and the user has just been testing it.
+
+{% include "partials/project_details.prompt" %}
+{% include "partials/files_list.prompt" %}
+
+{% if user_instructions %}
+The user was given instructions on how to test if the app is working correctly. Here are the instructions:
+```
+{{ user_instructions }}
+```
+{% endif %}
+
+The user then wrote this feedback:
+```
+{{ user_feedback }}
+```
+{% if additional_qa|length > 0 %}
+Here are questions and answers that you already asked the user:
+```
+{% for row in additional_qa %}
+Q: {{ row.question }}
+A: {{ row.answer }}
+{% endfor %}
+```
+{% endif %}
+
+Your job is to identify whether the feedback is good enough for you to solve the problem and, if not, what information you need in order to solve it. Ask for any information that you need to solve the problem.
+If you have enough information, don't ask any questions.
+
+When thinking of questions, consider the following:
+- After getting answers to your questions, you must be able to solve the problem.
+- Ask only crucial questions. Do not ask for information that you do not need to solve the problem.
+- Ask the fewest questions needed to get the most information and solve the problem.
+- Ask only questions from the list provided below.
+- Ask questions in the same order as they appear in the list.
+- Never repeat the same question.
+
+Here is the list of questions you can ask:
+"Can you please provide more information on what exactly you mean?"
+"Can you please provide logs from the frontend?"
+"Can you please provide logs from the backend?"
+"What is the expected behavior and what is current behaviour?"
+"On what page does the issue happen?"
\ No newline at end of file
diff --git a/core/prompts/troubleshooter/define_user_review_goal.prompt b/core/prompts/troubleshooter/define_user_review_goal.prompt
new file mode 100644
index 00000000..78deb637
--- /dev/null
+++ b/core/prompts/troubleshooter/define_user_review_goal.prompt
@@ -0,0 +1,34 @@
+How can a human user test if this task was completed successfully?
+
+Please list actions, step by step, in order, that the user should take to verify the task. After each action, describe what the expected response is.
+
+**IMPORTANT**
+
+Follow these important rules when compiling a list of actions the user will take:
+
+1. Actions must be as specific as possible. You don't want the user to have to think anything through but rather that they just follow your instructions.
+2. In case this task can be tested by making an API request, you should always prefer to test the functionality in the browser. If you can't do that, do not suggest how a request can be made with Postman; instead, write a full cURL command that the user can just run.
+3. Do not require the user to write any code or edit files to test this task.
+4. If the user must run a command, assume the user already has a terminal opened in the project root directory (no need to instruct the user "open the terminal" or "make sure you're in the project directory")
+5. The user is using {{ os }}, so the commands must run on that operating system
+6. Assume system services, such as the database, are already set up and running. Don't ask user to install or run any software other than the app they're testing.
+7. Don't ask the user to test things which aren't implemented yet (eg. opening a theoretical web page that doesn't exist yet, or clicking on a button that isn't implemented yet)
+
+Remember, these rules are very important and you must follow them!
+
+Here is an example output with a few user steps:
+---example---
+### Step 1
+Action: Start the server using `npm start`
+Expected result: You should see the message "Connected to database" or similar
+
+### Step 2
+Action: Open your web browser and visit http://localhost:3000/
+Expected result: Web page opens and you see a "Hello World" message with a contact form
+
+### Step 3
+Action: Click on the "Submit" button in the web form
+Expected result: Form is submitted, page is reloaded and "Thank you" message is shown
+---end_of_example---
+
+If nothing needs to be tested for this task, instead of outputting the steps, just output a single word: DONE
diff --git a/core/prompts/troubleshooter/get_run_command.prompt b/core/prompts/troubleshooter/get_run_command.prompt
new file mode 100644
index 00000000..522f6075
--- /dev/null
+++ b/core/prompts/troubleshooter/get_run_command.prompt
@@ -0,0 +1,9 @@
+How can I run this app?
+**IMPORTANT**
+Do not reply with anything else but the command with which I can run this app.
+For example, if the command is "python app.py", then your response needs to be only `python app.py` without the backticks.
+
+{% include "partials/relative_paths.prompt" %}
+
+If there is no command to run, reply with an empty response.
+For example, if we have only set up package.json and no other files are coded yet, there is no command to run, so respond with `` without the backticks.
diff --git a/core/prompts/troubleshooter/iteration.prompt b/core/prompts/troubleshooter/iteration.prompt
new file mode 100644
index 00000000..32509fe4
--- /dev/null
+++ b/core/prompts/troubleshooter/iteration.prompt
@@ -0,0 +1,53 @@
+You are working on an app called "{{ state.branch.project.name }}" and you need to write code for the entire application.
+
+{% include "partials/project_details.prompt" %}
+
+{% if state.tasks and state.current_task %}
+The development process of this app was split into smaller tasks. Here is the list of all tasks:
+```{% for task in state.tasks %}
+{{ loop.index }}. {{ task.description }}
+{% endfor %}
+```
+You are currently working on task "{{ state.current_task.description }}" and you have to focus only on that task.
+
+{% endif %}
+A part of the app is already finished.
+{% include "partials/files_list.prompt" %}
+
+{% if user_feedback %}
+The user who was using the app "{{ state.branch.project.name }}" sent you this feedback:
+```
+{{ user_feedback }}
+```
+{% endif %}
+{% if user_feedback_qa %}
+The feedback was not clear enough, so you asked the user for additional information and got this response:
+```
+{% for row in user_feedback_qa %}
+Q: {{ row.question }}
+A: {{ row.answer }}
+{% endfor %}
+```
+{% endif %}
+{% if next_solution_to_try is not none %}
+Focus on solving this issue in the following way:
+```
+{{ next_solution_to_try }}
+```
+{% endif %}
+Now, you have to debug this issue and comply with the additional user feedback.
+
+**IMPORTANT**
+Think about all the information provided. Your job is to look at the big picture by analysing all files to find where the issue is.
+Don't reply with any code, your thoughts or a breakdown of the issue. Respond only with a description of the solution, explaining what the steps to solve the issue should be.
+Create as few steps as possible to fix the issue. Each step should describe, using sentences and not code, what changes are needed in a specific file, or describe a command that needs to be executed to continue working on the issue.
+When there are multiple things that have to be done in one file, write them all as one step; don't split them into multiple steps.
+You can assume that the environment was set up previously and that the packages listed in the files are installed, so only tell me the commands needed to install any new dependencies, if there are any.
+
+**IMPORTANT**
+If the report mentions *multiple* issues, treat it as if you got several separate reports: think through each one and provide a solution for each.
+
+{% include "partials/execution_order.prompt" %}
+{% include "partials/file_size_limit.prompt" %}
+{% include "partials/file_naming.prompt" %}
+{% include "partials/relative_paths.prompt" %}
diff --git a/core/prompts/troubleshooter/system.prompt b/core/prompts/troubleshooter/system.prompt
new file mode 100644
index 00000000..e69de29b
diff --git a/core/state/__init__.py b/core/state/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/core/state/state_manager.py b/core/state/state_manager.py
new file mode 100644
index 00000000..b87eb64c
--- /dev/null
+++ b/core/state/state_manager.py
@@ -0,0 +1,504 @@
+import os.path
+from typing import TYPE_CHECKING, Optional
+from uuid import UUID, uuid4
+
+from core.config import FileSystemType, get_config
+from core.db.models import Branch, ExecLog, File, FileContent, LLMRequest, Project, ProjectState, UserInput
+from core.db.session import SessionManager
+from core.disk.ignore import IgnoreMatcher
+from core.disk.vfs import LocalDiskVFS, MemoryVFS, VirtualFileSystem
+from core.llm.request_log import LLMRequestLog, LLMRequestStatus
+from core.log import get_logger
+from core.proc.exec_log import ExecLog as ExecLogData
+from core.telemetry import telemetry
+from core.ui.base import UIBase
+from core.ui.base import UserInput as UserInputData
+
+if TYPE_CHECKING:
+ from core.agents.base import BaseAgent
+
+log = get_logger(__name__)
+
+
+class StateManager:
+ """
+ Manages loading, updating and saving project states.
+
+ All code that reads the current project state
+ should use the `StateManager.current_state` attribute. All changes
+ to the state should be done through the `StateManager.next_state`
+ attribute.
+ """
+
+ current_state: Optional[ProjectState]
+ next_state: Optional[ProjectState]
+
+ def __init__(self, session_manager: SessionManager, ui: Optional[UIBase] = None):
+ self.session_manager = session_manager
+ self.ui = ui
+ self.file_system = None
+ self.project = None
+ self.branch = None
+ self.current_state = None
+ self.next_state = None
+ self.current_session = None
+
+ async def list_projects(self) -> list[Project]:
+ """
+ List projects with branches
+
+ :return: List of projects with all their branches.
+ """
+ async with self.session_manager as session:
+ return await Project.get_all_projects(session)
+
+ async def create_project(self, name: str, folder_name: Optional[str] = None) -> Project:
+ """
+ Create a new project and set it as the current one.
+
+ :param name: Project name.
+ :return: The Project object.
+ """
+ session = await self.session_manager.start()
+ project = Project(name=name, folder_name=folder_name)
+ branch = Branch(project=project)
+ state = ProjectState.create_initial_state(branch)
+ session.add(project)
+ await session.commit()
+
+ log.info(
+ f'Created new project "{name}" (id={project.id}) '
+ f'with default branch "{branch.name}" (id={branch.id}) '
+ f"and initial state id={state.id} (step_index={state.step_index})"
+ )
+
+ self.current_session = session
+ self.current_state = state
+ self.next_state = state
+ self.project = project
+ self.branch = branch
+ self.file_system = await self.init_file_system(load_existing=False)
+ return project
+
+ async def delete_project(self, project_id: UUID) -> bool:
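+ """
+ Delete a project.
+
+ :param project_id: ID of the project to delete.
+ :return: True if the project was deleted, False otherwise.
+ """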
+ session = await self.session_manager.start()
+ rows = await Project.delete_by_id(session, project_id)
+ await session.commit()
+ if rows > 0:
+ log.info(f"Deleted project {project_id}.")
+ return bool(rows)
+
+ async def load_project(
+ self,
+ *,
+ project_id: Optional[UUID] = None,
+ branch_id: Optional[UUID] = None,
+ step_index: Optional[int] = None,
+ ) -> Optional[ProjectState]:
+ """
+ Load project state from the database.
+
+ If `branch_id` is provided, load the latest state of the branch.
+ Otherwise, if `project_id` is provided, load the latest state of
+ the `main` branch in the project.
+
+ If `step_index` is provided, load the state at the given step
+ of the branch instead of the last one.
+
+ The returned ProjectState will have branch and branch.project
+ relationships preloaded. All other relationships must be
+ explicitly loaded using ProjectState.awaitable_attrs or
+ AsyncSession.refresh.
+
+ :param project_id: Project ID (keyword-only, optional).
+ :param branch_id: Branch ID (keyword-only, optional).
+ :param step_index: Step index within the branch (keyword-only, optional).
+ :return: The ProjectState object if found, None otherwise.
+ """
+
+ if self.current_session:
+ log.info("Current session exists, rolling back changes.")
+ await self.rollback()
+
+ state = None
+ session = await self.session_manager.start()
+
+ if branch_id is not None:
+ branch = await Branch.get_by_id(session, branch_id)
+ if branch is not None:
+ if step_index:
+ state = await branch.get_state_at_step(step_index)
+ else:
+ state = await branch.get_last_state()
+
+ elif project_id is not None:
+ project = await Project.get_by_id(session, project_id)
+ if project is not None:
+ branch = await project.get_branch()
+ if branch is not None:
+ if step_index:
+ state = await branch.get_state_at_step(step_index)
+ else:
+ state = await branch.get_last_state()
+ else:
+ raise ValueError("Project or branch ID must be provided.")
+
+ if state is None:
+ await self.session_manager.close()
+ log.debug(
+ f"Unable to load project state (project_id={project_id}, branch_id={branch_id}, step_index={step_index})"
+ )
+ return None
+
+ # TODO: in the future, we might want to create a new branch here?
+ await state.delete_after()
+ await session.commit()
+
+ self.current_session = session
+ self.current_state = state
+ self.branch = state.branch
+ self.project = state.branch.project
+ self.next_state = await state.create_next_state()
+ # TODO: overwrite files?
+ self.file_system = await self.init_file_system(load_existing=True)
+ log.debug(
+ f"Loaded project {self.project} ({self.project.id}) "
+ f"branch {self.branch} ({self.branch.id}"
+ f"step {state.step_index} (state id={state.id})"
+ )
+
+ if self.current_state.current_epic:
+ await self.ui.send_task_progress(
+ self.current_state.tasks.index(self.current_state.current_task) + 1,
+ len(self.current_state.tasks),
+ self.current_state.current_task["description"],
+ self.current_state.current_epic.get("source", "app"),
+ "in-progress",
+ )
+
+ return self.current_state
+
+ async def commit(self) -> ProjectState:
+ """
+ Commit the new project state to the database.
+
+ This commits `next_state` to the database, making the changes
+ permanent, then creates a new state for further changes.
+
+ :return: The committed state.
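+
+ Example (illustrative; assumes `sm` is a StateManager with an open session):
+
+ >>> committed = await sm.commit()
+ >>> committed is sm.current_state
+ True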
+ """
+ if self.next_state is None:
+ raise ValueError("No state to commit.")
+ if self.current_session is None:
+ raise ValueError("No database session open.")
+
+ await self.current_session.commit()
+
+ # Having shorter-lived sessions is considered good practice in SQLAlchemy,
+ # so we close and recreate the session for each state. This uses a db
+ # connection from the connection pool, so it is fast. Note that SQLite uses
+ # no connection pool by default because it's all in-process, so it's fast anyway.
+ self.current_session.expunge_all()
+ await self.session_manager.close()
+ self.current_session = await self.session_manager.start()
+
+ self.current_state = self.next_state
+ self.current_session.add(self.next_state)
+ self.next_state = await self.current_state.create_next_state()
+
+ telemetry.inc("num_steps")
+
+ # FIXME: write a test to verify files (and file content) are preloaded
+ return self.current_state
+
+ async def rollback(self):
+ """
+ Abandon (rollback) the next state changes.
+ """
+ if not self.current_session:
+ return
+ await self.current_session.rollback()
+ await self.session_manager.close()
+ self.current_session = None
+ return
+
+ async def log_llm_request(self, request_log: LLMRequestLog, agent: Optional["BaseAgent"] = None):
+ """
+ Log the LLM request to the current state.
+
+ Note: contrary to most other methods, this stores the information
+ to the CURRENT state, not the next one. As the requests/responses
+ depend on the current state, it makes it easier to analyze the
+ database by just looking at a single project state later.
+
+ :param request_log: The request log to log.
+ """
+ telemetry.record_llm_request(
+ request_log.prompt_tokens + request_log.completion_tokens,
+ request_log.duration,
+ request_log.status != LLMRequestStatus.SUCCESS,
+ )
+ LLMRequest.from_request_log(self.current_state, agent, request_log)
+
+ async def log_user_input(self, question: str, response: UserInputData):
+ """
+ Log the user input to the current state.
+
+ Note: contrary to most other methods, this stores the information
+ to the CURRENT state, not the next one. As the user interactions
+ depend on the current state, it makes it easier to analyze the
+ database by just looking at a single project state later.
+
+ :param question: The question asked.
+ :param response: The user response.
+ """
+ telemetry.inc("num_inputs")
+ UserInput.from_user_input(self.current_state, question, response)
+
+ async def log_command_run(self, exec_log: ExecLogData):
+ """
+ Log the command run to the current state.
+
+ Note: contrary to most other methods, this stores the information
+ to the CURRENT state, not the next one. As the command execution
+ depends on the current state, it makes it easier to analyze the
+ database by just looking at a single project state later.
+
+ :param exec_log: The command execution log.
+ """
+ telemetry.inc("num_commands")
+ ExecLog.from_exec_log(self.current_state, exec_log)
+
+ async def log_event(self, type: str, **kwargs):
+ """
+ Log an event like:
+
+ * start of epic
+ * start of task
+ * start of iteration
+ * end of task
+ * end of epic
+ * loop detected
+ """
+ # TODO: implement this
+ # Consider wiring this into the orchestrator so that the side effect is to
+ # send the update to the UI (vscode extension)
+
+ async def log_task_completed(self):
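+ """
+ Record a completed task in telemetry.
+
+ If there are no unfinished tasks left, also record the end result
+ (initial project or feature success) and send the telemetry data.
+ """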
+ telemetry.inc("num_tasks")
+ if not self.next_state.unfinished_tasks:
+ if len(self.current_state.epics) == 1:
+ telemetry.set("end_result", "success:initial-project")
+ else:
+ telemetry.set("end_result", "success:feature")
+ await telemetry.send()
+
+ async def get_file_by_path(self, path: str) -> Optional[File]:
+ """
+ Get a file from the current project state, by the file path.
+
+ :param path: The file path.
+ :return: The file object, or None if not found.
+ """
+ # FIXME - is this needed? should all be preloaded
+ file = self.current_state.get_file_by_path(path)
+ if file is None:
+ return None
+ # Make sure content is loaded
+ await file.awaitable_attrs.content
+ return file
+
+ async def save_file(
+ self,
+ path: str,
+ content: str,
+ metadata: Optional[dict] = None,
+ from_template: bool = False,
+ ):
+ """
+ Save a file to the project.
+
+ Note that the file is saved to the file system immediately, but in
+ the database it may be rolled back if `next_state` is never committed.
+
+ :param path: The file path.
+ :param content: The file content.
+ :param metadata: Optional metadata (e.g. description) to save with the file.
+ :param from_template: Whether the file is part of a template.
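+
+ Example (illustrative; assumes `sm` is a StateManager with an open session):
+
+ >>> await sm.save_file("src/hello.js", "console.log('hello');")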
+ """
+ try:
+ original_content = self.file_system.read(path)
+ except ValueError:
+ original_content = ""
+
+ # FIXME: VFS methods should probably be async
+ self.file_system.save(path, content)
+
+ hash = self.file_system.hash_string(content)
+ file_content = await FileContent.store(self.current_session, hash, content)
+
+ file = self.next_state.save_file(path, file_content)
+ if self.ui and not from_template:
+ await self.ui.open_editor(self.file_system.get_full_path(path))
+ if metadata:
+ file.meta = metadata
+
+ if not from_template:
+ delta_lines = len(content.splitlines()) - len(original_content.splitlines())
+ telemetry.inc("created_lines", delta_lines)
+
+ async def init_file_system(self, load_existing: bool) -> VirtualFileSystem:
+ """
+ Initialize file system interface for the new or loaded project.
+
+ When creating a new project, `load_existing` should be False to ensure a
+ new unique project folder is created. When loading an existing project,
+ `load_existing` should be True to allow using already-existing folder
+ with the project files. If the folder doesn't exist, it will be created.
+
+ This also initializes the ignore mechanism, so that files are correctly
+ ignored as configured.
+
+ :param load_existing: Whether to load existing files from the file system.
+ :return: The file system interface.
+ """
+ config = get_config()
+
+ if config.fs.type == FileSystemType.MEMORY:
+ return MemoryVFS()
+
+ if config.fs.type != FileSystemType.LOCAL:
+ raise ValueError(f"Unsupported file system type: {config.fs.type}")
+
+ while True:
+ root = self.get_full_project_root()
+ ignore_matcher = IgnoreMatcher(
+ root,
+ config.fs.ignore_paths,
+ ignore_size_threshold=config.fs.ignore_size_threshold,
+ )
+
+ try:
+ return LocalDiskVFS(root, allow_existing=load_existing, ignore_matcher=ignore_matcher)
+ except FileExistsError:
+ log.warning(f"Directory {root} already exists, changing project folder to {self.project.folder_name}")
+ self.project.folder_name = self.project.folder_name + "-" + uuid4().hex[:7]
+ await self.current_session.commit()
+
+ def get_full_project_root(self) -> str:
+ """
+ Get the full path to the project root folder.
+
+ :return: The full path to the project root folder.
+ """
+ config = get_config()
+
+ if self.project is None:
+ raise ValueError("No project loaded")
+ return os.path.join(config.fs.workspace_root, self.project.folder_name)
+
+ async def import_files(self) -> list[File]:
+ """
+ Scan the file system, import new/modified files, delete removed files.
+
+ The files are saved to / removed from `next_state`, but not committed
+ to the database until the new state is committed.
+
+ :return: List of imported files.
+ """
+ known_files = {file.path: file for file in self.current_state.files}
+ files_in_workspace = set()
+ imported_files = []
+
+ for path in self.file_system.list():
+ files_in_workspace.add(path)
+ content = self.file_system.read(path)
+ saved_file = known_files.get(path)
+ if saved_file and saved_file.content.content == content:
+ continue
+
+ # TODO: unify this with self.save_file() / refactor that whole bit
+ hash = self.file_system.hash_string(content)
+ log.debug(f"Importing file {path} (hash={hash}, size={len(content)} bytes)")
+ file_content = await FileContent.store(self.current_session, hash, content)
+ file = self.next_state.save_file(path, file_content, external=True)
+ imported_files.append(file)
+
+ for path, file in known_files.items():
+ if path not in files_in_workspace:
+ log.debug(f"File {path} was removed from workspace, deleting from project")
+ next_state_file = self.next_state.get_file_by_path(path)
+ self.next_state.files.remove(next_state_file)
+
+ return imported_files
+
+ async def restore_files(self) -> list[File]:
+ """
+ Restore files from the database to VFS.
+
+ Warning: this could overwrite the user's files on disk!
+
+ :return: List of restored files.
+ """
+ known_files = {file.path: file for file in self.current_state.files}
+ files_in_workspace = self.file_system.list()
+
+ for disk_f in files_in_workspace:
+ if disk_f not in known_files:
+ self.file_system.remove(disk_f)
+
+ restored_files = []
+ for path, file in known_files.items():
+ restored_files.append(file)
+ self.file_system.save(path, file.content.content)
+
+ return restored_files
+
+ async def get_modified_files(self) -> list[str]:
+ """
+ Return a list of new or modified files from the file system.
+
+ :return: List of paths for new or modified files.
+ """
+
+ modified_files = []
+ files_in_workspace = self.file_system.list()
+ for path in files_in_workspace:
+ content = self.file_system.read(path)
+ saved_file = self.current_state.get_file_by_path(path)
+ if saved_file and saved_file.content.content == content:
+ continue
+ modified_files.append(path)
+
+ # Handle files removed from disk
+ await self.current_state.awaitable_attrs.files
+ for db_file in self.current_state.files:
+ if db_file.path not in files_in_workspace:
+ modified_files.append(db_file.path)
+
+ return modified_files
+
+ def workspace_is_empty(self) -> bool:
+ """
+ Return whether the workspace is empty (contains no files).
+ """
+ return not bool(self.file_system.list())
+
+ @staticmethod
+ def get_input_required(content: str) -> list[int]:
+ """
+ Get the list of lines containing the INPUT_REQUIRED keyword.
+
+ :param content: The file content to search.
+ :return: Indices of lines with INPUT_REQUIRED keyword, starting from 1.
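+
+ Example:
+
+ >>> StateManager.get_input_required("a = 1 # INPUT_REQUIRED set value")
+ [1]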
+ """
+ lines = []
+ for i, line in enumerate(content.splitlines(), start=1):
+ if "INPUT_REQUIRED" in line:
+ lines.append(i)
+
+ return lines
+
+
+__all__ = ["StateManager"]
diff --git a/core/telemetry/__init__.py b/core/telemetry/__init__.py
new file mode 100644
index 00000000..186a016a
--- /dev/null
+++ b/core/telemetry/__init__.py
@@ -0,0 +1,367 @@
+import sys
+import time
+import traceback
+from copy import deepcopy
+from pathlib import Path
+from typing import Any
+
+import httpx
+
+from core.config import get_config
+from core.config.user_settings import settings
+from core.config.version import get_version
+from core.log import get_logger
+
+log = get_logger(__name__)
+
+LARGE_REQUEST_THRESHOLD = 50000 # tokens
+SLOW_REQUEST_THRESHOLD = 300 # seconds
+
+
+class Telemetry:
+ """
+ Pythagora telemetry data collection.
+
+ This class is a singleton, use the `telemetry` global variable to access it:
+
+ >>> from core.telemetry import telemetry
+
+ To record start of application creation process:
+
+ >>> telemetry.start()
+
+ To record data or increase counters:
+
+ >>> telemetry.set("model", "gpt-4")
+ >>> telemetry.inc("num_llm_requests", 5)
+
+ To stop recording and send the data:
+
+ >>> telemetry.stop()
+ >>> await telemetry.send()
+
+ Note: all methods are no-ops if telemetry is not enabled.
+ """
+
+ MAX_CRASH_FRAMES = 3
+
+ def __init__(self):
+ self.enabled = False
+ self.telemetry_id = None
+ self.endpoint = None
+ self.clear_data()
+
+ if settings.telemetry is not None:
+ self.enabled = settings.telemetry.enabled
+ self.telemetry_id = settings.telemetry.id
+ self.endpoint = settings.telemetry.endpoint
+
+ if self.enabled:
+ log.debug(f"Telemetry enabled (id={self.telemetry_id}), configure or disable it in {settings.config_path}")
+
+ def clear_data(self):
+ """
+ Reset all telemetry data to default values.
+ """
+ config = get_config()
+
+ self.data = {
+ # System platform
+ "platform": sys.platform,
+ # Python version used for GPT Pilot
+ "python_version": sys.version,
+ # GPT Pilot version
+ "pilot_version": get_version(),
+ # GPT Pilot Extension version
+ "extension_version": None,
+ # Is extension used
+ "is_extension": False,
+ # The default LLM provider and model
+ "provider": config.agent["default"].provider.value,
+ "model": config.agent["default"].model,
+ # Initial prompt
+ "initial_prompt": None,
+ # Optional template used for the project
+ "template": None,
+ # Optional user contact email
+ "user_contact": None,
+ # Unique project ID (app_id)
+ "app_id": None,
+ # Project architecture
+ "architecture": None,
+ }
+ if sys.platform == "linux":
+ try:
+ import distro
+
+ self.data["linux_distro"] = distro.name(pretty=True)
+ except Exception as err:
+ log.debug(f"Error getting Linux distribution info: {err}", exc_info=True)
+ self.clear_counters()
+
+ def clear_counters(self):
+ """
+ Reset telemetry counters while keeping the base data.
+ """
+ self.data.update(
+ {
+ # Number of LLM requests made
+ "num_llm_requests": 0,
+ # Number of LLM requests that resulted in an error
+ "num_llm_errors": 0,
+ # Number of tokens used for LLM requests
+ "num_llm_tokens": 0,
+ # Number of development steps
+ "num_steps": 0,
+ # Number of commands run during development
+ "num_commands": 0,
+ # Number of times a human input was required during development
+ "num_inputs": 0,
+ # Number of files in the project
+ "num_files": 0,
+ # Total number of lines in the project
+ "num_lines": 0,
+ # Number of tasks started during development
+ "num_tasks": 0,
+ # Number of seconds elapsed during development
+ "elapsed_time": 0,
+ # Total number of lines created by GPT Pilot
+ "created_lines": 0,
+ # End result of development:
+ # - success:initial-project
+ # - success:feature
+ # - success:exit
+ # - failure
+ # - failure:api-error
+ # - interrupt
+ "end_result": None,
+ # Whether the project is continuation of a previous session
+ "is_continuation": False,
+ # Optional user feedback
+ "user_feedback": None,
+ # If GPT Pilot crashes, record diagnostics
+ "crash_diagnostics": None,
+ # Statistics for large requests
+ "large_requests": None,
+ # Statistics for slow requests
+ "slow_requests": None,
+ }
+ )
+ self.start_time = None
+ self.end_time = None
+ self.large_requests = []
+ self.slow_requests = []
+
+ def set(self, name: str, value: Any):
+ """
+ Set a telemetry data field to a value.
+
+ :param name: name of the telemetry data field
+ :param value: value to set the field to
+
+ Note: only known data fields may be set, see `Telemetry.clear_data()` for a list.
+ """
+ if name not in self.data:
+ log.error(f"Telemetry.record(): ignoring unknown telemetry data field: {name}")
+ return
+
+ self.data[name] = value
+
+ def inc(self, name: str, value: int = 1):
+ """
+ Increase a telemetry data field by a value.
+
+ :param name: name of the telemetry data field
+ :param value: value to increase the field by (default: 1)
+
+ Note: only known data fields may be increased, see `Telemetry.clear_data()` for a list.
+ """
+ if name not in self.data:
+ log.error(f"Telemetry.increase(): ignoring unknown telemetry data field: {name}")
+ return
+
+ self.data[name] += value
+
+ def start(self):
+ """
+ Record start of application creation process.
+ """
+ self.start_time = time.time()
+ self.end_time = None
+
+ def stop(self):
+ """
+ Record end of application creation process.
+ """
+ if self.start_time is None:
+ log.error("Telemetry.stop(): cannot stop telemetry, it was never started")
+ return
+
+ self.end_time = time.time()
+ self.data["elapsed_time"] = int(self.end_time - self.start_time)
+
+ def record_crash(
+ self,
+ exception: Exception,
+ end_result: str = "failure",
+ ):
+ """
+ Record crash diagnostics.
+
+ :param exception: exception that caused the crash
+ :param end_result: end result to record (default: "failure")
+
+ Records the following crash diagnostics data:
+ * full stack trace
+ * exception (class name and message)
+ * file:line for the last (innermost) 3 frames of the stack trace
+ """
+ self.set("end_result", end_result)
+
+ root_dir = Path(__file__).parent.parent.parent
+ stack_trace = traceback.format_exc()
+ exception_class_name = exception.__class__.__name__
+ exception_message = str(exception)
+ frames = []
+
+ # Let's not crash if there's something funny in frame or path handling
+ try:
+ tb = exception.__traceback__
+ while tb is not None:
+ frame = tb.tb_frame
+ file_path = Path(frame.f_code.co_filename).absolute().relative_to(root_dir).as_posix()
+ frame_info = {"file": file_path, "line": tb.tb_lineno}
+ if not file_path.startswith("pilot-env"):
+ frames.append(frame_info)
+ tb = tb.tb_next
+ except: # noqa
+ pass
+
+ frames.reverse()
+ self.data["crash_diagnostics"] = {
+ "stack_trace": stack_trace,
+ "exception_class": exception_class_name,
+ "exception_message": exception_message,
+ "frames": frames[: self.MAX_CRASH_FRAMES],
+ }
+
+ def record_llm_request(
+ self,
+ tokens: int,
+ elapsed_time: int,
+ is_error: bool,
+ ):
+ """
+ Record an LLM request.
+
+ :param tokens: number of tokens in the request
+ :param elapsed_time: time elapsed for the request
+ :param is_error: whether the request resulted in an error
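+
+ Example (illustrative):
+
+ >>> telemetry.record_llm_request(60000, 320, is_error=False)
+
+ This counts as both a large (> 50000 tokens) and a slow (> 300s) request.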
+ """
+ self.inc("num_llm_requests")
+
+ if is_error:
+ self.inc("num_llm_errors")
+ else:
+ self.inc("num_llm_tokens", tokens)
+
+ if tokens > LARGE_REQUEST_THRESHOLD:
+ self.large_requests.append(tokens)
+ if elapsed_time > SLOW_REQUEST_THRESHOLD:
+ self.slow_requests.append(elapsed_time)
+
+ def calculate_statistics(self):
+ """
+ Calculate statistics for large and slow requests.
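+
+ For example, large requests of 60000, 70000 and 80000 tokens yield
+ num_requests=3, min_tokens=60000, max_tokens=80000, avg_tokens=70000
+ and median_tokens=70000.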
+ """
+ n_large = len(self.large_requests)
+ n_slow = len(self.slow_requests)
+
+ self.data["large_requests"] = {
+ "num_requests": n_large,
+ "min_tokens": min(self.large_requests) if n_large > 0 else None,
+ "max_tokens": max(self.large_requests) if n_large > 0 else None,
+ "avg_tokens": sum(self.large_requests) // n_large if n_large > 0 else None,
+ "median_tokens": sorted(self.large_requests)[n_large // 2] if n_large > 0 else None,
+ }
+ self.data["slow_requests"] = {
+ "num_requests": n_slow,
+ "min_time": min(self.slow_requests) if n_slow > 0 else None,
+ "max_time": max(self.slow_requests) if n_slow > 0 else None,
+ "avg_time": sum(self.slow_requests) // n_slow if n_slow > 0 else None,
+ "median_time": sorted(self.slow_requests)[n_slow // 2] if n_slow > 0 else None,
+ }
+
+ async def send(self, event: str = "pilot-telemetry"):
+ """
+ Send telemetry data to the phone-home endpoint.
+
+ Note: on success, this clears the telemetry counters and marks the
+ session as a continuation.
+ """
+ if not self.enabled:
+ return
+
+ if self.endpoint is None:
+ log.error("Telemetry.send(): cannot send telemetry, no endpoint configured")
+ return
+
+ if self.start_time is not None and self.end_time is None:
+ self.stop()
+
+ self.calculate_statistics()
+ payload = {
+ "pathId": self.telemetry_id,
+ "event": event,
+ "data": self.data,
+ }
+
+ log.debug(f"Telemetry.send(): sending telemetry data to {self.endpoint}")
+ try:
+ async with httpx.AsyncClient() as client:
+ response = await client.post(self.endpoint, json=payload)
+ response.raise_for_status()
+ self.clear_counters()
+ self.set("is_continuation", True)
+ except httpx.RequestError as e:
+ log.error(f"Telemetry.send(): failed to send telemetry data: {e}", exc_info=True)
+
+ def get_project_stats(self) -> dict:
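+ """
+ Return basic project statistics: number of lines and files in the
+ project and the number of LLM tokens used.
+ """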
+ return {
+ "num_lines": self.data["num_lines"],
+ "num_files": self.data["num_files"],
+ "num_tokens": self.data["num_llm_tokens"],
+ }
+
+ async def trace_code_event(self, name: str, data: dict):
+ """
+ Record a code event to trace potential logic bugs.
+
+ :param name: name of the event
+ :param data: data to send with the event
+ """
+ if not self.enabled:
+ return
+
+ payload = {
+ "pathId": self.telemetry_id,
+ "event": f"trace-{name}",
+ "data": data,
+ }
+
+ log.debug(f"Sending trace event {name} to {self.endpoint}")
+
+ try:
+ async with httpx.AsyncClient() as client:
+ await client.post(self.endpoint, json=payload)
+ except httpx.RequestError:
+ pass
+
+ async def trace_loop(self, name: str, task_with_loop: dict):
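+ """
+ Record a loop-detection trace event, sending a snapshot of the
+ current telemetry data together with the task in which the loop
+ was detected.
+
+ :param name: name of the event
+ :param task_with_loop: the task in which the loop was detected
+ """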
+ payload = deepcopy(self.data)
+ payload["task_with_loop"] = task_with_loop
+ await self.trace_code_event(name, payload)
+
+
+telemetry = Telemetry()
+
+
+__all__ = ["telemetry"]
diff --git a/core/templates/__init__.py b/core/templates/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/core/templates/example_project.py b/core/templates/example_project.py
new file mode 100644
index 00000000..61cb13c3
--- /dev/null
+++ b/core/templates/example_project.py
@@ -0,0 +1,66 @@
+EXAMPLE_PROJECT_DESCRIPTION = """
+The application is a simple ToDo app built using React. Its primary function is to allow users to manage a list of tasks (todos). Each task has a description and a state (open or completed, with the default state being open). The application is frontend-only, with no user sign-up or authentication process. The goal is to provide a straightforward and user-friendly interface for task management.
+
+Features:
+1. Display of Todos: A list that displays all todo items. Each item shows its description and a checkbox to indicate its state (open or completed).
+2. Add New Todo: A button to add a new todo item. Clicking this button will prompt the user to enter a description for the new todo.
+3. Toggle State: Each todo item includes a checkbox. Checking/unchecking this box toggles the todo's state between open and completed.
+4. Local Storage: The application will use the browser's local storage to persist todos between sessions, ensuring that users do not lose their data upon reloading the application.
+
+Functional Specification:
+- Upon loading the application, it fetches existing todos from the local storage and displays them in a list.
+- Each todo item in the list displays a checkbox and a description. The checkbox reflects the todo's current state (checked for completed, unchecked for open).
+- When the user checks or unchecks a checkbox, the application updates the state of the corresponding todo item and saves the updated list to local storage.
+- Clicking the "Add New Todo" button prompts the user to enter a description for the new todo. Upon confirmation, the application adds the new todo (with the default state of open) to the list and updates local storage.
+- The application does not support deleting or editing todo items to keep the interface and interactions simple.
+- Todos persist between sessions using the browser's local storage. The application saves any changes to the todo list (additions or state changes) in local storage and retrieves this data when the application is reloaded.
+
+Technical Specification:
+- Platform/Technologies: The application is a web application developed using React. No backend technologies are required.
+ - Styling: Use Bootstrap 5 for a simple and functional interface. Load Bootstrap from the CDN (don't install it locally):
+ - https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css
+ - https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js
+- State Management: Directly in the React component
+ - make sure to initialize the state from the local storage as default (... = useState(JSON.parse(localStorage.getItem('todos')) || [])) to avoid race conditions
+- Data Persistence: The application uses the browser's local storage to persist todos between sessions. It stores the array of todos as a JSON string and parses this data on application load.
+"""
+
+EXAMPLE_PROJECT_ARCHITECTURE = {
+ "architecture": (
+ "The application is a client-side React web application that uses local storage for data persistence. "
+ "It consists of a single page with components for listing todos, adding new todos, and toggling their completion status. "
+ "State management is handled directly within React components, leveraging useState and useEffect hooks for state manipulation and side effects, respectively. "
+ "Bootstrap 5 is used for styling to provide a responsive and accessible UI."
+ ),
+ "system_dependencies": [
+ {
+ "name": "Node.js",
+ "description": "JavaScript runtime needed to run the React development tools and build the project.",
+ "test": "node --version",
+ "required_locally": True,
+ }
+ ],
+ "package_dependencies": [
+ {"name": "react", "description": "A JavaScript library for building user interfaces."},
+ {"name": "react-dom", "description": "Serves as the entry point to the DOM and server renderers for React."},
+ {"name": "bootstrap", "description": "Frontend framework for developing responsive and mobile-first websites."},
+ ],
+ "template": "javascript_react",
+}
+
+EXAMPLE_PROJECT_PLAN = [
+ {
+ "description": (
+ "Create a new component TodoList: This component will display the list of todo items. "
+ "Use localStorage directly to access the current state of todos and map over them, rendering each todo item as a list item. "
+ "Each item should display the todo's description and a checkbox that reflects the todo's state (checked for completed, unchecked for open). "
+ "When the checkbox is clicked, dispatch an action to toggle the state of the todo. "
+ "Also create AddTodo: This component will include a button that, when clicked, displays a prompt asking the user for a description of the new todo. "
+ "Upon confirmation, dispatch an action to add the new todo to the state with a default state of open. "
+ "Ensure the component also updates the local storage with the new list of todos. "
+ "Finally, use TodoList and AddTodo components in App component to implement the required functionality. "
+ "Integrate Boostrap 5 for styling - add CSS/JS to index.html, style App.jsx and other files as appropriate."
+ ),
+ "completed": False,
+ }
+]
diff --git a/core/templates/javascript_react.py b/core/templates/javascript_react.py
new file mode 100644
index 00000000..82c94eac
--- /dev/null
+++ b/core/templates/javascript_react.py
@@ -0,0 +1,38 @@
+from core.proc.process_manager import ProcessManager
+
+
+async def install_hook(process_manager: ProcessManager):
+ """
+ Command to run to complete the project scaffolding setup.
+
+ :param process_manager: ProcessManager instance to run the install commands with.
+ """
+ await process_manager.run_command("npm install")
+
+
+JAVASCRIPT_REACT = {
+ "path": "javascript_react",
+ "description": "React web app using Vite devserver/bundler",
+ "summary": "\n".join(
+ [
+ "* Initial setup with Vite for fast development",
+ "* Basic project structure for React development",
+ "* Development server setup for hot reloading",
+ "* Minimal configuration to get started with React",
+ ]
+ ),
+ "install_hook": install_hook,
+ "files": {
+ "vite.config.js": "Configuration file for Vite, a fast developer-friendly Javascript bundler/devserver.",
+ "index.html": "Main entry point for the project. It includes a basic HTML structure with a root div element and a script tag importing a JavaScript file named main.jsx using the module type. References: src/main.jsx",
+ ".eslintrc.cjs": "Configuration file for ESLint, a static code analysis tool for identifying problematic patterns found in JavaScript code. It defines rules for linting JavaScript code with a focus on React applications.",
+ ".gitignore": "Specifies patterns to exclude files and directories from being tracked by Git version control system. It is used to prevent certain files from being committed to the repository.",
+ "package.json": "Standard Nodejs package metadata file, specifies dependencies and start scripts. It also specifies that the project is a module.",
+ "public/.gitkeep": "Empty file",
+ "src/App.css": "Contains styling rules for the root element of the application, setting a maximum width, centering it on the page, adding padding, and aligning text to the center.",
+ "src/index.css": "Defines styling rules for the root element, body, and h1 elements of a web page.",
+ "src/App.jsx": "Defines a functional component that serves as the root component in the project. The component is exported as the default export. References: src/App.css",
+ "src/main.jsx": "Main entry point for a React application. It imports necessary modules, renders the main component 'App' inside a 'React.StrictMode' component, and mounts it to the root element in the HTML document. References: App.jsx, index.css",
+ "src/assets/.gitkeep": "Empty file",
+ },
+}
diff --git a/core/templates/node_express_mongoose.py b/core/templates/node_express_mongoose.py
new file mode 100644
index 00000000..6b691aa8
--- /dev/null
+++ b/core/templates/node_express_mongoose.py
@@ -0,0 +1,45 @@
+from core.proc.process_manager import ProcessManager
+
+
+async def install_hook(process_manager: ProcessManager):
+ """
+ Command to run to complete the project scaffolding setup.
+
+ :param process_manager: ProcessManager instance to run the install commands with.
+ """
+ await process_manager.run_command("npm install")
+
+
+NODE_EXPRESS_MONGOOSE = {
+ "path": "node_express_mongoose",
+ "description": "Node + Express + MongoDB web app with session-based authentication, EJS views and Bootstrap 5",
+ "summary": "\n".join(
+ [
+ "* initial Node + Express setup",
+ "* User model in Mongoose ORM with username and password fields, ensuring username is unique and hashing passwords with bcrypt prior to saving to the database",
+ "* session-based authentication using username + password (hashed using bcrypt) in routes/authRoutes.js, using express-session",
+ "* authentication middleware to protect routes that require login",
+ "* EJS view engine, html head, header and footer EJS partials, with included Boostrap 5.x CSS and JS",
+ "* routes and EJS views for login, register, and home (main) page",
+ "* config loading from environment using dotenv with a placeholder .env.example file: you will need to create a .env file with your own values",
+ ]
+ ),
+ "install_hook": install_hook,
+ "files": {
+ ".env.example": "The .env.example file serves as a template for setting up environment variables used in the application. It provides placeholders for values such as the port number, MongoDB database URL, and session secret string.",
+ ".env": "This file is a configuration file in the form of a .env file. It contains environment variables used by the application, such as the port to listen on, the MongoDB database URL, and the session secret string.",
+ "server.js": "This `server.js` file sets up an Express server with MongoDB database connection, session management using connect-mongo, templating engine EJS, static file serving, authentication routes, error handling, and request logging. [References: dotenv, mongoose, express, express-session, connect-mongo, ./routes/authRoutes]",
+ "package.json": "This `package.json` file is used to define the metadata and dependencies for a Node.js project named 'tt0'. It specifies the project name, version, main entry point file, scripts for starting and testing the project, dependencies required by the project, and other metadata like author and license. [References: server.js]",
+ "views/login.ejs": "This file represents the login page of a web application using EJS (Embedded JavaScript) templating. It includes partials for the head, header, and footer sections, and contains a form for users to input their username and password to log in. [References: partials/_head.ejs, partials/_header.ejs, partials/_footer.ejs]",
+ "views/register.ejs": "The 'views/register.ejs' file contains the HTML markup for a registration form. It includes fields for username and password, along with a button to submit the form and a link to redirect to the login page if the user already has an account. [References: partials/_head.ejs, partials/_header.ejs, partials/_footer.ejs]",
+ "views/index.ejs": "This file represents the main view for a web application. It includes partials for the head, header, and footer sections, and contains a simple HTML structure with a main container displaying a heading. [References: partials/_head.ejs, partials/_header.ejs, partials/_footer.ejs, js/main.js]",
+ "views/partials/_header.ejs": "This file represents a partial view for the header section of a web page. It includes a navigation bar with a brand logo, toggle button, and links for Home, Login, and Logout based on the user's session status.",
+ "views/partials/_head.ejs": "This file represents the partial for the head section of an HTML document. It includes meta tags, a title tag, and links to external CSS files (Bootstrap and a custom stylesheet).",
+ "views/partials/_footer.ejs": "This file defines the footer section of a web page using EJS (Embedded JavaScript) templating. It includes a copyright notice and a link to the Bootstrap JavaScript library. [References: https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.min.js]",
+ "routes/authRoutes.js": "This file defines routes for user authentication including registration, login, and logout. It interacts with a User model to handle user data and uses bcrypt for password hashing and comparison. [References: models/User.js]",
+ "routes/middleware/authMiddleware.js": "This file defines a middleware function called isAuthenticated, which checks if a user is authenticated based on the presence of a userId in the session object. If authenticated, it allows the request to proceed to the next middleware or route handler; otherwise, it returns a 401 status response indicating the user is not authenticated.",
+ "models/User.js": "This file defines a Mongoose model for a user with fields for username and password. It includes a pre-save hook to hash the user's password before saving it to the database using bcrypt. [References: mongoose, bcrypt]",
+ "public/js/main.js": "The main.js file is a placeholder for future JavaScript code. It currently does not contain any specific functionality.",
+ "public/css/style.css": "This file is a placeholder for custom styles. It does not contain any specific styles but is intended for adding custom CSS styles.",
+ },
+}
diff --git a/core/templates/registry.py b/core/templates/registry.py
new file mode 100644
index 00000000..e67d6dad
--- /dev/null
+++ b/core/templates/registry.py
@@ -0,0 +1,94 @@
+import os
+from enum import Enum
+from typing import Optional
+from uuid import uuid4
+
+from core.log import get_logger
+from core.proc.process_manager import ProcessManager
+from core.state.state_manager import StateManager
+
+from .javascript_react import JAVASCRIPT_REACT
+from .node_express_mongoose import NODE_EXPRESS_MONGOOSE
+from .render import Renderer
+
+PROJECT_TEMPLATES = {
+ "node_express_mongoose": NODE_EXPRESS_MONGOOSE,
+ "javascript_react": JAVASCRIPT_REACT,
+}
+
+log = get_logger(__name__)
+
+
+class ProjectTemplateEnum(str, Enum):
+ """Choices of available project templates."""
+
+ NODE_EXPRESS_MONGOOSE = "node_express_mongoose"
+ JAVASCRIPT_REACT = "javascript_react"
+
+
+async def apply_project_template(
+ template_name: str,
+ state_manager: StateManager,
+ process_manager: ProcessManager,
+) -> Optional[str]:
+ """
+ Apply a project template to a new project.
+
+ :param template_name: The name of the template to apply.
+ :param state_manager: The state manager instance to save files to.
+ :param process_manager: The process manager instance to run install hooks with.
+ :return: A summary of the applied template, or None if no template was applied.
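+
+ Example (illustrative; assumes state and process managers are already set up):
+
+ >>> summary = await apply_project_template("javascript_react", state_manager, process_manager)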
+ """
+ if not template_name or template_name not in PROJECT_TEMPLATES:
+ log.warning(f"Project template '{template_name}' not found, ignoring")
+ return None
+
+ project_name = state_manager.current_state.branch.project.name
+ project_description = state_manager.current_state.specification.description
+ template = PROJECT_TEMPLATES[template_name]
+ install_hook = template.get("install_hook")
+
+ # TODO: this could be configurable to get premium templates
+ r = Renderer(os.path.join(os.path.dirname(__file__), "tpl"))
+
+ log.info(f"Applying project template {template_name}...")
+
+ files = r.render_tree(
+ template["path"],
+ {
+ "project_name": project_name,
+ "project_description": project_description,
+ "random_secret": uuid4().hex,
+ },
+ )
+
+ descriptions = template.get("files", {})
+ for file_name, file_content in files.items():
+ desc = descriptions.get(file_name)
+ metadata = {"description": desc} if desc else None
+ await state_manager.save_file(file_name, file_content, metadata=metadata, from_template=True)
+
+ try:
+ if install_hook:
+ await install_hook(process_manager)
+ except Exception as err:
+ log.error(
+ f"Error running install hook for project template '{template_name}': {err}",
+ exc_info=True,
+ )
+
+ return template["summary"]
+
+
+def get_template_summary(template_name: str) -> Optional[str]:
+ """
+ Get a summary of a project template.
+
+ :param template_name: The name of the project template.
+ :return: A summary of the template, or None if no template was found.
+ """
+ if not template_name or template_name not in PROJECT_TEMPLATES:
+ log.warning(f"Project template '{template_name}' not found, ignoring")
+ return None
+ template = PROJECT_TEMPLATES[template_name]
+ return template["summary"]
diff --git a/core/templates/render.py b/core/templates/render.py
new file mode 100644
index 00000000..18b3fe37
--- /dev/null
+++ b/core/templates/render.py
@@ -0,0 +1,106 @@
+from __future__ import annotations
+
+from os import walk
+from os.path import join, relpath
+from typing import Any, Callable, Optional
+
+from jinja2 import Environment, FileSystemLoader
+
+
+class Renderer:
+ """
+ Render a Jinja template
+
+ Sets up Jinja renderer and renders one or more templates
+ using provided context.
+
+ * `render_template` renders a single template
+ * `render_tree` renders all templates starting from a predefined
+ root folder (which must reside inside the templates folder structure)
+
+ Rendered template(s) are returned as strings. Nothing is written
+ to disk.
+
+ Usage:
+
+ >>> from core.templates.render import Renderer
+ >>> r = Renderer('path/to/templates')
+ >>> output_string = r.render_template('template.html', {'key': 'value'})
+ >>> output_tree = r.render_tree('tree/root', {'key': 'value'})
+ """
+
+ def __init__(self, template_dir: str):
+ self.template_dir = template_dir
+ self.jinja_env = Environment(
+ loader=FileSystemLoader(template_dir),
+ autoescape=False,
+ lstrip_blocks=True,
+ trim_blocks=True,
+ keep_trailing_newline=True,
+ )
+ # Add filters here
+ # self.jinja_env.filters["qstr"] = qstr
+
+ def render_template(self, template: str, context: Any) -> str:
+ """
+ Render a single template to a string using provided context
+
+ :param template: Name of the template file, relative to `template_dir`.
+ :param context: Context to render the template with.
+ :return: The resulting string.
+ """
+
+ # Jinja2 always uses /, even on Windows
+ template = template.replace("\\", "/")
+
+ tpl_object = self.jinja_env.get_template(template)
+ return tpl_object.render(context)
+
+ def render_tree(self, root: str, context: Any, filter: Optional[Callable] = None) -> dict[str, str]:
+ """
+ Render a tree folder structure of templates using provided context
+
+ :param root: Root of the tree (relative to `template_dir`).
+ :param context: Context to render the templates with.
+ :param filter: If defined, will be called for each file to check if it
+ needs to be processed and determine output file path.
+ :return: A flat dictionary with path => content structure.
+
+ Root must be inside the template_dir (and must be specified relative
+ to it), but need not be at the root of `template_dir`.
+
+ If supplied, `filter` must be a callable taking a single string
+ argument. It will be called for every file before processing it, with
+ the file name (relative to root of the tree) as the argument. If filter
+ returns a non-empty string, file will be rendered. If it returns None
+ or an empty string, file will be skipped. If `filter` is not defined,
+ all files are processed.
+
+ In the returned structure, `file_name` is the location of the file
+ relative to the tree root (unless changed by `filter`) and
+ `contents` is the rendered file contents (a string).
+
+ Directories are implied by file paths, not represented by elements
+ in the returned dictionary.
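+
+ Example filter (illustrative) that skips dotfiles and keeps other
+ paths unchanged:
+
+ >>> skip_hidden = lambda path: None if path.startswith(".") else path
+ >>> output_tree = r.render_tree("tree/root", {"key": "value"}, filter=skip_hidden)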
+ """
+
+ retval = {}
+
+ # Actual full path of the root of the tree we're rendering
+ full_root = join(self.template_dir, root)
+
+ for path, subdirs, files in walk(full_root):
+ for file in files:
+ file_path = join(path, file) # actual full path of the template file
+ tpl_location = relpath(file_path, self.template_dir) # template relative to template_dir
+ output_location = relpath(file_path, full_root) # template relative to tree root
+
+ if filter:
+ output_location = filter(output_location)
+ if not output_location:
+ continue
+
+ contents = self.render_template(tpl_location, context)
+ retval[output_location] = contents
+
+ return retval
diff --git a/core/templates/tpl/javascript_react/.eslintrc.cjs b/core/templates/tpl/javascript_react/.eslintrc.cjs
new file mode 100644
index 00000000..3e212e1d
--- /dev/null
+++ b/core/templates/tpl/javascript_react/.eslintrc.cjs
@@ -0,0 +1,21 @@
+module.exports = {
+ root: true,
+ env: { browser: true, es2020: true },
+ extends: [
+ 'eslint:recommended',
+ 'plugin:react/recommended',
+ 'plugin:react/jsx-runtime',
+ 'plugin:react-hooks/recommended',
+ ],
+ ignorePatterns: ['dist', '.eslintrc.cjs'],
+ parserOptions: { ecmaVersion: 'latest', sourceType: 'module' },
+ settings: { react: { version: '18.2' } },
+ plugins: ['react-refresh'],
+ rules: {
+ 'react/jsx-no-target-blank': 'off',
+ 'react-refresh/only-export-components': [
+ 'warn',
+ { allowConstantExport: true },
+ ],
+ },
+}
diff --git a/core/templates/tpl/javascript_react/.gitignore b/core/templates/tpl/javascript_react/.gitignore
new file mode 100644
index 00000000..a547bf36
--- /dev/null
+++ b/core/templates/tpl/javascript_react/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/core/templates/tpl/javascript_react/index.html b/core/templates/tpl/javascript_react/index.html
new file mode 100644
index 00000000..0777b6a9
--- /dev/null
+++ b/core/templates/tpl/javascript_react/index.html
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+ <head>
+ <meta charset="UTF-8" />
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+ <title>{{ project_name }}</title>
+ </head>
+ <body>
+ <div id="root"></div>
+ <script type="module" src="/src/main.jsx"></script>
+ </body>
+</html>
diff --git a/core/templates/tpl/javascript_react/package.json b/core/templates/tpl/javascript_react/package.json
new file mode 100644
index 00000000..54f58642
--- /dev/null
+++ b/core/templates/tpl/javascript_react/package.json
@@ -0,0 +1,26 @@
+{
+ "name": "{{ project_name }}",
+ "private": true,
+ "version": "0.0.0",
+ "type": "module",
+ "scripts": {
+ "dev": "vite",
+ "build": "vite build",
+ "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0",
+ "preview": "vite preview"
+ },
+ "dependencies": {
+ "react": "^18.2.0",
+ "react-dom": "^18.2.0"
+ },
+ "devDependencies": {
+ "@types/react": "^18.2.64",
+ "@types/react-dom": "^18.2.21",
+ "@vitejs/plugin-react": "^4.2.1",
+ "eslint": "^8.57.0",
+ "eslint-plugin-react": "^7.34.0",
+ "eslint-plugin-react-hooks": "^4.6.0",
+ "eslint-plugin-react-refresh": "^0.4.5",
+ "vite": "^5.1.6"
+ }
+}
diff --git a/core/templates/tpl/javascript_react/public/.gitkeep b/core/templates/tpl/javascript_react/public/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/core/templates/tpl/javascript_react/src/App.css b/core/templates/tpl/javascript_react/src/App.css
new file mode 100644
index 00000000..4014cd73
--- /dev/null
+++ b/core/templates/tpl/javascript_react/src/App.css
@@ -0,0 +1,7 @@
+#root {
+ max-width: 1280px;
+ margin: 0 auto;
+ padding: 2rem;
+ text-align: center;
+}
+
diff --git a/core/templates/tpl/javascript_react/src/App.jsx b/core/templates/tpl/javascript_react/src/App.jsx
new file mode 100644
index 00000000..f6228cd3
--- /dev/null
+++ b/core/templates/tpl/javascript_react/src/App.jsx
@@ -0,0 +1,11 @@
+import './App.css'
+
+function App() {
+ return (
+ <>
+