Files
gpt-pilot/core/proc/process_manager.py
Senko Rasic 5b474ccc1f merge gpt-pilot 0.2 codebase
This is a complete rewrite of the GPT Pilot core, from the ground
up, making the agentic architecture front and center, and also
fixing some long-standing problems with the database architecture
that weren't feasible to solve without breaking compatibility.

As the database structure and config file syntax have changed,
we have automatic imports for projects and current configs,
see the README.md file for details.

This also relicenses the project to FSL-1.1-MIT license.
2024-05-22 21:42:25 +02:00

279 lines
8.8 KiB
Python

import asyncio
import signal
import sys
import time
from dataclasses import dataclass
from os import getenv
from os.path import abspath, join
from typing import Callable, Optional
from uuid import UUID, uuid4
import psutil
from core.log import get_logger
log = get_logger(__name__)
NONBLOCK_READ_TIMEOUT = 0.01
BUSY_WAIT_INTERVAL = 0.1
WATCHER_IDLE_INTERVAL = 1.0
MAX_COMMAND_TIMEOUT = 180
@dataclass
class LocalProcess:
id: UUID
cmd: str
cwd: str
env: dict[str, str]
stdout: str
stderr: str
_process: asyncio.subprocess.Process
def __hash__(self) -> int:
return hash(self.id)
@staticmethod
async def start(
cmd: str,
*,
cwd: str = ".",
env: dict[str, str],
bg: bool = False,
) -> "LocalProcess":
log.debug(f"Starting process: {cmd} (cwd={cwd}, env={env})")
_process = await asyncio.create_subprocess_shell(
cmd,
cwd=cwd,
env=env,
start_new_session=bg,
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
if bg:
_process.stdin.close()
return LocalProcess(
id=uuid4(),
cmd=cmd,
cwd=cwd,
env=env,
stdout="",
stderr="",
_process=_process,
)
async def wait(self, timeout: Optional[float] = None) -> int:
try:
future = self._process.wait()
if timeout:
future = asyncio.wait_for(future, timeout)
retcode = await future
except asyncio.TimeoutError:
log.debug(f"Process {self.cmd} still running after {timeout}s, terminating")
await self.terminate()
# FIXME: this may still hang if we don't manage to kill the process.
retcode = await self._process.wait()
await self.read_output()
return retcode
@staticmethod
async def _nonblock_read(reader: asyncio.StreamReader, timeout: float) -> str:
"""
Reads data from a stream reader without blocking (for long).
This wraps the read in a (short) timeout to avoid blocking the event loop for too long.
:param reader: Async stream reader to read from.
:param timeout: Timeout for the read operation (should not be too long).
:return: Data read from the stream reader, or empty string.
"""
try:
data = await asyncio.wait_for(reader.read(), timeout)
return data.decode("utf-8", errors="ignore")
except asyncio.TimeoutError:
return ""
async def read_output(self, timeout: float = NONBLOCK_READ_TIMEOUT) -> tuple[str, str]:
new_stdout = await self._nonblock_read(self._process.stdout, timeout)
new_stderr = await self._nonblock_read(self._process.stderr, timeout)
self.stdout += new_stdout
self.stderr += new_stderr
return (new_stdout, new_stderr)
async def _terminate_process_tree(self, signal: int):
# This is a recursive function that terminates the entire process tree
# of the current process. It first terminates all child processes, then
# terminates itself.
shell_process = psutil.Process(self._process.pid)
processes = shell_process.children(recursive=True)
processes.append(shell_process)
for proc in processes:
try:
proc.send_signal(signal)
except psutil.NoSuchProcess:
pass
psutil.wait_procs(processes, timeout=1)
async def terminate(self, kill: bool = True):
if kill and sys.platform != "win32":
await self._terminate_process_tree(signal.SIGKILL)
else:
# Windows doesn't have SIGKILL
await self._terminate_process_tree(signal.SIGTERM)
@property
def is_running(self) -> bool:
try:
return psutil.Process(self._process.pid).is_running()
except psutil.NoSuchProcess:
return False
@property
def pid(self) -> int:
return self._process.pid
class ProcessManager:
def __init__(
self,
*,
root_dir: str,
env: Optional[dict[str, str]] = None,
output_handler: Optional[Callable] = None,
exit_handler: Optional[Callable] = None,
):
if env is None:
env = {
"PATH": getenv("PATH"),
}
self.processes: dict[UUID, LocalProcess] = {}
self.default_env = env
self.root_dir = root_dir
self.watcher_should_run = True
self.watcher_task = asyncio.create_task(self.watcher())
self.output_handler = output_handler
self.exit_handler = exit_handler
async def stop_watcher(self):
"""
Stop the process watcher.
This should only be done when the ProcessManager is no longer needed.
"""
if not self.watcher_should_run:
raise ValueError("Process watcher is not running")
self.watcher_should_run = False
await self.watcher_task
async def watcher(self):
"""
Watch over the processes and manage their output and lifecycle.
This is a separate coroutine running independently of the caller
coroutine.
"""
# IDs of processes whos output has been fully read after they finished
complete_processes = set()
while self.watcher_should_run:
procs = [p for p in self.processes.values() if p.id not in complete_processes]
if len(procs) == 0:
await asyncio.sleep(WATCHER_IDLE_INTERVAL)
continue
for process in procs:
out, err = await process.read_output()
if self.output_handler and (out or err):
await self.output_handler(out, err)
if not process.is_running:
# We're not removing the complete process from the self.processes
# list to give time to the rest of the system to read its outputs
complete_processes.add(process.id)
if self.exit_handler:
await self.exit_handler(process)
# Sleep a bit to avoid busy-waiting
await asyncio.sleep(BUSY_WAIT_INTERVAL)
async def start_process(
self,
cmd: str,
*,
cwd: str = ".",
env: Optional[dict[str, str]] = None,
bg: bool = True,
) -> LocalProcess:
env = {**self.default_env, **(env or {})}
abs_cwd = abspath(join(self.root_dir, cwd))
process = await LocalProcess.start(cmd, cwd=abs_cwd, env=env, bg=bg)
if bg:
self.processes[process.id] = process
return process
async def run_command(
self,
cmd: str,
*,
cwd: str = ".",
env: Optional[dict[str, str]] = None,
timeout: float = MAX_COMMAND_TIMEOUT,
) -> tuple[Optional[int], str, str]:
"""
Run command and wait for it to finish.
Status code is an integer representing the process exit code, or
None if the process timed out and was terminated.
:param cmd: Command to run.
:param cwd: Working directory.
:param env: Environment variables.
:param timeout: Timeout in seconds.
:return: Tuple of (status code, stdout, stderr).
"""
timeout = min(timeout, MAX_COMMAND_TIMEOUT)
terminated = False
process = await self.start_process(cmd, cwd=cwd, env=env, bg=False)
t0 = time.time()
while process.is_running and (time.time() - t0) < timeout:
out, err = await process.read_output(BUSY_WAIT_INTERVAL)
if self.output_handler and (out or err):
await self.output_handler(out, err)
if process.is_running:
log.debug(f"Process {cmd} still running after {timeout}s, terminating")
await process.terminate()
terminated = True
else:
await process.wait()
out, err = await process.read_output()
if self.output_handler and (out or err):
await self.output_handler(out, err)
if terminated:
status_code = None
else:
status_code = process._process.returncode or 0
return (status_code, process.stdout, process.stderr)
def list_running_processes(self):
return [p for p in self.processes.values() if p.is_running]
async def terminate_process(self, process_id: UUID) -> tuple[str, str]:
if process_id not in self.processes:
raise ValueError(f"Process {process_id} not found")
process = self.processes[process_id]
await process.terminate(kill=False)
del self.processes[process_id]
return (process.stdout, process.stderr)