gpt-pilot/core/proc/process_manager.py

import asyncio
import signal
import sys
import time
from dataclasses import dataclass
from os import getenv
from os.path import abspath, join
from typing import Callable, Optional
from uuid import UUID, uuid4

import psutil

from core.log import get_logger

log = get_logger(__name__)

# Default timeout (seconds) for a single "non-blocking" read of a process
# output stream; see LocalProcess.read_output().
NONBLOCK_READ_TIMEOUT = 0.01
# Pause (seconds) between watcher passes while there are processes to watch;
# also used as the per-read timeout while ProcessManager.run_command() polls.
BUSY_WAIT_INTERVAL = 0.1
# Pause (seconds) between watcher passes when there is nothing to watch.
WATCHER_IDLE_INTERVAL = 1.0
# Default and upper bound (seconds) for the ProcessManager.run_command() timeout.
MAX_COMMAND_TIMEOUT = 180


@dataclass
class LocalProcess:
    """
    A shell command running as an asyncio subprocess, with its captured output.

    Output read from the process is decoded and accumulated in `stdout` and
    `stderr` each time `read_output()` is called.
    """

    # Unique identifier assigned when the process is started
    id: UUID
    # Shell command line that was executed
    cmd: str
    # Working directory the command was started in
    cwd: str
    # Environment variables the command was started with
    env: dict[str, str]
    # Everything read from the process' stdout/stderr so far
    stdout: str
    stderr: str
    # Underlying asyncio subprocess handle
    _process: asyncio.subprocess.Process

    def __hash__(self) -> int:
        # Identity is defined by the unique process id, which makes instances
        # usable in sets and as dict keys despite being mutable dataclasses.
        return hash(self.id)

    @staticmethod
    async def start(
        cmd: str,
        *,
        cwd: str = ".",
        env: dict[str, str],
        bg: bool = False,
    ) -> "LocalProcess":
        """
        Start a shell command and return a handle for it.

        :param cmd: Command line to run (through the shell).
        :param cwd: Working directory for the command.
        :param env: Environment variables for the command.
        :param bg: If True, the command is started in a new session and its
            stdin is closed right away.
        :return: LocalProcess wrapping the started subprocess.
        """
        log.debug(f"Starting process: {cmd} (cwd={cwd}, env={env})")
        _process = await asyncio.create_subprocess_shell(
            cmd,
            cwd=cwd,
            env=env,
            start_new_session=bg,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        if bg:
            # Background processes never get any input from us
            _process.stdin.close()

        return LocalProcess(
            id=uuid4(),
            cmd=cmd,
            cwd=cwd,
            env=env,
            stdout="",
            stderr="",
            _process=_process,
        )

    async def wait(self, timeout: Optional[float] = None) -> int:
        """
        Wait for the process to finish and collect its remaining output.

        If the process doesn't finish within the timeout, its whole process
        tree is killed.

        :param timeout: Maximum time to wait, in seconds. None (or 0) means
            wait indefinitely.
        :return: Exit code of the process.
        """
        try:
            future = self._process.wait()
            if timeout:
                future = asyncio.wait_for(future, timeout)
            retcode = await future
        except asyncio.TimeoutError:
            log.debug(f"Process {self.cmd} still running after {timeout}s, terminating")
            await self.terminate()
            # FIXME: this may still hang if we don't manage to kill the process.
            retcode = await self._process.wait()

        await self.read_output()
        return retcode

    @staticmethod
    async def _nonblock_read(reader: asyncio.StreamReader, timeout: float) -> str:
        """
        Reads data from a stream reader without blocking (for long).

        Data is read in bounded chunks until the timeout expires or the
        stream reaches EOF. Reading "until EOF" in a single call must be
        avoided here: if such a read is cancelled by the timeout, everything
        it has already taken out of the stream buffer is thrown away, so the
        output of a still-running process would be lost.

        :param reader: Async stream reader to read from.
        :param timeout: Timeout for the read operation (should not be too long).
        :return: Data read from the stream reader, or empty string.
        """
        chunk_size = 64 * 1024
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        chunks: list[bytes] = []

        remaining = timeout
        while remaining > 0:
            try:
                # read(n) returns as soon as *any* data is available and only
                # consumes what it returns, so a timeout here can't lose data.
                chunk = await asyncio.wait_for(reader.read(chunk_size), remaining)
            except asyncio.TimeoutError:
                break
            if not chunk:
                # EOF: the other end of the pipe has been closed
                break
            chunks.append(chunk)
            remaining = deadline - loop.time()

        # Decode once so multi-byte characters split across chunks survive
        return b"".join(chunks).decode("utf-8", errors="ignore")

    async def read_output(self, timeout: float = NONBLOCK_READ_TIMEOUT) -> tuple[str, str]:
        """
        Read any new output and append it to `stdout` and `stderr`.

        :param timeout: Timeout for reading each of the two streams.
        :return: Tuple of (new stdout, new stderr) read by this call.
        """
        new_stdout = await self._nonblock_read(self._process.stdout, timeout)
        new_stderr = await self._nonblock_read(self._process.stderr, timeout)
        self.stdout += new_stdout
        self.stderr += new_stderr
        return (new_stdout, new_stderr)

    async def _terminate_process_tree(self, signal: int):
        """
        Send a signal to the shell process and all of its descendants.

        The descendants are signalled first, then the shell process itself.
        Afterwards we wait (up to a second) for the processes to exit.

        :param signal: Signal number to send.
        """
        try:
            shell_process = psutil.Process(self._process.pid)
            processes = shell_process.children(recursive=True)
        except psutil.NoSuchProcess:
            # The process is already gone, there's nothing left to terminate
            return

        processes.append(shell_process)
        for proc in processes:
            try:
                proc.send_signal(signal)
            except psutil.NoSuchProcess:
                pass

        # NOTE: this is a synchronous wait and blocks the event loop for
        # up to the timeout.
        psutil.wait_procs(processes, timeout=1)

    async def terminate(self, kill: bool = True):
        """
        Terminate the process and all of its child processes.

        :param kill: If True, use SIGKILL where available, otherwise SIGTERM.
        """
        if kill and sys.platform != "win32":
            await self._terminate_process_tree(signal.SIGKILL)
        else:
            # Windows doesn't have SIGKILL
            await self._terminate_process_tree(signal.SIGTERM)

    @property
    def is_running(self) -> bool:
        """Whether the (shell) process is still running."""
        if self._process.returncode is not None:
            # asyncio already collected the exit code; don't ask psutil since
            # the PID may have been reused by an unrelated process by now.
            return False

        try:
            return psutil.Process(self._process.pid).is_running()
        except psutil.NoSuchProcess:
            return False

    @property
    def pid(self) -> int:
        """PID of the (shell) process."""
        return self._process.pid


class ProcessManager:
    """
    Starts local processes and keeps track of the ones running in background.

    A watcher task is started together with the manager. It periodically
    reads the output of tracked (background) processes and notifies the
    optional handlers about new output and about processes that exited.
    """

    def __init__(
        self,
        *,
        root_dir: str,
        env: Optional[dict[str, str]] = None,
        output_handler: Optional[Callable] = None,
        exit_handler: Optional[Callable] = None,
    ):
        """
        Create the process manager and start its watcher task.

        Must be called while an event loop is running, since the watcher
        is started with `asyncio.create_task()`.

        :param root_dir: Directory that working directories are relative to.
        :param env: Default environment for started processes. If omitted,
            only PATH is passed on from the current environment.
        :param output_handler: Optional coroutine function, awaited with
            (stdout, stderr) whenever new output is read.
        :param exit_handler: Optional coroutine function, awaited with the
            LocalProcess once a tracked process is found to have exited.
        """
        if env is None:
            path = getenv("PATH")
            # Environment values must be strings, so leave PATH out entirely
            # if it's not set instead of passing None to the subprocess.
            env = {"PATH": path} if path is not None else {}
        self.processes: dict[UUID, LocalProcess] = {}
        self.default_env = env
        self.root_dir = root_dir
        self.watcher_should_run = True
        self.watcher_task = asyncio.create_task(self.watcher())
        self.output_handler = output_handler
        self.exit_handler = exit_handler

    async def stop_watcher(self):
        """
        Stop the process watcher.

        This should only be done when the ProcessManager is no longer needed.

        :raises ValueError: If the watcher has already been stopped.
        """
        if not self.watcher_should_run:
            raise ValueError("Process watcher is not running")

        self.watcher_should_run = False
        await self.watcher_task

    async def _emit_output(self, out: str, err: str):
        """Pass newly read output to the output handler, if there is any."""
        if self.output_handler and (out or err):
            await self.output_handler(out, err)

    async def watcher(self):
        """
        Watch over the processes and manage their output and lifecycle.

        This is a separate coroutine running independently of the caller
        coroutine.
        """
        # IDs of processes whose output has been fully read after they finished
        complete_processes = set()

        while self.watcher_should_run:
            # Forget about processes that are no longer tracked, so the set
            # doesn't grow without bounds
            complete_processes.intersection_update(self.processes)

            procs = [p for p in self.processes.values() if p.id not in complete_processes]
            if not procs:
                await asyncio.sleep(WATCHER_IDLE_INTERVAL)
                continue

            for process in procs:
                await self._emit_output(*await process.read_output())

                if process.is_running:
                    continue

                # The process may have written more output between the read
                # above and its exit; pick that up before marking it complete,
                # because complete processes are never read again.
                await self._emit_output(*await process.read_output())

                # We're not removing the complete process from the self.processes
                # list to give time to the rest of the system to read its outputs
                complete_processes.add(process.id)
                if self.exit_handler:
                    await self.exit_handler(process)

            # Sleep a bit to avoid busy-waiting
            await asyncio.sleep(BUSY_WAIT_INTERVAL)

    async def start_process(
        self,
        cmd: str,
        *,
        cwd: str = ".",
        env: Optional[dict[str, str]] = None,
        bg: bool = True,
    ) -> LocalProcess:
        """
        Start a process.

        Background processes are tracked by the manager (and its watcher)
        until they're removed with `terminate_process()`.

        :param cmd: Command to run.
        :param cwd: Working directory, relative to the root directory.
        :param env: Environment variables, added on top of the default ones.
        :param bg: Whether to start the process in background.
        :return: The started process.
        """
        env = {**self.default_env, **(env or {})}
        abs_cwd = abspath(join(self.root_dir, cwd))
        process = await LocalProcess.start(cmd, cwd=abs_cwd, env=env, bg=bg)
        if bg:
            self.processes[process.id] = process
        return process

    async def run_command(
        self,
        cmd: str,
        *,
        cwd: str = ".",
        env: Optional[dict[str, str]] = None,
        timeout: float = MAX_COMMAND_TIMEOUT,
    ) -> tuple[Optional[int], str, str]:
        """
        Run command and wait for it to finish.

        Status code is an integer representing the process exit code, or
        None if the process timed out and was terminated.

        :param cmd: Command to run.
        :param cwd: Working directory.
        :param env: Environment variables.
        :param timeout: Timeout in seconds (capped to MAX_COMMAND_TIMEOUT).
        :return: Tuple of (status code, stdout, stderr).
        """
        timeout = min(timeout, MAX_COMMAND_TIMEOUT)
        terminated = False
        process = await self.start_process(cmd, cwd=cwd, env=env, bg=False)

        # Monotonic clock, so system clock adjustments can't affect the timeout
        t0 = time.monotonic()
        while process.is_running and (time.monotonic() - t0) < timeout:
            await self._emit_output(*await process.read_output(BUSY_WAIT_INTERVAL))

        if process.is_running:
            log.debug(f"Process {cmd} still running after {timeout}s, terminating")
            await process.terminate()
            terminated = True
        else:
            await process.wait()

        # Collect whatever output is left after the process has finished
        await self._emit_output(*await process.read_output())

        if terminated:
            status_code = None
        else:
            status_code = process._process.returncode or 0

        return (status_code, process.stdout, process.stderr)

    def list_running_processes(self) -> list[LocalProcess]:
        """Return tracked (background) processes that are still running."""
        return [p for p in self.processes.values() if p.is_running]

    async def terminate_process(self, process_id: UUID) -> tuple[str, str]:
        """
        Terminate a tracked process and stop tracking it.

        :param process_id: ID of the process to terminate.
        :return: Tuple of (stdout, stderr) collected from the process so far.
        :raises ValueError: If there's no tracked process with the given ID.
        """
        process = self.processes.get(process_id)
        if process is None:
            raise ValueError(f"Process {process_id} not found")

        try:
            await process.terminate(kill=False)
        except psutil.NoSuchProcess:
            # The process has already exited on its own; we still want to
            # stop tracking it and return its output.
            pass
        self.processes.pop(process_id, None)

        return (process.stdout, process.stderr)