Files
10ksim/deployments/deployment/base_experiment.py
PearsonWhite 4fbe63bf18 Add deploy to BaseExperiment and add --dry-run (#132)
* Move some logic for deploying to BaseExperiment

* Add --dry-run to BaseExperiment and kube_utils
2025-09-22 11:49:56 -04:00

225 lines
7.1 KiB
Python
Executable File

import json
import logging
import os
import shutil
from abc import ABC, abstractmethod
from argparse import ArgumentParser, Namespace
from collections import defaultdict
from contextlib import ExitStack
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional
from kubernetes.client import ApiClient
from pydantic import BaseModel, Field
from ruamel import yaml
from ruamel.yaml.comments import CommentedMap
from deployment.builders import build_deployment
from kube_utils import (
dict_get,
get_cleanup,
kubectl_apply,
poll_namespace_has_objects,
wait_for_no_objs_in_namespace,
wait_for_rollout,
)
# Module-level logger, named after this module via `__name__`.
logger = logging.getLogger(__name__)
class BaseExperiment(ABC, BaseModel):
    """Base experiment that adds an ExitStack with `workdir` to `run` and uses an internal `_run`.

    How to use:
        - Inherit from this class.
        - Call `BaseExperiment.add_args` in the child class's `add_parser`
        - Implement `_run` in the child class.
    """

    # Where `log_event` appends events. A relative path is joined onto the
    # run's workdir by `_set_events_log`.
    events_log_path: Path = Field(default=Path("events.log"))

    # `default_factory` gives every instance its own defaultdict.
    deployed: dict[str, list] = Field(default_factory=lambda: defaultdict(list))
    """Dict of [namespace : yamls] for every yaml deployed with self.deploy.

    Used to determine whether or not to call `_wait_until_clear`."""

    @staticmethod
    def add_args(subparser: ArgumentParser):
        """Register the `--workdir`, `--skip-check` and `--dry-run` options on `subparser`."""
        subparser.add_argument(
            "--workdir",
            type=str,
            required=False,
            default=None,
            help="Folder to use for generating the deployment files.",
        )
        subparser.add_argument(
            "--skip-check",
            action="store_true",
            required=False,
            help="If present, does not wait until the namespace is empty before running the test.",
        )
        subparser.add_argument(
            "--dry-run",
            action="store_true",
            required=False,
            default=False,
            help="If True, does not actually deploy kubernetes configs but run kubectl apply --dry-run.",
        )

    def deploy(
        self,
        api_client: ApiClient,
        stack: ExitStack,
        args: Namespace,
        values_yaml,
        workdir,
        service: str,
        *,
        wait_for_ready=True,
        extra_values_paths=None,
        timeout=3600,
    ):
        """Build the deployment yaml for `service`, apply it, and optionally wait for rollout.

        Args:
            api_client: Kubernetes client forwarded to the `kube_utils` helpers.
            stack: Exit stack on which the cleanup callback for this deployment
                is registered (skipped for dry runs).
            args: Parsed CLI arguments. `dry_run` and `skip_check` are read
                from it; each is treated as False when absent.
            values_yaml: Values forwarded to `build_deployment` as `cli_values`.
            workdir: Base folder; the deployment is built in `<workdir>/<service>`.
            service: Name of the service. Also the sub-directory, next to this
                file, that is passed as `deployment_dir`.
            wait_for_ready: When True (and not a dry run), call
                `wait_for_rollout` after applying.
            extra_values_paths: Forwarded to `build_deployment`.
            timeout: Forwarded to `wait_for_rollout`.

        Returns:
            The yaml object produced by `build_deployment`.

        Raises:
            ValueError: If the built yaml lacks `metadata/namespace`,
                `metadata/name` or `kind`.
        """
        yaml_obj = build_deployment(
            deployment_dir=Path(__file__).parent / service,
            workdir=os.path.join(workdir, service),
            cli_values=values_yaml,
            name=service,
            extra_values_names=[],
            extra_values_paths=extra_values_paths,
        )

        for field in ("metadata/namespace", "metadata/name", "kind"):
            if dict_get(yaml_obj, field) is None:
                raise ValueError(
                    f"Deployment yaml must have an explicit value for field. Field: `{field}`"
                )

        # Both flags are optional on `args`; a missing flag means "off".
        dry_run = getattr(args, "dry_run", False)
        skip_check = getattr(args, "skip_check", False)

        namespace = yaml_obj["metadata"]["namespace"]

        # `setdefault` works whether `deployed` is a defaultdict or a plain dict.
        namespace_deployments = self.deployed.setdefault(namespace, [])
        if not namespace_deployments:
            # First deployment into this namespace during this run.
            self._wait_until_clear(
                api_client=api_client,
                namespace=namespace,
                skip_check=skip_check,
            )

        if not dry_run:
            # Register the cleanup before applying, so it is already on the
            # stack if `kubectl_apply` or the rollout wait raises.
            cleanup = get_cleanup(
                api_client=api_client,
                namespace=namespace,
                deployments=[yaml_obj],
            )
            stack.callback(cleanup)

        self.log_event(
            {"event": "deployment", "phase": "start", "service": service, "namespace": namespace}
        )
        namespace_deployments.append(yaml_obj)
        kubectl_apply(yaml_obj, namespace=namespace, dry_run=dry_run)

        if wait_for_ready and not dry_run:
            wait_for_rollout(
                yaml_obj["kind"],
                yaml_obj["metadata"]["name"],
                namespace,
                timeout,
                api_client,
                ("Ready", "True"),
            )
        self.log_event(
            {"event": "deployment", "phase": "finished", "service": service, "namespace": namespace}
        )

        return yaml_obj

    def _set_events_log(self, workdir: Optional[str]) -> None:
        """Anchor a relative `events_log_path` inside `workdir`.

        An absolute `events_log_path` is left untouched.

        Raises:
            ValueError: If `events_log_path` is relative and `workdir` is None.
        """
        if self.events_log_path.is_absolute():
            return
        if workdir is None:
            raise ValueError(
                "Logging event requires absolute events_log_path or non-None workdir. "
                f"Path: `{self.events_log_path}` workdir: `{workdir}` "
                f"experiment type: `{type(self)}`"
            )
        # NOTE(review): when `workdir` is relative the joined path stays
        # relative, so calling this again on the same instance prepends
        # `workdir` a second time.
        self.events_log_path = Path(workdir) / self.events_log_path

    def run(
        self,
        api_client: ApiClient,
        args: Namespace,
        values_yaml: Optional[yaml.YAMLObject],
    ):
        """Prepare the workdir and events log, then execute `_run` inside an ExitStack.

        Args:
            api_client: Kubernetes client forwarded to `_run`.
            args: Parsed CLI arguments. Reads `output_folder`, `workdir` and,
                when present, `values_path`.
            values_yaml: CLI values; an empty `CommentedMap` is used when None.
        """
        if values_yaml is None:
            values_yaml = CommentedMap()

        self.deployed.clear()

        workdir = args.output_folder
        if args.workdir:
            workdir = os.path.join(workdir, args.workdir)

        # Resolve the log location before any callback that logs is registered,
        # so a setup failure cannot write to an unresolved relative path.
        os.makedirs(workdir, exist_ok=True)
        self._set_events_log(workdir)

        # Keep a copy of the CLI values file next to the results, if one was given.
        values_path = getattr(args, "values_path", None)
        if values_path is not None:
            shutil.copy(values_path, os.path.join(workdir, "cli_values.yaml"))

        with ExitStack() as stack:
            # ExitStack callbacks run in reverse registration order: the first
            # one registered runs last, after all cleanup callbacks.
            stack.callback(lambda: self.log_event("cleanup_finished"))
            try:
                self._run(
                    api_client=api_client,
                    workdir=workdir,
                    args=args,
                    values_yaml=values_yaml,
                    stack=stack,
                )
            finally:
                # Registered last so it runs first on exit. Doing this in
                # `finally` also logs "cleanup_start" when `_run` raises.
                stack.callback(lambda: self.log_event("cleanup_start"))
            self.log_event("run_finished")

    @abstractmethod
    def _run(
        self,
        # TODO [move things into class]: move all into class so they can be accessed more easily and set before calling run?
        api_client: ApiClient,
        workdir: str,
        args: Namespace,
        values_yaml: Optional[yaml.YAMLObject],
        stack: ExitStack,
    ):
        """Experiment body, implemented by subclasses and called from `run`.

        `stack` is the ExitStack opened by `run`; callbacks registered on it
        are executed when `run` leaves its `with` block.
        """

    def _wait_until_clear(self, api_client: ApiClient, namespace: str, skip_check: bool):
        """Wait for `namespace` to be clear, or only warn when `skip_check` is set."""
        # Wait for namespace to be clear unless --skip-check flag was used.
        if not skip_check:
            self.log_event("wait_for_clear_start")
            wait_for_no_objs_in_namespace(namespace=namespace, api_client=api_client)
            self.log_event("wait_for_clear_finished")
            return

        # NOTE(review): the helper's name suggests it returns True when the
        # namespace HAS objects, which would invert this check. Behaviour is
        # kept as-is; confirm against kube_utils.
        namespace_is_empty = poll_namespace_has_objects(
            namespace=namespace, api_client=api_client
        )
        if not namespace_is_empty:
            logger.warning("Namespace is not empty! Namespace: `%s`", namespace)

    def _preprocess_event(self, event: Any) -> Any:
        """Convert `event` to the form written to the events log.

        A string becomes `{"event": <string>}`. A dict gets a UTC `timestamp`
        entry and is returned as a JSON string; the caller's dict is not
        modified. Any other value is returned unchanged.
        """
        if isinstance(event, str):
            event = {"event": event}
        if not isinstance(event, dict):
            return event
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
        # Build a new dict rather than inserting into the one passed in.
        return json.dumps({**event, "timestamp": timestamp})

    def log_event(self, event: Any):
        """Append `event` as one line to the file at `events_log_path`."""
        line = self._preprocess_event(event)
        out_path = Path(self.events_log_path)
        with open(out_path, "a", encoding="utf-8") as out_file:
            # f-string formatting also handles events that are not str or dict.
            out_file.write(f"{line}\n")