Add comments to the runtime_build script (#3333)

This commit is contained in:
mamoodi
2024-08-12 15:17:12 -04:00
committed by GitHub
parent 5226d8d684
commit e2b2f74737
2 changed files with 133 additions and 55 deletions

View File

@@ -1,6 +1,7 @@
DOCKER_REGISTRY=ghcr.io DOCKER_REGISTRY=ghcr.io
DOCKER_ORG=opendevin DOCKER_ORG=opendevin
DOCKER_BASE_DIR="./containers/runtime" DOCKER_BASE_DIR="./containers/runtime"
# These two variables will be appended by the runtime_build.py script # These variables will be appended by the runtime_build.py script
# DOCKER_IMAGE= # DOCKER_IMAGE=
# DOCKER_IMAGE_TAG= # DOCKER_IMAGE_TAG=
# DOCKER_IMAGE_HASH_TAG=

View File

@@ -18,7 +18,11 @@ RUNTIME_IMAGE_REPO = os.getenv(
def _get_package_version(): def _get_package_version():
"""Read the version from pyproject.toml as the other one may be outdated.""" """Read the version from pyproject.toml.
Returns:
- The version specified in pyproject.toml under [tool.poetry]
"""
project_root = os.path.dirname(os.path.dirname(os.path.abspath(opendevin.__file__))) project_root = os.path.dirname(os.path.dirname(os.path.abspath(opendevin.__file__)))
pyproject_path = os.path.join(project_root, 'pyproject.toml') pyproject_path = os.path.join(project_root, 'pyproject.toml')
with open(pyproject_path, 'r') as f: with open(pyproject_path, 'r') as f:
@@ -27,13 +31,15 @@ def _get_package_version():
def _create_project_source_dist(): def _create_project_source_dist():
"""Create a source distribution of the project. Return the path to the tarball.""" """Create a source distribution of the project.
# Copy the project directory to the container
# get the location of "opendevin" package Returns:
- str: The path to the project tarball
"""
project_root = os.path.dirname(os.path.dirname(os.path.abspath(opendevin.__file__))) project_root = os.path.dirname(os.path.dirname(os.path.abspath(opendevin.__file__)))
logger.info(f'Using project root: {project_root}') logger.info(f'Using project root: {project_root}')
# run "python -m build -s" on project_root # run "python -m build -s" on project_root to create project tarball
result = subprocess.run(['python', '-m', 'build', '-s', project_root]) result = subprocess.run(['python', '-m', 'build', '-s', project_root])
if result.returncode != 0: if result.returncode != 0:
logger.error(f'Build failed: {result}') logger.error(f'Build failed: {result}')
@@ -53,12 +59,18 @@ def _create_project_source_dist():
def _put_source_code_to_dir(temp_dir: str): def _put_source_code_to_dir(temp_dir: str):
"""Put the source code of OpenDevin to the temp_dir/code.""" """Builds the project source tarball. Copies it to temp_dir and unpacks it.
The OpenDevin source code ends up in the temp_dir/code directory
Parameters:
- temp_dir (str): The directory to put the source code in
"""
# Build the project source tarball
tarball_path = _create_project_source_dist() tarball_path = _create_project_source_dist()
filename = os.path.basename(tarball_path) filename = os.path.basename(tarball_path)
filename = filename.removesuffix('.tar.gz') filename = filename.removesuffix('.tar.gz')
# move the tarball to temp_dir # Move the project tarball to temp_dir
_res = shutil.copy(tarball_path, os.path.join(temp_dir, 'project.tar.gz')) _res = shutil.copy(tarball_path, os.path.join(temp_dir, 'project.tar.gz'))
if _res: if _res:
os.remove(tarball_path) os.remove(tarball_path)
@@ -66,11 +78,11 @@ def _put_source_code_to_dir(temp_dir: str):
f'Source distribution moved to {os.path.join(temp_dir, "project.tar.gz")}' f'Source distribution moved to {os.path.join(temp_dir, "project.tar.gz")}'
) )
# unzip the tarball # Unzip the tarball
shutil.unpack_archive(os.path.join(temp_dir, 'project.tar.gz'), temp_dir) shutil.unpack_archive(os.path.join(temp_dir, 'project.tar.gz'), temp_dir)
# remove the tarball # Remove the tarball
os.remove(os.path.join(temp_dir, 'project.tar.gz')) os.remove(os.path.join(temp_dir, 'project.tar.gz'))
# rename the directory to the 'code' # Rename the directory containing the code to 'code'
os.rename(os.path.join(temp_dir, filename), os.path.join(temp_dir, 'code')) os.rename(os.path.join(temp_dir, filename), os.path.join(temp_dir, 'code'))
logger.info(f'Unpacked source code directory: {os.path.join(temp_dir, "code")}') logger.info(f'Unpacked source code directory: {os.path.join(temp_dir, "code")}')
@@ -80,13 +92,23 @@ def _generate_dockerfile(
skip_init: bool = False, skip_init: bool = False,
extra_deps: str | None = None, extra_deps: str | None = None,
) -> str: ) -> str:
"""Generate the Dockerfile content for the eventstream runtime image based on user-provided base image.""" """Generate the Dockerfile content for the runtime image based on the base image.
Parameters:
- base_image (str): The base image provided for the runtime image
- skip_init (bool): Passed to the Dockerfile.j2 template as `skip_init`
- extra_deps (str | None):
Returns:
- str: The resulting Dockerfile content
"""
env = Environment( env = Environment(
loader=FileSystemLoader( loader=FileSystemLoader(
searchpath=os.path.join(os.path.dirname(__file__), 'runtime_templates') searchpath=os.path.join(os.path.dirname(__file__), 'runtime_templates')
) )
) )
template = env.get_template('Dockerfile.j2') template = env.get_template('Dockerfile.j2')
dockerfile_content = template.render( dockerfile_content = template.render(
base_image=base_image, base_image=base_image,
skip_init=skip_init, skip_init=skip_init,
@@ -101,11 +123,21 @@ def prep_docker_build_folder(
skip_init: bool = False, skip_init: bool = False,
extra_deps: str | None = None, extra_deps: str | None = None,
) -> str: ) -> str:
"""Prepares the docker build folder by copying the source code and generating the Dockerfile. """Prepares a docker build folder by copying the source code and generating the Dockerfile
Return the MD5 hash of the directory. Parameters:
- dir_path (str): The build folder to place the source code and Dockerfile
- base_image (str): The base Docker image to use for the Dockerfile
- skip_init (bool): Forwarded to _generate_dockerfile when creating the Dockerfile
- extra_deps (str | None):
Returns:
- str: The MD5 hash of the build folder directory (dir_path)
""" """
# Copy the source code to directory. It will end up in dir_path/code
_put_source_code_to_dir(dir_path) _put_source_code_to_dir(dir_path)
# Create a Dockerfile and write it to dir_path
dockerfile_content = _generate_dockerfile( dockerfile_content = _generate_dockerfile(
base_image, base_image,
skip_init=skip_init, skip_init=skip_init,
@@ -113,14 +145,15 @@ def prep_docker_build_folder(
) )
logger.info( logger.info(
( (
f'===== Dockerfile content =====\n' f'===== Dockerfile content start =====\n'
f'{dockerfile_content}\n' f'{dockerfile_content}\n'
f'===============================' f'===== Dockerfile content end ====='
) )
) )
with open(os.path.join(dir_path, 'Dockerfile'), 'w') as file: with open(os.path.join(dir_path, 'Dockerfile'), 'w') as file:
file.write(dockerfile_content) file.write(dockerfile_content)
# Get the MD5 hash of the dir_path directory
hash = dirhash(dir_path, 'md5') hash = dirhash(dir_path, 'md5')
logger.info( logger.info(
f'Input base image: {base_image}\n' f'Input base image: {base_image}\n'
@@ -138,23 +171,22 @@ def _build_sandbox_image(
target_image_hash_tag: str, target_image_hash_tag: str,
target_image_tag: str, target_image_tag: str,
) -> str: ) -> str:
"""Build the sandbox image. """Build and tag the sandbox image.
The image will be tagged as both: The image will be tagged as both:
- target_image_repo:target_image_hash_tag - target_image_hash_tag
- target_image_repo:target_image_tag - target_image_tag
Args: Parameters:
docker_folder: str: the path to the docker build folder - docker_folder (str): the path to the docker build folder
docker_client: docker.DockerClient: the docker client - docker_client (docker.DockerClient): the docker client
target_image_repo: str: the repository name for the target image - target_image_repo (str): the repository name for the target image
target_image_hash_tag: str: the *hash* tag for the target image that is calculated based - target_image_hash_tag (str): the *hash* tag for the target image that is calculated based
on the contents of the docker build folder (source code and Dockerfile) on the contents of the docker build folder (source code and Dockerfile)
e.g., ubuntu:latest -> od_runtime:1234567890abcdef e.g. 1234567890abcdef
target_image_tag: str: the tag for the target image that's generic and based on the base image name - target_image_tag (str): the tag for the target image that's generic and based on the base image name
e.g., ubuntu:latest -> od_runtime:ubuntu_tag_latest e.g. od_v0.8.3_image_ubuntu_tag_22.04
""" """
# 1. Always directly build and tag using the dir_hash # Build the Docker image and tag it with the hash (target_image_hash_tag)
target_image_hash_name = f'{target_image_repo}:{target_image_hash_tag}' target_image_hash_name = f'{target_image_repo}:{target_image_hash_tag}'
try: try:
build_logs = docker_client.api.build( build_logs = docker_client.api.build(
@@ -175,7 +207,7 @@ def _build_sandbox_image(
else: else:
logger.info(str(log)) logger.info(str(log))
# 2. Re-tag the image with a more generic tag (as somewhat of "latest" tag) # Re-tag the image with the target_image_tag
logger.info(f'Image [{target_image_hash_name}] build finished.') logger.info(f'Image [{target_image_hash_name}] build finished.')
image = docker_client.images.get(target_image_hash_name) image = docker_client.images.get(target_image_hash_name)
image.tag(target_image_repo, target_image_tag) image.tag(target_image_repo, target_image_tag)
@@ -183,24 +215,32 @@ def _build_sandbox_image(
f'Re-tagged image [{target_image_hash_name}] with more generic tag [{target_image_tag}]' f'Re-tagged image [{target_image_hash_name}] with more generic tag [{target_image_tag}]'
) )
# check if the image is built successfully # Check if the image is built successfully
image = docker_client.images.get(target_image_hash_name) image = docker_client.images.get(target_image_hash_name)
if image is None: if image is None:
raise RuntimeError( raise RuntimeError(f'Build failed: Image {target_image_hash_name} not found')
f'Build failed: Image [{target_image_repo}:{target_image_hash_tag}] not found'
)
logger.info( logger.info(
f'Image [{target_image_repo}:{target_image_hash_tag}] (hash: [{target_image_tag}]) built successfully' f'Image {target_image_repo} with tags [{target_image_hash_tag}, {target_image_tag}] built successfully'
) )
return target_image_hash_name return target_image_hash_name
def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]: def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:
"""Retrieves the Docker repo and tag associated with the Docker image.
Parameters:
- base_image (str): The name of the base Docker image
Returns:
- tuple[str, str]: The Docker repo and tag of the Docker image
"""
if RUNTIME_IMAGE_REPO in base_image: if RUNTIME_IMAGE_REPO in base_image:
logger.info( logger.info(
f'The provided image [{base_image}] is a already a valid od_runtime image.\n' f'The provided image [{base_image}] is a already a valid od_runtime image.\n'
f'Will try to reuse it as is.' f'Will try to reuse it as is.'
) )
if ':' not in base_image: if ':' not in base_image:
base_image = base_image + ':latest' base_image = base_image + ':latest'
repo, tag = base_image.split(':') repo, tag = base_image.split(':')
@@ -217,9 +257,14 @@ def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:
def _check_image_exists(image_name: str, docker_client: docker.DockerClient) -> bool: def _check_image_exists(image_name: str, docker_client: docker.DockerClient) -> bool:
"""Check if the image exists in the registry (try to pull it first) AND in the local store. """Check if the image exists in the registry (try to pull it first) AND in the local store.
image_name is f'{repo}:{tag}' Parameters:
- image_name (str): The Docker image to check (<image repo>:<image tag>)
- docker_client (docker.DockerClient): The Docker client
Returns:
- bool: Whether the Docker image exists in the registry and in the local store
""" """
# Try to pull the new image from the registry # Try to pull the Docker image from the registry
try: try:
docker_client.images.pull(image_name) docker_client.images.pull(image_name)
except Exception: except Exception:
@@ -241,12 +286,22 @@ def build_runtime_image(
dry_run: bool = False, dry_run: bool = False,
force_rebuild: bool = False, force_rebuild: bool = False,
) -> str: ) -> str:
"""Build the runtime image for the OpenDevin runtime. """Prepares the final docker build folder.
If dry_run is False, it will also build the OpenDevin runtime Docker image using the docker build folder.
Parameters:
- base_image (str): The name of the base Docker image to use
- docker_client (docker.DockerClient): The Docker client
- extra_deps (str):
- docker_build_folder (str): The directory to use for the build. If not provided a temporary directory will be used
- dry_run (bool): if True, it will only prepare the build folder. It will not actually build the Docker image
- force_rebuild (bool): if True, it will create the Dockerfile which uses the base_image
Returns:
- str: <image_repo>:<MD5 hash>. Where MD5 hash is the hash of the docker build folder
See https://docs.all-hands.dev/modules/usage/runtime for more details. See https://docs.all-hands.dev/modules/usage/runtime for more details.
""" """
runtime_image_repo, runtime_image_tag = get_runtime_image_repo_and_tag(base_image)
# Calculate the hash for the docker build folder (source code and Dockerfile) # Calculate the hash for the docker build folder (source code and Dockerfile)
with tempfile.TemporaryDirectory() as temp_dir: with tempfile.TemporaryDirectory() as temp_dir:
from_scratch_hash = prep_docker_build_folder( from_scratch_hash = prep_docker_build_folder(
@@ -256,34 +311,35 @@ def build_runtime_image(
extra_deps=extra_deps, extra_deps=extra_deps,
) )
# hash image name, if the hash matches, it means the image is already runtime_image_repo, runtime_image_tag = get_runtime_image_repo_and_tag(base_image)
# built from scratch with the *exact SAME source code* on the exact Dockerfile
# The image name in the format <image repo>:<hash>
hash_runtime_image_name = f'{runtime_image_repo}:{from_scratch_hash}' hash_runtime_image_name = f'{runtime_image_repo}:{from_scratch_hash}'
# non-hash generic image name, it could contains *similar* dependencies # non-hash generic image name, it could contain *similar* dependencies
# but *might* not exactly match the state of the source code. # but *might* not exactly match the state of the source code.
# It resembles the "latest" tag in the docker image naming convention for # It resembles the "latest" tag in the docker image naming convention for
# a particular {repo}:{tag} pair (e.g., ubuntu:latest -> od_runtime:ubuntu_tag_latest) # a particular {repo}:{tag} pair (e.g., ubuntu:latest -> od_runtime:ubuntu_tag_latest)
# we will build from IT to save time if the `from_scratch_hash` is not found # we will build from IT to save time if the `from_scratch_hash` is not found
generic_runtime_image_name = f'{runtime_image_repo}:{runtime_image_tag}' generic_runtime_image_name = f'{runtime_image_repo}:{runtime_image_tag}'
# 1. If the image exists with the same hash, we will reuse it as is # Scenario 1: If we already have an image with the exact same hash, then it means the image is already built
# with the exact same source code and Dockerfile, so we will reuse it. Building it is not required.
if _check_image_exists(hash_runtime_image_name, docker_client): if _check_image_exists(hash_runtime_image_name, docker_client):
logger.info( logger.info(
f'Image [{hash_runtime_image_name}] exists with matched hash for Docker build folder.\n' f'Image [{hash_runtime_image_name}] already exists so we will reuse it.'
'Will reuse it as is.'
) )
return hash_runtime_image_name return hash_runtime_image_name
# 2. If the exact hash is not found, we will FIRST try to re-build it # Scenario 2: If a Docker image with the exact hash is not found, we will FIRST try to re-build it
# by leveraging the non-hash `generic_runtime_image_name` to save some time # by leveraging the `generic_runtime_image_name` to save some time
# from re-building the dependencies (e.g., poetry install, apt install) # from re-building the dependencies (e.g., poetry install, apt install)
elif ( elif (
_check_image_exists(generic_runtime_image_name, docker_client) _check_image_exists(generic_runtime_image_name, docker_client)
and not force_rebuild and not force_rebuild
): ):
logger.info( logger.info(
f'Cannot find matched hash for image [{hash_runtime_image_name}]\n' f'Cannot find docker Image [{hash_runtime_image_name}]\n'
f'Will try to re-build it from latest [{generic_runtime_image_name}] image to potentially save ' f'Will try to re-build it from latest [{generic_runtime_image_name}] image to potentially save '
f'time for dependencies installation.\n' f'time for dependencies installation.\n'
) )
@@ -297,6 +353,7 @@ def build_runtime_image(
skip_init=True, # skip init since we are re-using the existing image skip_init=True, # skip init since we are re-using the existing image
extra_deps=extra_deps, extra_deps=extra_deps,
) )
assert ( assert (
_skip_init_hash != from_scratch_hash _skip_init_hash != from_scratch_hash
), f'The skip_init hash [{_skip_init_hash}] should not match the existing hash [{from_scratch_hash}]' ), f'The skip_init hash [{_skip_init_hash}] should not match the existing hash [{from_scratch_hash}]'
@@ -317,16 +374,18 @@ def build_runtime_image(
logger.info( logger.info(
f'Dry run: Skipping image build for [{generic_runtime_image_name}]' f'Dry run: Skipping image build for [{generic_runtime_image_name}]'
) )
if docker_build_folder is None: if docker_build_folder is None:
shutil.rmtree(cur_docker_build_folder) shutil.rmtree(cur_docker_build_folder)
# 3. If the image is not found AND we cannot re-use the non-hash latest relavant image, # Scenario 3: If the Docker image with the required hash is not found AND we cannot re-use the latest
# we will build it completely from scratch # relevant image, we will build it completely from scratch
else: else:
if force_rebuild: if force_rebuild:
logger.info( logger.info(
f'Force re-build: Will try to re-build image [{generic_runtime_image_name}] from scratch.\n' f'Force re-build: Will try to re-build image [{generic_runtime_image_name}] from scratch.\n'
) )
cur_docker_build_folder = docker_build_folder or tempfile.mkdtemp() cur_docker_build_folder = docker_build_folder or tempfile.mkdtemp()
_new_from_scratch_hash = prep_docker_build_folder( _new_from_scratch_hash = prep_docker_build_folder(
cur_docker_build_folder, cur_docker_build_folder,
@@ -368,17 +427,27 @@ if __name__ == '__main__':
args = parser.parse_args() args = parser.parse_args()
if args.build_folder is not None: if args.build_folder is not None:
# If a build_folder is provided, we do not actually build the Docker image. We copy the necessary source code
# and create a Dockerfile dynamically and place it in the build_folder only. This allows the Docker image to
# then be created using the Dockerfile (most likely using the containers/build.sh script)
build_folder = args.build_folder build_folder = args.build_folder
assert os.path.exists( assert os.path.exists(
build_folder build_folder
), f'Build folder {build_folder} does not exist' ), f'Build folder {build_folder} does not exist'
logger.info( logger.info(
f'Will prepare a build folder by copying the source code and generating the Dockerfile: {build_folder}' f'Copying the source code and generating the Dockerfile in the build folder: {build_folder}'
) )
runtime_image_repo, runtime_image_tag = get_runtime_image_repo_and_tag( runtime_image_repo, runtime_image_tag = get_runtime_image_repo_and_tag(
args.base_image args.base_image
) )
logger.info(
f'Runtime image repo: {runtime_image_repo} and runtime image tag: {runtime_image_tag}'
)
with tempfile.TemporaryDirectory() as temp_dir: with tempfile.TemporaryDirectory() as temp_dir:
# dry_run is true so we only prepare a temp_dir containing the required source code and the Dockerfile. We
# then obtain the MD5 hash of the folder and return <image_repo>:<temp_dir_md5_hash>
runtime_image_hash_name = build_runtime_image( runtime_image_hash_name = build_runtime_image(
args.base_image, args.base_image,
docker_client=docker.from_env(), docker_client=docker.from_env(),
@@ -386,15 +455,19 @@ if __name__ == '__main__':
dry_run=True, dry_run=True,
force_rebuild=args.force_rebuild, force_rebuild=args.force_rebuild,
) )
_runtime_image_repo, runtime_image_hash_tag = runtime_image_hash_name.split( _runtime_image_repo, runtime_image_hash_tag = runtime_image_hash_name.split(
':' ':'
) )
# Move contents of temp_dir to build_folder # Move contents of temp_dir to build_folder
shutil.copytree(temp_dir, build_folder, dirs_exist_ok=True) shutil.copytree(temp_dir, build_folder, dirs_exist_ok=True)
logger.info( logger.info(
f'Build folder [{build_folder}] is ready: {os.listdir(build_folder)}' f'Build folder [{build_folder}] is ready: {os.listdir(build_folder)}'
) )
# We now update the config.sh in the build_folder to contain the required values. This is used in the
# containers/build.sh script which is called to actually build the Docker image
with open(os.path.join(build_folder, 'config.sh'), 'a') as file: with open(os.path.join(build_folder, 'config.sh'), 'a') as file:
file.write( file.write(
( (
@@ -405,10 +478,14 @@ if __name__ == '__main__':
) )
) )
logger.info( logger.info(
f'`config.sh` is updated with the new image name [{runtime_image_repo}] and tag [{runtime_image_tag}, {runtime_image_hash_tag}]' f'`config.sh` is updated with the image repo[{runtime_image_repo}] and tags [{runtime_image_tag}, {runtime_image_hash_tag}]'
)
logger.info(
f'Dockerfile, source code and config.sh are ready in {build_folder}'
) )
logger.info(f'Dockerfile and source distribution are ready in {build_folder}')
else: else:
# If a build_folder is not provided, after copying the required source code and dynamically creating the
# Dockerfile, we actually build the Docker image
logger.info('Building image in a temporary folder') logger.info('Building image in a temporary folder')
client = docker.from_env() client = docker.from_env()
image_name = build_runtime_image(args.base_image, client) image_name = build_runtime_image(args.base_image, client)