diff --git a/containers/sandbox/Dockerfile b/containers/sandbox/Dockerfile index 13563aaa09..6da20b7dfe 100644 --- a/containers/sandbox/Dockerfile +++ b/containers/sandbox/Dockerfile @@ -22,6 +22,7 @@ RUN apt-get update && apt-get install -y \ g++ \ make \ iproute2 \ + libgl1-mesa-glx \ && rm -rf /var/lib/apt/lists/* RUN mkdir -p -m0755 /var/run/sshd @@ -32,3 +33,5 @@ RUN ln -s /usr/bin/python3 /usr/bin/python # install basic dependencies for CodeActAgent RUN pip3 install --upgrade pip RUN pip3 install jupyterlab notebook jupyter_kernel_gateway flake8 +# TODO: those dependencies are needed for agentskills, we should pack them in a new sandbox image +RUN pip3 install python-docx PyPDF2 python-pptx pylatexenc openai opencv-python diff --git a/opendevin/runtime/plugins/agent_skills/agentskills.py b/opendevin/runtime/plugins/agent_skills/agentskills.py index 0351c1bd09..bd36ffab20 100644 --- a/opendevin/runtime/plugins/agent_skills/agentskills.py +++ b/opendevin/runtime/plugins/agent_skills/agentskills.py @@ -20,12 +20,29 @@ import subprocess from inspect import signature from typing import Optional +import base64 +import PyPDF2 +import docx +from pptx import Presentation +from pylatexenc.latex2text import LatexNodes2Text +from openai import OpenAI + CURRENT_FILE = None CURRENT_LINE = 1 WINDOW = 100 ENABLE_AUTO_LINT = os.getenv('ENABLE_AUTO_LINT', 'false').lower() == 'true' +# OPENAI +OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '') +OPENAI_BASE_URL = os.getenv('OPENAI_BASE_URL', 'https://api.openai.com/v1') +OPENAI_MODEL = os.getenv('OPENAI_MODEL', 'gpt-4o-2024-05-13') +MAX_TOKEN = os.getenv('MAX_TOKEN', 500) + +OPENAI_PROXY = f'{OPENAI_BASE_URL}/chat/completions' + +client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL) + def _lint_file(file_path: str) -> Optional[str]: """ @@ -103,9 +120,9 @@ def open_file(path: str, line_number: Optional[int] = None) -> None: if line_number is not None: if ( - not isinstance(line_number, int) - or line_number < 1 - or 
line_number > total_lines + not isinstance(line_number, int) + or line_number < 1 + or line_number > total_lines ): raise ValueError(f'Line number must be between 1 and {total_lines}') CURRENT_LINE = line_number @@ -261,7 +278,7 @@ def edit_file(start: int, end: int, content: str) -> None: # recover the original file with open(original_file_backup_path, 'r') as fin, open( - CURRENT_FILE, 'w' + CURRENT_FILE, 'w' ) as fout: fout.write(fin.read()) os.remove(original_file_backup_path) @@ -379,7 +396,203 @@ def find_file(file_name: str, dir_path: str = './') -> None: print(f'[No matches found for "{file_name}" in {dir_path}]') +def parse_pdf(file_path: str) -> None: + """Parses the content of a PDF file and prints it. + + Args: + file_path: str: The path to the file to open. + """ + print(f'[Reading PDF file from {file_path}]') + content = PyPDF2.PdfReader(file_path) + text = '' + for page_idx in range(len(content.pages)): + text += f'@@ Page {page_idx + 1} @@\n' + content.pages[page_idx].extract_text() + f'\n\n' + print(text.strip()) + + +def parse_docx(file_path: str) -> None: + """ + Parses the content of a DOCX file and prints it. + + Args: + file_path: str: The path to the file to open. + """ + print(f'[Reading DOCX file from {file_path}]') + content = docx.Document(file_path) + text = '' + for i, para in enumerate(content.paragraphs): + text += f'@@ Page {i + 1} @@\n' + para.text + f'\n\n' + print(text) + + +def parse_latex(file_path: str) -> None: + """ + Parses the content of a LaTex file and prints it. + + Args: + file_path: str: The path to the file to open. 
+ """ + print(f'[Reading LaTex file from {file_path}]') + with open(file_path, 'r') as f: + data = f.read() + text = LatexNodes2Text().latex_to_text(data) + print(text.strip()) + + +def _base64_img(file_path: str) -> str: + with open(file_path, 'rb') as image_file: + encoded_image = base64.b64encode(image_file.read()).decode('utf-8') + return encoded_image + + +def _base64_video(file_path: str, frame_interval: int = 10) -> list[str]: + import cv2 + video = cv2.VideoCapture(file_path) + base64_frames = [] + frame_count = 0 + while video.isOpened(): + success, frame = video.read() + if not success: + break + if frame_count % frame_interval == 0: + _, buffer = cv2.imencode('.jpg', frame) + base64_frames.append(base64.b64encode(buffer).decode('utf-8')) + frame_count += 1 + video.release() + return base64_frames + + +def _prepare_image_messages(task: str, base64_image: str): + return [ + { + 'role': 'user', + 'content': [ + {'type': 'text', 'text': task}, + { + 'type': 'image_url', + 'image_url': { + 'url': f'data:image/jpeg;base64,{base64_image}' + }, + }, + ], + } + ] + + +def parse_audio(file_path: str, model: str = 'whisper-1') -> None: + """ + Parses the content of an audio file and prints it. + + Args: + file_path: str: The path to the audio file to transcribe. + model: Optional[str]: The audio model to use for transcription. Defaults to 'whisper-1'. + """ + print(f'[Transcribing audio file from {file_path}]') + try: + # TODO: record the COST of the API call + with open(file_path, 'rb') as audio_file: + transcript = client.audio.translations.create( + model=model, file=audio_file + ) + print(transcript.text) + + except Exception as e: + print(f'Error transcribing audio file: {e}') + + +def parse_image(file_path: str, task: str = 'Describe this image as detail as possible.') -> None: + """ + Parses the content of an image file and prints the description. + + Args: + file_path: str: The path to the file to open. 
+ task: Optional[str]: The task description for the API call. Defaults to 'Describe this image as detail as possible.'. + """ + print(f'[Reading image file from {file_path}]') + # TODO: record the COST of the API call + try: + base64_image = _base64_img(file_path) + response = client.chat.completions.create( + model=OPENAI_MODEL, + messages=_prepare_image_messages(task, base64_image), + max_tokens=MAX_TOKEN + ) + content = response.choices[0].message.content + print(content) + + except Exception as error: + print(f'Error with the request: {error}') + + +def parse_video(file_path: str, + task: str = 'Describe this image as detail as possible.', + frame_interval: int = 30) -> None: + """ + Parses the content of a video file and prints a description of each sampled frame. + + Args: + file_path: str: The path to the video file to open. + task: Optional[str]: The task description for the API call. Defaults to 'Describe this image as detail as possible.'. + frame_interval: Optional[int]: The interval between frames to analyze. Defaults to 30. + + """ + print(f'[Processing video file from {file_path} with frame interval {frame_interval}]') + + video_summary = '' + task = task or 'This is one frame from a video, please summarize this frame.' + base64_frames = _base64_video(file_path) + selected_frames = base64_frames[::frame_interval] + + if len(selected_frames) > 30: + new_interval = len(base64_frames) // 30 + selected_frames = base64_frames[::new_interval] + + print(f'Totally {len(selected_frames)} would be analyze...\n') + + idx = 0 + for base64_frame in selected_frames: + idx += 1 + print(f'Process the {file_path}, current No. 
{idx * frame_interval} frame...') + # TODO: record the COST of the API call + try: + response = client.chat.completions.create( + model=OPENAI_MODEL, + messages=_prepare_image_messages(task, base64_frame), + max_tokens=MAX_TOKEN + ) + + content = response.choices[0].message.content + current_frame_content = f"Frame {idx}'s content: {content}\n" + print(current_frame_content) + + except Exception as error: + print(f'Error with the request: {error}') + + +def parse_pptx(file_path: str) -> None: + """ + Parses the content of a pptx file and prints it. + + Args: + file_path: str: The path to the file to open. + """ + print(f'[Reading PowerPoint file from {file_path}]') + try: + pres = Presentation(str(file_path)) + text = [] + for slide_idx, slide in enumerate(pres.slides): + text.append(f'@@ Slide {slide_idx + 1} @@') + for shape in slide.shapes: + if hasattr(shape, 'text'): + text.append(shape.text) + print('\n'.join(text)) + + except Exception as e: + print(f'Error reading PowerPoint file: {e}') + + __all__ = [ + # file operation 'open_file', 'goto_line', 'scroll_down', @@ -389,8 +602,15 @@ __all__ = [ 'search_dir', 'search_file', 'find_file', + # readers + 'parse_pdf', + 'parse_docx', + 'parse_latex', + 'parse_pptx' ] +if OPENAI_API_KEY and OPENAI_BASE_URL: + __all__ += ['parse_audio', 'parse_video', 'parse_image'] DOCUMENTATION = '' for func_name in __all__: diff --git a/opendevin/runtime/plugins/agent_skills/setup.sh b/opendevin/runtime/plugins/agent_skills/setup.sh index 3fc3d789ae..659d5db8fd 100755 --- a/opendevin/runtime/plugins/agent_skills/setup.sh +++ b/opendevin/runtime/plugins/agent_skills/setup.sh @@ -10,4 +10,4 @@ export PATH=/opendevin/plugins/agent_skills:$PATH echo 'export PYTHONPATH=/opendevin/plugins/agent_skills:$PYTHONPATH' >> ~/.bashrc export PYTHONPATH=/opendevin/plugins/agent_skills:$PYTHONPATH -pip install flake8 +pip install flake8 python-docx PyPDF2 python-pptx pylatexenc openai opencv-python diff --git a/poetry.lock b/poetry.lock index 
0628356f15..a6184934f0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aenum" @@ -674,6 +674,17 @@ files = [ {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, ] +[[package]] +name = "chardet" +version = "5.2.0" +description = "Universal encoding detector for Python 3" +optional = false +python-versions = ">=3.7" +files = [ + {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, + {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, +] + [[package]] name = "charset-normalizer" version = "3.3.2" @@ -4053,6 +4064,28 @@ typing-extensions = ">=4.7,<5" [package.extras] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +[[package]] +name = "opencv-python" +version = "4.9.0.80" +description = "Wrapper package for OpenCV python bindings." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "opencv-python-4.9.0.80.tar.gz", hash = "sha256:1a9f0e6267de3a1a1db0c54213d022c7c8b5b9ca4b580e80bdc58516c922c9e1"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:7e5f7aa4486651a6ebfa8ed4b594b65bd2d2f41beeb4241a3e4b1b85acbbbadb"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71dfb9555ccccdd77305fc3dcca5897fbf0cf28b297c51ee55e079c065d812a3"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b34a52e9da36dda8c151c6394aed602e4b17fa041df0b9f5b93ae10b0fcca2a"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4088cab82b66a3b37ffc452976b14a3c599269c247895ae9ceb4066d8188a57"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-win32.whl", hash = "sha256:dcf000c36dd1651118a2462257e3a9e76db789a78432e1f303c7bac54f63ef6c"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl", hash = "sha256:3f16f08e02b2a2da44259c7cc712e779eff1dd8b55fdb0323e8cab09548086c0"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, +] + [[package]] name = "opentelemetry-api" version = "1.24.0" @@ -4332,8 +4365,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -4972,6 +5005,16 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] +[[package]] +name = "pylatexenc" +version = "2.10" 
+description = "Simple LaTeX parser providing latex-to-unicode and unicode-to-latex conversion" +optional = false +python-versions = "*" +files = [ + {file = "pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3"}, +] + [[package]] name = "pyparsing" version = "3.1.2" @@ -5004,6 +5047,24 @@ docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] full = ["Pillow (>=8.0.0)", "PyCryptodome", "cryptography"] image = ["Pillow (>=8.0.0)"] +[[package]] +name = "pypdf2" +version = "3.0.1" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440"}, + {file = "pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928"}, +] + +[package.extras] +crypto = ["PyCryptodome"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow", "PyCryptodome"] +image = ["Pillow"] + [[package]] name = "pypika" version = "0.48.9" @@ -5121,6 +5182,21 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-docx" +version = "1.1.2" +description = "Create, read, and update Microsoft Word .docx files." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "python_docx-1.1.2-py3-none-any.whl", hash = "sha256:08c20d6058916fb19853fcf080f7f42b6270d89eac9fa5f8c15f691c0017fabe"}, + {file = "python_docx-1.1.2.tar.gz", hash = "sha256:0cf1f22e95b9002addca7948e16f2cd7acdfd498047f1941ca5d293db7762efd"}, +] + +[package.dependencies] +lxml = ">=3.1.0" +typing-extensions = ">=4.9.0" + [[package]] name = "python-dotenv" version = "1.0.1" @@ -5149,6 +5225,22 @@ files = [ [package.extras] dev = ["atomicwrites (==1.4.1)", "attrs (==23.2.0)", "coverage (==7.4.1)", "hatch", "invoke (==2.2.0)", "more-itertools (==10.2.0)", "pbr (==6.0.0)", "pluggy (==1.4.0)", "py (==1.11.0)", "pytest (==8.0.0)", "pytest-cov (==4.1.0)", "pytest-timeout (==2.2.0)", "pyyaml (==6.0.1)", "ruff (==0.2.1)"] +[[package]] +name = "python-pptx" +version = "0.6.23" +description = "Generate and manipulate Open XML PowerPoint (.pptx) files" +optional = false +python-versions = "*" +files = [ + {file = "python-pptx-0.6.23.tar.gz", hash = "sha256:587497ff28e779ab18dbb074f6d4052893c85dedc95ed75df319364f331fedee"}, + {file = "python_pptx-0.6.23-py3-none-any.whl", hash = "sha256:dd0527194627a2b7cc05f3ba23ecaa2d9a0d5ac9b6193a28ed1b7a716f4217d4"}, +] + +[package.dependencies] +lxml = ">=3.1.0" +Pillow = ">=3.3.2" +XlsxWriter = ">=0.5.7" + [[package]] name = "pytz" version = "2024.1" @@ -5346,6 +5438,26 @@ files = [ {file = "regex-2024.5.15.tar.gz", hash = "sha256:d3ee02d9e5f482cc8309134a91eeaacbdd2261ba111b0fef3748eeb4913e6a2c"}, ] +[[package]] +name = "reportlab" +version = "4.2.0" +description = "The Reportlab Toolkit" +optional = false +python-versions = "<4,>=3.7" +files = [ + {file = "reportlab-4.2.0-py3-none-any.whl", hash = "sha256:53630f9d25a7938def3e6a93d723b72a7a5921d34d23cf7a0930adeb2cb0e6c1"}, + {file = "reportlab-4.2.0.tar.gz", hash = "sha256:474fb28d63431a5d47d75c90d580393050df7d491a09c7877df3291a2e9f6d0a"}, +] + +[package.dependencies] +chardet = "*" +pillow = ">=9.0.0" + +[package.extras] 
+accel = ["rl-accel (>=0.9.0,<1.1)"] +pycairo = ["freetype-py (>=2.3.0,<2.4)", "rlPyCairo (>=0.2.0,<1)"] +renderpm = ["rl-renderPM (>=4.0.3,<4.1)"] + [[package]] name = "requests" version = "2.31.0" @@ -7102,6 +7214,17 @@ files = [ {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] +[[package]] +name = "xlsxwriter" +version = "3.2.0" +description = "A Python module for creating Excel XLSX files." +optional = false +python-versions = ">=3.6" +files = [ + {file = "XlsxWriter-3.2.0-py3-none-any.whl", hash = "sha256:ecfd5405b3e0e228219bcaf24c2ca0915e012ca9464a14048021d21a995d490e"}, + {file = "XlsxWriter-3.2.0.tar.gz", hash = "sha256:9977d0c661a72866a61f9f7a809e25ebbb0fb7036baa3b9fe74afcfca6b3cb8c"}, +] + [[package]] name = "xxhash" version = "3.4.1" @@ -7403,4 +7526,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "5c32421d9ba3c24d624b55092816059d5fe2bdbee0bc53e5158e7b4368780391" +content-hash = "141771396f59fc23d52623ada07e4b89272ca781e5a2072f98ebccdf3f18a43b" diff --git a/pyproject.toml b/pyproject.toml index 33cbbe6731..4822b8a6e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,14 @@ pytest-cov = "*" pytest-asyncio = "*" pytest-forked = "*" flake8 = "*" +openai = "*" +python-docx = "*" +PyPDF2 = "*" +pylatexenc = "*" +python-pptx = "*" +opencv-python = "*" +pandas = "*" +reportlab = "*" [tool.coverage.run] concurrency = ["gevent"] diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log index b721882ea1..0d0d81ef3e 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_001.log @@ -72,6 +72,26 @@ find_file(file_name: str, dir_path: str = './') -> None: file_name: str: The name of the file to find. 
dir_path: Optional[str]: The path to the directory to search. +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log index e5f4a5bfda..1bb362ace8 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log @@ -72,6 +72,26 @@ find_file(file_name: str, dir_path: str = './') -> None: file_name: str: The name of the file to find. dir_path: Optional[str]: The path to the directory to search. +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. 
+ +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. diff --git a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log index eab65fc73b..63a07514e2 100644 --- a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log +++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log @@ -72,6 +72,26 @@ find_file(file_name: str, dir_path: str = './') -> None: file_name: str: The name of the file to find. dir_path: Optional[str]: The path to the directory to search. +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. 
diff --git a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log index 0b3ad30a94..95cc3b0b89 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_001.log @@ -72,6 +72,26 @@ find_file(file_name: str, dir_path: str = './') -> None: file_name: str: The name of the file to find. dir_path: Optional[str]: The path to the directory to search. +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. diff --git a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log index c9f42f6137..bfd88b19c3 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_ipython/prompt_002.log @@ -72,6 +72,26 @@ find_file(file_name: str, dir_path: str = './') -> None: file_name: str: The name of the file to find. dir_path: Optional[str]: The path to the directory to search. 
+parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. diff --git a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log index f30d99d13f..06772b205c 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_001.log @@ -72,6 +72,26 @@ find_file(file_name: str, dir_path: str = './') -> None: file_name: str: The name of the file to find. dir_path: Optional[str]: The path to the directory to search. +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. 
+ Args: + file_path: str: The path to the file to open. + Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. diff --git a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log index bf367ef74e..7ff45f6479 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_002.log @@ -72,6 +72,26 @@ find_file(file_name: str, dir_path: str = './') -> None: file_name: str: The name of the file to find. dir_path: Optional[str]: The path to the directory to search. +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. 
diff --git a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log index e7499e34a9..2e60a75cc6 100644 --- a/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log +++ b/tests/integration/mock/CodeActAgent/test_ipython_module/prompt_003.log @@ -72,6 +72,26 @@ find_file(file_name: str, dir_path: str = './') -> None: file_name: str: The name of the file to find. dir_path: Optional[str]: The path to the directory to search. +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log index 0dbd5d1143..c7b6a29394 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_001.log @@ -72,6 +72,26 @@ find_file(file_name: str, dir_path: str = './') -> None: file_name: str: The name of the file to find. 
dir_path: Optional[str]: The path to the directory to search. +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log index 850b39b1fb..c9d8359a14 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log @@ -72,6 +72,26 @@ find_file(file_name: str, dir_path: str = './') -> None: file_name: str: The name of the file to find. dir_path: Optional[str]: The path to the directory to search. +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. 
+ +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. diff --git a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log index ca6571b805..c48cad55b2 100644 --- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log +++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_003.log @@ -72,6 +72,26 @@ find_file(file_name: str, dir_path: str = './') -> None: file_name: str: The name of the file to find. dir_path: Optional[str]: The path to the directory to search. +parse_pdf(file_path: str) -> None: + Parses the content of a PDF file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_docx(file_path: str) -> None: + Parses the content of a DOCX file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_latex(file_path: str) -> None: + Parses the content of a LaTex file and prints it. + Args: + file_path: str: The path to the file to open. + +parse_pptx(file_path: str) -> None: + Parses the content of a pptx file and prints it. + Args: + file_path: str: The path to the file to open. + Please note that THE `edit_file` FUNCTION REQUIRES PROPER INDENTATION. If the assistant would like to add the line ' print(x)', it must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. The assistant's response should be concise. 
diff --git a/tests/unit/test_agent_skill.py b/tests/unit/test_agent_skill.py
index f1c30cda2f..a701e76aa9 100644
--- a/tests/unit/test_agent_skill.py
+++ b/tests/unit/test_agent_skill.py
@@ -1,7 +1,8 @@
 import contextlib
 import io
 
+import docx
 import pytest
 
 from opendevin.runtime.plugins.agent_skills.agentskills import (
     create_file,
@@ -13,6 +14,11 @@ from opendevin.runtime.plugins.agent_skills.agentskills import (
     scroll_up,
     search_dir,
     search_file,
+    parse_docx,
+    parse_latex,
+    parse_pdf,
+    parse_pptx,
+    parse_image,
 )
 
 
@@ -588,3 +594,107 @@ def test_lint_file_disabled_undefined_name(tmp_path, monkeypatch, capsys):
         '[File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]\n'
     )
     assert result.split('\n') == expected.split('\n')
+
+
+def test_parse_docx(tmp_path):
+    # Create a DOCX file with two paragraphs
+    test_docx_path = tmp_path / "test.docx"
+    doc = docx.Document()
+    doc.add_paragraph('Hello, this is a test document.')
+    doc.add_paragraph('This is the second paragraph.')
+    doc.save(str(test_docx_path))
+
+    # redirect_stdout restores sys.stdout even if parse_docx raises
+    with io.StringIO() as buf:
+        with contextlib.redirect_stdout(buf):
+            parse_docx(str(test_docx_path))
+        output = buf.getvalue()
+
+    # Each paragraph is expected under its own "@@ Page N @@" header
+    expected_output = (
+        f'[Reading DOCX file from {test_docx_path}]\n'
+        '@@ Page 1 @@\nHello, this is a test document.\n\n'
+        '@@ Page 2 @@\nThis is the second paragraph.\n\n\n'
+    )
+    assert output == expected_output, f"Expected output does not match. Got: {output}"
+
+
+def test_parse_latex(tmp_path):
+    # Create a LaTeX file with some content
+    test_latex_path = tmp_path / "test.tex"
+    with open(test_latex_path, 'w') as f:
+        f.write(r'''
+        \documentclass{article}
+        \begin{document}
+        Hello, this is a test LaTeX document.
+        \end{document}
+        ''')
+
+    # redirect_stdout restores sys.stdout even if parse_latex raises
+    with io.StringIO() as buf:
+        with contextlib.redirect_stdout(buf):
+            parse_latex(str(test_latex_path))
+        output = buf.getvalue()
+
+    # Only the document body text is expected after the header line
+    expected_output = (
+        f'[Reading LaTex file from {test_latex_path}]\n'
+        'Hello, this is a test LaTeX document.\n'
+    )
+    assert output == expected_output, f"Expected output does not match. Got: {output}"
+
+
+def test_parse_pdf(tmp_path):
+    # Create a single-page PDF file with some content
+    test_pdf_path = tmp_path / "test.pdf"
+    from reportlab.lib.pagesizes import letter
+    from reportlab.pdfgen import canvas
+
+    c = canvas.Canvas(str(test_pdf_path), pagesize=letter)
+    c.drawString(100, 750, "Hello, this is a test PDF document.")
+    c.save()
+
+    # redirect_stdout restores sys.stdout even if parse_pdf raises
+    with io.StringIO() as buf:
+        with contextlib.redirect_stdout(buf):
+            parse_pdf(str(test_pdf_path))
+        output = buf.getvalue()
+
+    # The page text is expected under a "@@ Page 1 @@" header
+    expected_output = (
+        f'[Reading PDF file from {test_pdf_path}]\n'
+        '@@ Page 1 @@\n'
+        'Hello, this is a test PDF document.\n'
+    )
+    assert output == expected_output, f"Expected output does not match. Got: {output}"
+
+
+def test_parse_pptx(tmp_path):
+    test_pptx_path = tmp_path / "test.pptx"
+    from pptx import Presentation
+    pres = Presentation()
+
+    slide1 = pres.slides.add_slide(pres.slide_layouts[0])
+    title1 = slide1.shapes.title
+    title1.text = "Hello, this is the first test PPTX slide."
+
+    slide2 = pres.slides.add_slide(pres.slide_layouts[0])
+    title2 = slide2.shapes.title
+    title2.text = "Hello, this is the second test PPTX slide."
+
+    pres.save(str(test_pptx_path))
+
+    # redirect_stdout restores sys.stdout even if parse_pptx raises
+    with io.StringIO() as buf:
+        with contextlib.redirect_stdout(buf):
+            parse_pptx(str(test_pptx_path))
+        output = buf.getvalue()
+
+    expected_output = (
+        f'[Reading PowerPoint file from {test_pptx_path}]\n'
+        '@@ Slide 1 @@\n'
+        'Hello, this is the first test PPTX slide.\n\n'
+        '@@ Slide 2 @@\n'
+        'Hello, this is the second test PPTX slide.\n\n'
+    )
+    assert output == expected_output, f"Expected output does not match. Got: {output}"