diff --git a/extract_pdf_skill.py b/extract_pdf_skill.py
new file mode 100644
index 0000000..c518e9a
--- /dev/null
+++ b/extract_pdf_skill.py
@@ -0,0 +1,35 @@
+"""Extract plain text from a PDF file with PyPDF2."""
+from unstructured.partition.pdf import partition_pdf
+import PyPDF2
+
+
+def extract_text_from_pdf(pdf_file):
+    """Return the text of every page of a PDF, concatenated in page order.
+
+    Args:
+        pdf_file: Path of the PDF file to read.
+
+    Returns:
+        One string with the extracted text of all pages; a page that
+        yields no text contributes nothing.
+
+    Raises:
+        OSError: If pdf_file cannot be opened.
+    """
+    with open(pdf_file, "rb") as file:
+        reader = PyPDF2.PdfReader(file)
+        # Join once instead of growing a string per page; "or" keeps the
+        # join safe in case extract_text() returns None for a page.
+        return "".join(page.extract_text() or "" for page in reader.pages)
+
+
+if __name__ == "__main__":
+    # Demo run, guarded so that importing this module does not open the PDF.
+    text = extract_text_from_pdf("bank-statement.pdf")
+    print("text : ", text)
+
+    # below does not work
+    # NOTE(review): partition_pdf appears to expect a PDF path or file object
+    # (filename=/file=), not already-extracted text - confirm against its docs.
+    # pdf_text = partition_pdf(text)
+    # print("pdf_text : ", pdf_text)
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..906274c
--- /dev/null
+++ b/main.py
@@ -0,0 +1,49 @@
+"""Run an AutoGen assistant / user-proxy chat that checks bank documents."""
+import autogen
+import os
+from dotenv import load_dotenv
+
+load_dotenv()  # take environment variables from .env.
+config_list = [
+    {
+        'model': 'gpt-3.5-turbo-16k',
+        'api_key': os.getenv('OPENAI_API_KEY'),
+    }
+]
+
+llm_config = {
+    "timeout": 300,
+    "seed": 42,  # for caching. once task is run it caches the response,
+    "config_list": config_list,
+    # lower temperature: more standard, less creative response; higher is more creative
+    "temperature": 0,
+}
+
+assistant = autogen.AssistantAgent(
+    name="laon_assistant",
+    llm_config=llm_config,
+    system_message="checks the bank documents. \nextract pdf using extract_pdf_skill.",
+)
+
+user_proxy = autogen.UserProxyAgent(
+    name="user_proxy",
+    human_input_mode="TERMINATE",
+    max_consecutive_auto_reply=3,
+    # dict.get's default only covers a missing key; a "content" of None
+    # would crash rstrip(), so fall back to "" explicitly.
+    is_termination_msg=lambda x: (x.get("content") or "").rstrip().endswith("TERMINATE"),
+    code_execution_config={"work_dir": "web"},
+    llm_config=llm_config,
+    system_message="""Reply TERMINATE if the task has been solved at full satisfaction
+    otherwise, reply CONTINUE, or the reason why the task is not solved yet."""
+)
+
+
+def main():
+    """Start the chat between user_proxy and assistant with the task message."""
+    task = """"""  # NOTE(review): empty task - presumably a placeholder; confirm.
+    user_proxy.initiate_chat(assistant, message=task)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3a64b93
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,110 @@
+aiohttp==3.9.5
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.7.0
+anyio==4.4.0
+attrs==23.2.0
+backoff==2.2.1
+beautifulsoup4==4.12.3
+blinker==1.8.2
+cachetools==5.3.3
+certifi==2024.6.2
+cffi==1.16.0
+chardet==5.2.0
+charset-normalizer==3.3.2
+click==8.1.7
+cryptography==42.0.8
+dataclasses-json==0.6.7
+deepdiff==7.0.1
+emoji==2.12.1
+filetype==1.2.0
+frozenlist==1.4.1
+gitdb==4.0.11
+GitPython==3.1.43
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.0
+idna==3.7
+Jinja2==3.1.4
+joblib==1.4.2
+jsonpatch==1.33
+jsonpath-python==1.0.6
+jsonpointer==3.0.0
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+langchain==0.2.4
+langchain-core==0.2.6
+langchain-text-splitters==0.2.1
+langdetect==1.0.9
+langsmith==0.1.77
+lxml==5.2.2
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.21.3
+mdurl==0.1.2
+multidict==6.0.5
+mypy-extensions==1.0.0
+nest-asyncio==1.6.0
+nltk==3.8.1
+numpy==1.26.4
+ollama==0.2.1
+opencv-contrib-python==4.10.0.84
+opencv-python==4.10.0.84
+ordered-set==4.1.0
+orjson==3.10.5
+packaging==24.1
+pandas==2.2.2
+pdf2image==1.17.0
+pdfminer==20191125
+pdfminer.six==20231228
+phi==0.6.7
+phidata==2.4.20
+pillow==10.3.0
+pillow_heif==0.16.0
+protobuf==4.25.3
+pyarrow==16.1.0
+pycparser==2.22
+pycryptodome==3.20.0
+pydantic==2.7.4
+pydantic-settings==2.3.3
+pydantic_core==2.18.4
+pydeck==0.9.1
+Pygments==2.18.0
+pypdf==4.2.0
+PyPDF2==3.0.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-iso639==2024.4.27
+python-magic==0.4.27
+pytz==2024.1
+PyYAML==6.0.1
+rapidfuzz==3.9.3
+referencing==0.35.1
+regex==2024.5.15
+requests==2.32.3
+requests-toolbelt==1.0.0
+rich==13.7.1
+rpds-py==0.18.1
+shellingham==1.5.4
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+soupsieve==2.5
+SQLAlchemy==2.0.30
+streamlit==1.35.0
+tabulate==0.9.0
+tenacity==8.3.0
+toml==0.10.2
+tomli==2.0.1
+toolz==0.12.1
+tornado==6.4.1
+tqdm==4.66.4
+typer==0.12.3
+typing-inspect==0.9.0
+typing_extensions==4.12.2
+tzdata==2024.1
+unstructured==0.14.6
+unstructured-client==0.23.3
+urllib3==2.2.1
+wrapt==1.16.0
+yarl==1.9.4