init commit to create a skill, wip

This commit is contained in:
Jayaditya Gupta
2024-06-20 13:12:08 +05:30
parent be94fda89e
commit 08b5e46d5f
3 changed files with 170 additions and 0 deletions

17
extract_pdf_skill.py Normal file
View File

@@ -0,0 +1,17 @@
from unstructured.partition.pdf import partition_pdf
import PyPDF2
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_file: Path of the PDF to read; it is opened in binary mode.

    Returns:
        The text extracted from each page, joined in page order with no
        separator between pages. An empty string if the PDF has no pages.
    """
    with open(pdf_file, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Extract while the file is still open, since the reader was built
        # on top of this file object. `or ""` keeps the join safe should a
        # page yield None instead of a string.
        return "".join(page.extract_text() or "" for page in reader.pages)
# Demo run: extract the text of the sample bank statement and print it.
# NOTE: there is no `__main__` guard here, so this runs (and needs
# "bank-statement.pdf" in the working directory) whenever this module is
# imported, not only when it is executed as a script.
text = extract_text_from_pdf("bank-statement.pdf")
print("text : ", text)
# below does not work
# NOTE(review): presumably because partition_pdf expects a PDF path or file
# object rather than already-extracted text — confirm against the
# unstructured documentation.
# pdf_text = partition_pdf(text)
# print("pdf_text : ", pdf_text)

43
main.py Normal file
View File

@@ -0,0 +1,43 @@
import autogen
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment before
# the configuration below reads OPENAI_API_KEY from it.
load_dotenv() # take environment variables from .env.
# Model endpoints available to the agents; the API key is read from the
# OPENAI_API_KEY environment variable (None when it is not set).
config_list = [
    {"model": "gpt-3.5-turbo-16k", "api_key": os.environ.get("OPENAI_API_KEY")},
]

# LLM settings passed to the agents created further down in this file.
llm_config = dict(
    timeout=300,
    # Author's intent: a fixed seed so a task that has already run is served
    # from the cache.
    # NOTE(review): newer pyautogen releases name this option `cache_seed` —
    # confirm against the installed version.
    seed=42,
    config_list=config_list,
    # Lower temperature gives more standard, less creative responses;
    # higher values are more creative.
    temperature=0,
)
# LLM-backed assistant agent. Its system message tells it to check the bank
# documents and to extract PDFs using extract_pdf_skill.
assistant = autogen.AssistantAgent(
    name="loan_assistant",  # fixed spelling; was "laon_assistant"
    llm_config=llm_config,
    system_message="checks the bank documents. extract pdf using extract_pdf_skill.",
)
def _is_termination_msg(message):
    """Return True when a message's text content ends with "TERMINATE".

    A message may carry ``"content": None``; in that case
    ``message.get("content", "")`` still returns ``None`` and calling
    ``.rstrip()`` on it would raise ``AttributeError``. Any non-string
    content is therefore treated as "not a termination message".

    Args:
        message: Mapping describing a chat message; only its ``"content"``
            entry is inspected.

    Returns:
        True if the content is a string that, ignoring trailing whitespace,
        ends with ``TERMINATE``; False otherwise.
    """
    content = message.get("content")
    if not isinstance(content, str):
        return False
    return content.rstrip().endswith("TERMINATE")


# Proxy agent that stands in for the user. It auto-replies at most three
# times in a row and may execute code with "web" as its working directory.
# NOTE(review): human_input_mode="TERMINATE" presumably asks for human input
# only once a termination condition is hit — confirm against the autogen docs.
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    human_input_mode="TERMINATE",
    max_consecutive_auto_reply=3,
    is_termination_msg=_is_termination_msg,
    code_execution_config={"work_dir": "web"},
    llm_config=llm_config,
    system_message="""Reply TERMINATE if the task has been solved at full satisfaction
otherwise, reply CONTINUE, or the reason why the task is not solved yet."""
)
def main(task=""):
    """Start a chat from ``user_proxy`` to ``assistant``.

    Args:
        task: Text of the opening message sent to the assistant. Defaults to
            an empty string, which matches the previously hard-coded
            placeholder, so calling ``main()`` behaves as before.
    """
    user_proxy.initiate_chat(assistant, message=task)


if __name__ == "__main__":
    main()

110
requirements.txt Normal file
View File

@@ -0,0 +1,110 @@
# NOTE(review): main.py imports `autogen`, but no autogen/pyautogen
# distribution is pinned in this list — confirm and add the pin.
# NOTE(review): both `pdfminer` and `pdfminer.six` are pinned below; they
# presumably install the same `pdfminer` import package — verify that only
# one of them is needed.
aiohttp==3.9.5
aiosignal==1.3.1
altair==5.3.0
annotated-types==0.7.0
anyio==4.4.0
attrs==23.2.0
backoff==2.2.1
beautifulsoup4==4.12.3
blinker==1.8.2
cachetools==5.3.3
certifi==2024.6.2
cffi==1.16.0
chardet==5.2.0
charset-normalizer==3.3.2
click==8.1.7
cryptography==42.0.8
dataclasses-json==0.6.7
deepdiff==7.0.1
emoji==2.12.1
filetype==1.2.0
frozenlist==1.4.1
gitdb==4.0.11
GitPython==3.1.43
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
Jinja2==3.1.4
joblib==1.4.2
jsonpatch==1.33
jsonpath-python==1.0.6
jsonpointer==3.0.0
jsonschema==4.22.0
jsonschema-specifications==2023.12.1
langchain==0.2.4
langchain-core==0.2.6
langchain-text-splitters==0.2.1
langdetect==1.0.9
langsmith==0.1.77
lxml==5.2.2
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.21.3
mdurl==0.1.2
multidict==6.0.5
mypy-extensions==1.0.0
nest-asyncio==1.6.0
nltk==3.8.1
numpy==1.26.4
ollama==0.2.1
opencv-contrib-python==4.10.0.84
opencv-python==4.10.0.84
ordered-set==4.1.0
orjson==3.10.5
packaging==24.1
pandas==2.2.2
pdf2image==1.17.0
pdfminer==20191125
pdfminer.six==20231228
phi==0.6.7
phidata==2.4.20
pillow==10.3.0
pillow_heif==0.16.0
protobuf==4.25.3
pyarrow==16.1.0
pycparser==2.22
pycryptodome==3.20.0
pydantic==2.7.4
pydantic-settings==2.3.3
pydantic_core==2.18.4
pydeck==0.9.1
Pygments==2.18.0
pypdf==4.2.0
PyPDF2==3.0.1
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-iso639==2024.4.27
python-magic==0.4.27
pytz==2024.1
PyYAML==6.0.1
rapidfuzz==3.9.3
referencing==0.35.1
regex==2024.5.15
requests==2.32.3
requests-toolbelt==1.0.0
rich==13.7.1
rpds-py==0.18.1
shellingham==1.5.4
six==1.16.0
smmap==5.0.1
sniffio==1.3.1
soupsieve==2.5
SQLAlchemy==2.0.30
streamlit==1.35.0
tabulate==0.9.0
tenacity==8.3.0
toml==0.10.2
tomli==2.0.1
toolz==0.12.1
tornado==6.4.1
tqdm==4.66.4
typer==0.12.3
typing-inspect==0.9.0
typing_extensions==4.12.2
tzdata==2024.1
unstructured==0.14.6
unstructured-client==0.23.3
urllib3==2.2.1
wrapt==1.16.0
yarl==1.9.4