salary assistant wip

This commit is contained in:
hackertron
2024-07-15 13:38:47 +05:30
parent 45088d0397
commit fb1ca851a9
3 changed files with 82 additions and 14 deletions

View File

@@ -1,17 +1,68 @@
from unstructured.partition.pdf import partition_pdf
import requests
import os
import PyPDF2
from typing import Annotated
from urllib.parse import urlparse
def extract_text_from_pdf(pdf_file):
def download_pdf(
    url: Annotated[str, "the pdf file url"],
    timeout: Annotated[float, "request timeout in seconds"] = 30.0,
) -> str:
    """Download the PDF at *url* into the current working directory.

    The local file is named after the last segment of the URL path. A generic
    name is used when the path has no final segment (for example when it ends
    in "/").

    Args:
        url: Location of the PDF to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The name of the file that was written.

    Raises:
        Exception: If the server answers with a status code other than 200.
    """
    # requests does not time out on its own; an unresponsive host would
    # otherwise block the caller indefinitely.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to download PDF. Status code: {response.status_code}")
    # basename() is "" for paths such as "/" or "/docs/", and open("") fails.
    filename = os.path.basename(urlparse(url).path) or "downloaded.pdf"
    with open(filename, "wb") as f:
        f.write(response.content)
    return filename
def extract_text_from_pdf(
    pdf_file: Annotated[str, "the local pdf file path"],
    password: Annotated[str, "PDF password (optional)"] = None,
) -> str:
    """Return the concatenated text of every page of a local PDF.

    Encrypted documents are decrypted with *password* before any page is read.

    Args:
        pdf_file: Path of the PDF on disk.
        password: Password for an encrypted PDF; ignored for unencrypted files.

    Returns:
        The text of all pages joined together without a separator.

    Raises:
        ValueError: If the PDF is encrypted and no password was given, the
            password is wrong, or decryption fails for another reason.
    """
    with open(pdf_file, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # Decrypt before touching any page: the pages of an encrypted
        # document cannot be read until decryption succeeds.
        if reader.is_encrypted:
            if password is None:
                raise ValueError("The PDF is encrypted and requires a password.")
            try:
                decrypt_result = reader.decrypt(password)
            except Exception as err:
                raise ValueError(f"Could not decrypt the PDF: {err}") from err
            # decrypt() reports a wrong password through a falsy return value
            # rather than an exception, so the result has to be checked.
            if not decrypt_result:
                raise ValueError("Incorrect password for the PDF.")
        # Defensive `or ""`: keeps join() safe if a page yields no text object.
        return "".join(page.extract_text() or "" for page in reader.pages)
def process_pdf_from_url(url: Annotated[str, "the pdf file url"], password: Annotated[str, "PDF password (optional)"] = None) -> str:
    """Download a PDF, extract its text, and delete the downloaded file.

    Args:
        url: Location of the PDF to fetch.
        password: Password for an encrypted PDF, if one is needed.

    Returns:
        The extracted text, or an empty string if downloading or extraction
        failed (the error is printed rather than raised).
    """
    local_file = None
    try:
        # Download the PDF
        local_file = download_pdf(url)
        print(f"PDF downloaded as: {local_file}")
        # Extract text from the downloaded PDF
        return extract_text_from_pdf(local_file, password)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return ""
    finally:
        # Clean up in every case, so a failed extraction (e.g. a wrong
        # password) does not leave the downloaded file behind.
        if local_file is not None:
            try:
                os.remove(local_file)
                print(f"Removed temporary file: {local_file}")
            except OSError as e:
                # A cleanup failure must not discard text already extracted.
                print(f"An error occurred: {str(e)}")
# Ad-hoc check: extract and print the text of a hard-coded local statement file.
# NOTE(review): presumably module-level, so it would run whenever this module
# is imported -- confirm, and consider an `if __name__ == "__main__":` guard.
text = extract_text_from_pdf("bank-statement-1.pdf")
print("text : ", text)
def process_local_pdf(
    file_path: Annotated[str, "local pdf file path"],
    password: Annotated[str, "PDF password (optional)"] = None,
) -> str:
    """Extract the text of a PDF that already exists on disk.

    Any error raised during extraction is printed, and an empty string is
    returned in its place.
    """
    try:
        extracted = extract_text_from_pdf(file_path, password)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        extracted = ""
    return extracted
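# TODO: the partition_pdf call below does not work -- presumably partition_pdf
# expects a file path or file object rather than already-extracted text.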
# pdf_text = partition_pdf(text)
# print("pdf_text : ", pdf_text)
# Usage examples
# For a PDF from URL
# pdf_url = "https://example.com/path/to/bank-statement-1.pdf" # Replace with actual URL
# pdf_password = "your_password_here" # Replace with actual password if needed
# text_from_url = process_pdf_from_url(pdf_url, pdf_password)
# print("Extracted text from URL:")
# print(text_from_url)
# # For a local PDF file
# local_pdf_path = "path/to/local/bank-statement-2.pdf" # Replace with actual local path
# local_pdf_password = "your_local_pdf_password" # Replace with actual password if needed
# text_from_local = process_local_pdf(local_pdf_path, local_pdf_password)
# print("Extracted text from local file:")
# print(text_from_local)

19
main.py
View File

@@ -1,9 +1,12 @@
import autogen
import os
import json
from dotenv import load_dotenv
from typing import Annotated
import requests
from system_prompts import front_desk_assistant_prompt, email_assistant_prompt
from system_prompts import front_desk_assistant_prompt, email_assistant_prompt, salary_slip_assistant_prompt
from extract_pdf_skill import process_pdf_from_url
load_dotenv() # take environment variables from .env.
config_list = [
{
@@ -27,6 +30,11 @@ def verify_email_with_prove_api(domain :Annotated[str, "The domain name to verif
return response.json() if response.status_code == 200 else None
def write_to_bank_file(data : Annotated[dict, "bank data that is provided by the user"]):
    """Overwrite ``bank.json`` in the current working directory with *data*.

    Args:
        data: JSON-serializable mapping to store.

    Raises:
        TypeError: If *data* contains a value that ``json`` cannot serialize.
            The existing ``bank.json`` is left untouched in that case.
    """
    # Serialize first: opening with 'w' truncates the file immediately, so a
    # failure inside json.dump would leave bank.json empty or half-written.
    payload = json.dumps(data)
    with open('bank.json', 'w') as outfile:
        outfile.write(payload)
front_desk_assistant = autogen.AssistantAgent(
name="front_desk_assistant",
@@ -45,8 +53,7 @@ email_assistant = autogen.AssistantAgent(
# Agent that handles the salary-slip step; its instructions come from
# salary_slip_assistant_prompt, imported from system_prompts.
salary_slip_assistant = autogen.AssistantAgent(
    name="salary_slip_assistant",
    llm_config=llm_config,
    # system_message may be passed only once (a repeated keyword argument is a
    # SyntaxError); the shared prompt constant replaces the inline prompt text.
    system_message=salary_slip_assistant_prompt
)
# assistant = autogen.AssistantAgent(
@@ -70,9 +77,15 @@ user_proxy = autogen.UserProxyAgent(
otherwise, reply CONTINUE, or the reason why the task is not solved yet."""
)
# Tool registration: each function is registered twice on user_proxy -- once
# with a name/description for the LLM, once under the same name for execution.
# NOTE(review): both halves are registered on user_proxy; confirm this agent
# has an llm_config, or move the register_for_llm half to the assistant that
# is expected to propose the call.

# Persist user-provided bank data to bank.json.
user_proxy.register_for_llm(name="write_to_bank_file", description="write to bank file")(write_to_bank_file)
user_proxy.register_for_execution("write_to_bank_file")(write_to_bank_file)
# Verify an email domain through the prove API helper.
user_proxy.register_for_llm(name="verify_email_with_prove_api", description="verify email's dkim using prove api verify_email_with_prove_api")(verify_email_with_prove_api)
user_proxy.register_for_execution(name="verify_email_with_prove_api")(verify_email_with_prove_api)
# Download a PDF from a URL and return its text (imported from extract_pdf_skill).
user_proxy.register_for_llm(name="process_pdf_from_url", description="process pdf from url using extract_pdf_skill")(process_pdf_from_url)
user_proxy.register_for_execution(name="process_pdf_from_url")(process_pdf_from_url)
def main():
# Register the verify_email_with_prove_api function for the email_assistant
email_assistant.register_function(

View File

@@ -9,4 +9,8 @@ front_desk_assistant_prompt = """You have a personality of monopoly banker. You
email_assistant_prompt = """You will have access to bank.json from front_desk_assistant.
You will guide user to paste their raw email. Assume user has desktop and not on their mobile phone.
guide user to paste their raw email to you. Tell them to paste raw email in chunks, not the complete email in one go.
You will then analyze the email and check if it's valid and details matches with bank.json."""
You will then analyze the email and check if it's valid and details matches with bank.json."""
# System prompt for the salary-slip agent: ask for the PDF (and its password if
# it is protected), call process_pdf_from_url, and reconcile the extracted
# account number and balance with bank.json.
salary_slip_assistant_prompt = """You will ask user to upload a salary slip in pdf format and password for unlocking pdf(if pdf is password protected).
You will call process_pdf_from_url function and analyze it and gather following informations from the pdf.
account number, bank balance. the details should match with bank.json file. You will add additional keys in bank.json file and save it."""