mirror of
https://github.com/hackertron/LucidLoanMachine.git
synced 2026-01-09 04:28:03 -05:00
salary assistant wip
This commit is contained in:
@@ -1,17 +1,68 @@
|
||||
from unstructured.partition.pdf import partition_pdf
|
||||
import requests
|
||||
import os
|
||||
import PyPDF2
|
||||
from typing import Annotated
|
||||
from urllib.parse import urlparse
|
||||
|
||||
def extract_text_from_pdf(pdf_file):
|
||||
def download_pdf(
    url: Annotated[str, "the pdf file url"],
    timeout: Annotated[float, "seconds to wait for the server"] = 30.0,
) -> str:
    """Download the PDF at *url* into the current working directory.

    The local file is named after the last path component of the URL.

    Args:
        url: Location of the PDF to fetch.
        timeout: Seconds to wait for the server. Without a timeout a
            stalled server would block the caller indefinitely.

    Returns:
        The name of the file that was written.

    Raises:
        Exception: If the server answers with a status other than 200.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to download PDF. Status code: {response.status_code}")

    # A URL whose path is empty or ends in "/" yields an empty basename,
    # which open() would reject; fall back to a fixed name in that case.
    filename = os.path.basename(urlparse(url).path) or "downloaded.pdf"

    # NOTE(review): the name is taken from the URL and written relative to
    # the working directory, so an existing file with the same name is
    # overwritten — confirm this is acceptable for untrusted URLs.
    with open(filename, "wb") as f:
        f.write(response.content)
    return filename
|
||||
|
||||
def extract_text_from_pdf(pdf_file: Annotated[str, "the local pdf file path"], password: Annotated[str, "PDF password (optional)"] = None) -> str:
    """Return the concatenated text of every page in a local PDF.

    Args:
        pdf_file: Path of the PDF on disk.
        password: Password used to unlock the PDF when it is encrypted.

    Returns:
        The text of all pages joined together, in page order.

    Raises:
        ValueError: If the PDF is encrypted and no password was given, or
            if the PDF could not be decrypted with the given password.
    """
    with open(pdf_file, "rb") as file:
        reader = PyPDF2.PdfReader(file)

        # Decrypt before touching any page: reading pages of a locked
        # document fails.
        if reader.is_encrypted:
            if password is None:
                raise ValueError("The PDF is encrypted and requires a password.")
            try:
                decrypt_result = reader.decrypt(password)
            except Exception as err:
                raise ValueError("Incorrect password for the PDF.") from err
            # decrypt() signals a wrong password through a falsy return
            # value rather than an exception, so check it explicitly.
            if not decrypt_result:
                raise ValueError("Incorrect password for the PDF.")

        # `or ""` guards against a page for which no text is returned.
        return "".join(page.extract_text() or "" for page in reader.pages)
|
||||
|
||||
def process_pdf_from_url(url: Annotated[str, "the pdf file url"], password: Annotated[str, "PDF password (optional)"] = None) -> str:
    """Download a PDF, extract its text, and delete the downloaded copy.

    Args:
        url: Location of the PDF to fetch.
        password: Password used to unlock the PDF when it is encrypted.

    Returns:
        The extracted text, or an empty string if any step failed (the
        error is printed rather than raised).
    """
    try:
        # Download the PDF
        local_file = download_pdf(url)
        print(f"PDF downloaded as: {local_file}")

        try:
            # Extract text from the downloaded PDF
            return extract_text_from_pdf(local_file, password)
        finally:
            # Clean up even when extraction fails (e.g. a missing or wrong
            # password), so failed attempts do not leave files behind.
            os.remove(local_file)
            print(f"Removed temporary file: {local_file}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return ""
|
||||
|
||||
text = extract_text_from_pdf("bank-statement-1.pdf")
|
||||
print("text : ", text)
|
||||
def process_local_pdf(file_path: Annotated[str, "local pdf file path"], password: Annotated[str, "PDF password (optional)"] = None) -> str:
    """Extract the text of a PDF that is already on disk.

    Failures are reported on stdout instead of being raised; in that case
    an empty string is returned.
    """
    extracted = ""
    try:
        extracted = extract_text_from_pdf(file_path, password)
    except Exception as exc:
        print(f"An error occurred: {str(exc)}")
    return extracted
|
||||
|
||||
# below does not work
|
||||
# pdf_text = partition_pdf(text)
|
||||
# print("pdf_text : ", pdf_text)
|
||||
# Usage examples
|
||||
# For a PDF from URL
|
||||
# pdf_url = "https://example.com/path/to/bank-statement-1.pdf" # Replace with actual URL
|
||||
# pdf_password = "your_password_here" # Replace with actual password if needed
|
||||
# text_from_url = process_pdf_from_url(pdf_url, pdf_password)
|
||||
# print("Extracted text from URL:")
|
||||
# print(text_from_url)
|
||||
|
||||
# # For a local PDF file
|
||||
# local_pdf_path = "path/to/local/bank-statement-2.pdf" # Replace with actual local path
|
||||
# local_pdf_password = "your_local_pdf_password" # Replace with actual password if needed
|
||||
# text_from_local = process_local_pdf(local_pdf_path, local_pdf_password)
|
||||
# print("Extracted text from local file:")
|
||||
# print(text_from_local)
|
||||
19
main.py
19
main.py
@@ -1,9 +1,12 @@
|
||||
import autogen
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from typing import Annotated
|
||||
import requests
|
||||
from system_prompts import front_desk_assistant_prompt, email_assistant_prompt
|
||||
from system_prompts import front_desk_assistant_prompt, email_assistant_prompt, salary_slip_assistant_prompt
|
||||
from extract_pdf_skill import process_pdf_from_url
|
||||
|
||||
load_dotenv() # take environment variables from .env.
|
||||
config_list = [
|
||||
{
|
||||
@@ -27,6 +30,11 @@ def verify_email_with_prove_api(domain :Annotated[str, "The domain name to verif
|
||||
return response.json() if response.status_code == 200 else None
|
||||
|
||||
|
||||
def write_to_bank_file(data : Annotated[dict, "bank data that is provided by the user"]):
    """Serialize *data* as JSON into ``bank.json``, replacing any previous content."""
    with open('bank.json', mode='w') as bank_fh:
        json.dump(data, fp=bank_fh)
|
||||
|
||||
|
||||
|
||||
front_desk_assistant = autogen.AssistantAgent(
|
||||
name="front_desk_assistant",
|
||||
@@ -45,8 +53,7 @@ email_assistant = autogen.AssistantAgent(
|
||||
salary_slip_assistant = autogen.AssistantAgent(
|
||||
name="salary_slip_assistant",
|
||||
llm_config=llm_config,
|
||||
system_message="""You will ask user to upload a salary slip in pdf format. You will analyze it and gather following informations from the pdf.
|
||||
account number, bank balance. the details should match with bank.json file. You will add additional keys in bank.json file and save it."""
|
||||
system_message=salary_slip_assistant_prompt
|
||||
)
|
||||
|
||||
# assistant = autogen.AssistantAgent(
|
||||
@@ -70,9 +77,15 @@ user_proxy = autogen.UserProxyAgent(
|
||||
otherwise, reply CONTINUE, or the reason why the task is not solved yet."""
|
||||
)
|
||||
|
||||
user_proxy.register_for_llm(name="write_to_bank_file", description="write to bank file")(write_to_bank_file)
|
||||
user_proxy.register_for_execution("write_to_bank_file")(write_to_bank_file)
|
||||
|
||||
user_proxy.register_for_llm(name="verify_email_with_prove_api", description="verify email's dkim using prove api verify_email_with_prove_api")(verify_email_with_prove_api)
|
||||
user_proxy.register_for_execution(name="verify_email_with_prove_api")(verify_email_with_prove_api)
|
||||
|
||||
user_proxy.register_for_llm(name="process_pdf_from_url", description="process pdf from url using extract_pdf_skill")(process_pdf_from_url)
|
||||
user_proxy.register_for_execution(name="process_pdf_from_url")(process_pdf_from_url)
|
||||
|
||||
def main():
|
||||
# Register the verify_email_with_prove_api function for the email_assistant
|
||||
email_assistant.register_function(
|
||||
|
||||
@@ -9,4 +9,8 @@ front_desk_assistant_prompt = """You have a personality of monopoly banker. You
|
||||
email_assistant_prompt = """You will have access to bank.json from front_desk_assistant.
|
||||
You will guide user to paste their raw email. Assume user has desktop and not on their mobile phone.
|
||||
guide user to paste their raw email to you. Tell them to paste raw email in chunks, not the complete email in one go.
|
||||
You will then analyze the email and check if it's valid and details matches with bank.json."""
|
||||
You will then analyze the email and check if it's valid and details matches with bank.json."""
|
||||
|
||||
salary_slip_assistant_prompt = """You will ask user to upload a salary slip in pdf format and password for unlocking pdf(if pdf is password protected).
|
||||
You will call process_pdf_from_url function and analyze it and gather following informations from the pdf.
|
||||
account number, bank balance. the details should match with bank.json file. You will add additional keys in bank.json file and save it."""
|
||||
Reference in New Issue
Block a user