mirror of
https://github.com/hackertron/LucidLoanMachine.git
synced 2026-01-09 18:37:58 -05:00
Merge pull request #5 from hackertron/salary_pdf_assistant
Salary pdf assistant
This commit is contained in:
@@ -1,17 +1,68 @@
|
||||
from unstructured.partition.pdf import partition_pdf
|
||||
import requests
|
||||
import os
|
||||
import PyPDF2
|
||||
from typing import Annotated
|
||||
from urllib.parse import urlparse
|
||||
|
||||
def extract_text_from_pdf(pdf_file):
|
||||
def download_pdf(url: Annotated[str, "the pdf file url"]) -> str:
    """Download the PDF at ``url`` into a fresh temporary file and return its path.

    The bytes are written to a newly created temporary file instead of a name
    derived from the URL. The URL is user supplied, so a URL-derived name in
    the working directory could overwrite an unrelated local file (which the
    caller would then delete during cleanup), and it is empty for URLs whose
    path ends in "/".

    Args:
        url: Location of the PDF to fetch.

    Returns:
        Path of the local file holding the downloaded bytes. The caller is
        responsible for deleting it.

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.RequestException: On connection errors or when the request
            times out.
    """
    import tempfile  # function-scope import keeps this block self-contained

    # Without a timeout a stalled server would block the caller indefinitely.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Failed to download PDF. Status code: {response.status_code}")

    # delete=False: the file must outlive this function so the caller can read it.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
        f.write(response.content)
        return f.name
|
||||
|
||||
def extract_text_from_pdf(pdf_file: Annotated[str, "the local pdf file path"], password: Annotated[str, "PDF password (optional)"] = None) -> str:
    """Return the concatenated text of every page of a local PDF.

    Args:
        pdf_file: Path of the PDF file to read.
        password: Password used to decrypt the PDF; only needed when the
            document is encrypted.

    Returns:
        The text of all pages joined together without a separator.

    Raises:
        FileNotFoundError: If ``pdf_file`` does not exist.
        ValueError: If the PDF is encrypted and no password was given, or the
            PDF could not be decrypted with the given password.
    """
    with open(pdf_file, "rb") as file:
        reader = PyPDF2.PdfReader(file)

        # Decrypt before touching any page: reading pages of a still-encrypted
        # document fails.
        if reader.is_encrypted:
            if password is None:
                raise ValueError("The PDF is encrypted and requires a password.")
            try:
                unlocked = reader.decrypt(password)
            except Exception as err:
                raise ValueError("Incorrect password for the PDF.") from err
            # PyPDF2 signals a wrong password through a falsy return value
            # rather than an exception, so the result has to be checked.
            if not unlocked:
                raise ValueError("Incorrect password for the PDF.")

        # `or ""` guards against pages for which no text is returned.
        return "".join(page.extract_text() or "" for page in reader.pages)
|
||||
|
||||
def process_pdf_from_url(url: Annotated[str, "the pdf file url"], password: Annotated[str, "PDF password (optional)"] = None) -> str:
    """Download a PDF from ``url``, extract its text and delete the download.

    Args:
        url: Location of the PDF to fetch.
        password: Password for the PDF, if it is encrypted.

    Returns:
        The extracted text, or an empty string if downloading or extraction
        failed (the error is printed, not raised).
    """
    local_file = None
    try:
        # Download the PDF
        local_file = download_pdf(url)
        print(f"PDF downloaded as: {local_file}")

        # Extract text from the downloaded PDF
        return extract_text_from_pdf(local_file, password)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return ""
    finally:
        # Clean up in `finally` so the download is removed even when extraction
        # fails; a cleanup failure must not discard text already extracted.
        if local_file is not None:
            try:
                os.remove(local_file)
                print(f"Removed temporary file: {local_file}")
            except OSError as cleanup_error:
                print(f"Could not remove temporary file {local_file}: {cleanup_error}")
|
||||
|
||||
text = extract_text_from_pdf("bank-statement-1.pdf")
|
||||
print("text : ", text)
|
||||
def process_local_pdf(file_path: Annotated[str, "local pdf file path"], password: Annotated[str, "PDF password (optional)"] = None) -> str:
    """Extract the text of a PDF that is already on disk.

    Args:
        file_path: Path of the local PDF file.
        password: Password for the PDF, if it is encrypted.

    Returns:
        The extracted text, or an empty string if extraction raised (the
        error is printed, not propagated).
    """
    extracted = ""
    try:
        extracted = extract_text_from_pdf(file_path, password)
    except Exception as e:
        # Best-effort wrapper: report the problem and fall through to "".
        print(f"An error occurred: {str(e)}")
    return extracted
|
||||
|
||||
# below does not work
|
||||
# pdf_text = partition_pdf(text)
|
||||
# print("pdf_text : ", pdf_text)
|
||||
# Usage examples
|
||||
# For a PDF from URL
|
||||
# pdf_url = "http://127.0.0.1:5500/bank-statement.pdf" # Replace with actual URL
|
||||
# pdf_password = "your_password_here" # Replace with actual password if needed
|
||||
# text_from_url = process_pdf_from_url(pdf_url, pdf_password)
|
||||
# print("Extracted text from URL:")
|
||||
# print(text_from_url)
|
||||
|
||||
# # For a local PDF file
|
||||
# local_pdf_path = "path/to/local/bank-statement-2.pdf" # Replace with actual local path
|
||||
# local_pdf_password = "your_local_pdf_password" # Replace with actual password if needed
|
||||
# text_from_local = process_local_pdf(local_pdf_path, local_pdf_password)
|
||||
# print("Extracted text from local file:")
|
||||
# print(text_from_local)
|
||||
18
main.py
18
main.py
@@ -1,9 +1,12 @@
|
||||
import autogen
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from typing import Annotated
|
||||
import requests
|
||||
from system_prompts import front_desk_assistant_prompt, email_assistant_prompt
|
||||
from system_prompts import front_desk_assistant_prompt, email_assistant_prompt, salary_slip_assistant_prompt
|
||||
from extract_pdf_skill import process_pdf_from_url
|
||||
|
||||
load_dotenv() # take environment variables from .env.
|
||||
config_list = [
|
||||
{
|
||||
@@ -27,7 +30,6 @@ def verify_email_with_prove_api(domain :Annotated[str, "The domain name to verif
|
||||
return response.json() if response.status_code == 200 else None
|
||||
|
||||
|
||||
|
||||
front_desk_assistant = autogen.AssistantAgent(
|
||||
name="front_desk_assistant",
|
||||
llm_config=llm_config,
|
||||
@@ -45,8 +47,7 @@ email_assistant = autogen.AssistantAgent(
|
||||
salary_slip_assistant = autogen.AssistantAgent(
|
||||
name="salary_slip_assistant",
|
||||
llm_config=llm_config,
|
||||
system_message="""You will ask user to upload a salary slip in pdf format. You will analyze it and gather following informations from the pdf.
|
||||
account number, bank balance. the details should match with bank.json file. You will add additional keys in bank.json file and save it."""
|
||||
system_message=salary_slip_assistant_prompt
|
||||
)
|
||||
|
||||
# assistant = autogen.AssistantAgent(
|
||||
@@ -70,14 +71,19 @@ user_proxy = autogen.UserProxyAgent(
|
||||
otherwise, reply CONTINUE, or the reason why the task is not solved yet."""
|
||||
)
|
||||
|
||||
|
||||
user_proxy.register_for_llm(name="verify_email_with_prove_api", description="verify email's dkim using prove api verify_email_with_prove_api")(verify_email_with_prove_api)
|
||||
user_proxy.register_for_execution(name="verify_email_with_prove_api")(verify_email_with_prove_api)
|
||||
|
||||
user_proxy.register_for_llm(name="process_pdf_from_url", description="process pdf from url using extract_pdf_skill")(process_pdf_from_url)
|
||||
user_proxy.register_for_execution(name="process_pdf_from_url")(process_pdf_from_url)
|
||||
|
||||
def main():
|
||||
# Register the verify_email_with_prove_api function for the email_assistant
|
||||
email_assistant.register_function(
|
||||
function_map={
|
||||
"verify_email_with_prove_api": verify_email_with_prove_api
|
||||
"verify_email_with_prove_api": verify_email_with_prove_api,
|
||||
"process_pdf_from_url": process_pdf_from_url
|
||||
}
|
||||
)
|
||||
chat_results = user_proxy.initiate_chats([
|
||||
@@ -95,7 +101,7 @@ def main():
|
||||
},
|
||||
{
|
||||
"recipient": salary_slip_assistant,
|
||||
"message": "guide user to upload a salary slip in pdf format",
|
||||
"message": "guide user to upload a salary slip in pdf format and call process_pdf_from_url function to verify the pdf",
|
||||
"silent": False,
|
||||
"summary_method": "reflection_with_llm"
|
||||
}
|
||||
|
||||
@@ -3,10 +3,22 @@ front_desk_assistant_prompt = """You have a personality of monopoly banker. You
|
||||
What bank do you use, Do you have a job/proof of income, what's your email?, Do you have any history of not paying back loans?
|
||||
once you collect all these answers, create a json response with following key
|
||||
{"first_name" : "", last_name: "", "country" : "", "bank" : "", "income" : "", "history" : "", loan_amount : "", email : ""}
|
||||
Ask user for confirmation that the details are right and want to proceed with it. Show the response in json format and save it to a file
|
||||
Ask user for confirmation that the details are right and want to proceed with it. write a python code to save
|
||||
that json response to a file called bank.json
|
||||
"""
|
||||
|
||||
email_assistant_prompt = """You will have access to bank.json from front_desk_assistant.
|
||||
You will guide user to paste their raw email. Assume user has desktop and not on their mobile phone.
|
||||
guide user to paste their raw email to you. Tell them to paste raw email in chunks, not the complete email in one go.
|
||||
You will then analyze the email and check if it's valid and details matches with bank.json."""
|
||||
You will then analyze the email and check if it's valid and details matches with bank.json."""
|
||||
|
||||
# System prompt for the salary-slip agent. It describes process_pdf_from_url
# as it actually behaves: the function downloads the PDF and extracts its text,
# returning an empty string on failure; it performs no signature verification.
salary_slip_assistant_prompt = """
You will ask the user to provide a URL for their salary slip in PDF format.
If the PDF is password protected, also ask the user for its password.
Once you receive the URL, use the process_pdf_from_url function to download the PDF and extract its text.
The function returns the extracted text on success and an empty string if the PDF could not be downloaded or read.
It does not verify signatures or authenticity, so do not tell the user the PDF has been verified.
Analyze the extracted text to gather the following information from the PDF:
account number, bank balance.
Ensure the details match with the bank.json file.
Add additional keys to the bank.json file and save it.
If there are any errors in processing the PDF, inform the user and ask for a different PDF.
"""
|
||||
Reference in New Issue
Block a user