mirror of
https://github.com/hackertron/LucidLoanMachine.git
synced 2026-01-09 20:47:57 -05:00
salary assistant wip
This commit is contained in:
@@ -1,17 +1,68 @@
|
|||||||
from unstructured.partition.pdf import partition_pdf
|
import requests
|
||||||
|
import os
|
||||||
import PyPDF2
|
import PyPDF2
|
||||||
|
from typing import Annotated
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
def extract_text_from_pdf(pdf_file):
|
def download_pdf(url: Annotated[str, "the pdf file url"]) -> str:
    """Download the PDF at *url* into a new temporary file and return its path.

    The file name is generated by ``tempfile.mkstemp`` rather than derived
    from the URL, so a remote-controlled URL path can neither overwrite an
    existing file in the working directory nor produce an empty file name.
    The caller is responsible for deleting the returned file.

    Raises:
        Exception: if the server answers with a status code other than 200.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import tempfile

    # A timeout prevents an unresponsive server from blocking the caller forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Failed to download PDF. Status code: {response.status_code}")

    fd, filename = tempfile.mkstemp(suffix=".pdf")
    with os.fdopen(fd, "wb") as f:
        f.write(response.content)
    return filename
|
||||||
|
|
||||||
|
def extract_text_from_pdf(pdf_file: Annotated[str, "the local pdf file path"], password: Annotated[str, "PDF password (optional)"] = None) -> str:
    """Return the concatenated text of every page of the PDF at *pdf_file*.

    Args:
        pdf_file: path of a PDF file on the local file system.
        password: password used to decrypt the document; only consulted when
            the reader reports the PDF as encrypted.

    Raises:
        ValueError: if the PDF is encrypted and no password was supplied, or
            if the supplied password does not decrypt it.
    """
    with open(pdf_file, "rb") as file:
        reader = PyPDF2.PdfReader(file)

        if reader.is_encrypted:
            if password is None:
                raise ValueError("The PDF is encrypted and requires a password.")
            try:
                decrypted = reader.decrypt(password)
            except Exception as err:
                raise ValueError("Incorrect password for the PDF.") from err
            # decrypt() reports a wrong password through a falsy return value
            # instead of an exception, so the result has to be checked too.
            if not decrypted:
                raise ValueError("Incorrect password for the PDF.")

        # Extraction happens while the file is still open because pages are
        # read from the underlying stream. ``or ""`` guards against a page
        # yielding None instead of a string (possible on some PyPDF2
        # releases - TODO confirm for the pinned version).
        return "".join(page.extract_text() or "" for page in reader.pages)
|
||||||
|
|
||||||
|
def process_pdf_from_url(url: Annotated[str, "the pdf file url"], password: Annotated[str, "PDF password (optional)"] = None) -> str:
    """Download the PDF at *url*, extract its text and delete the download.

    Any failure is reported on stdout and turned into an empty string, so
    this function never raises to its caller. The downloaded file is removed
    whether or not text extraction succeeds.

    Args:
        url: location of the PDF to fetch.
        password: password for an encrypted PDF, if one is needed.

    Returns:
        The extracted text, or ``""`` when downloading or extraction failed.
    """
    local_file = None
    try:
        # Download the PDF
        local_file = download_pdf(url)
        print(f"PDF downloaded as: {local_file}")

        # Extract text from the downloaded PDF
        return extract_text_from_pdf(local_file, password)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return ""
    finally:
        # Clean up in ``finally`` so a failed extraction (e.g. wrong
        # password) does not leave the downloaded file behind.
        if local_file is not None and os.path.exists(local_file):
            try:
                os.remove(local_file)
                print(f"Removed temporary file: {local_file}")
            except OSError as e:
                print(f"An error occurred: {str(e)}")
|
||||||
|
|
||||||
text = extract_text_from_pdf("bank-statement-1.pdf")
|
def process_local_pdf(file_path: Annotated[str, "local pdf file path"], password: Annotated[str, "PDF password (optional)"] = None) -> str:
    """Extract the text of a PDF stored on the local file system.

    Delegates to ``extract_text_from_pdf``. Any exception it raises is
    printed to stdout and replaced by an empty-string result.
    """
    try:
        extracted = extract_text_from_pdf(file_path, password)
    except Exception as err:
        print(f"An error occurred: {str(err)}")
        extracted = ""
    return extracted
|
||||||
|
|
||||||
# below does not work
|
# Usage examples
|
||||||
# pdf_text = partition_pdf(text)
|
# For a PDF from URL
|
||||||
# print("pdf_text : ", pdf_text)
|
# pdf_url = "https://example.com/path/to/bank-statement-1.pdf" # Replace with actual URL
|
||||||
|
# pdf_password = "your_password_here" # Replace with actual password if needed
|
||||||
|
# text_from_url = process_pdf_from_url(pdf_url, pdf_password)
|
||||||
|
# print("Extracted text from URL:")
|
||||||
|
# print(text_from_url)
|
||||||
|
|
||||||
|
# For a local PDF file
|
||||||
|
# local_pdf_path = "path/to/local/bank-statement-2.pdf" # Replace with actual local path
|
||||||
|
# local_pdf_password = "your_local_pdf_password" # Replace with actual password if needed
|
||||||
|
# text_from_local = process_local_pdf(local_pdf_path, local_pdf_password)
|
||||||
|
# print("Extracted text from local file:")
|
||||||
|
# print(text_from_local)
|
||||||
19
main.py
19
main.py
@@ -1,9 +1,12 @@
|
|||||||
import autogen
|
import autogen
|
||||||
import os
|
import os
|
||||||
|
import json
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
import requests
|
import requests
|
||||||
from system_prompts import front_desk_assistant_prompt, email_assistant_prompt
|
from system_prompts import front_desk_assistant_prompt, email_assistant_prompt, salary_slip_assistant_prompt
|
||||||
|
from extract_pdf_skill import process_pdf_from_url
|
||||||
|
|
||||||
load_dotenv() # take environment variables from .env.
|
load_dotenv() # take environment variables from .env.
|
||||||
config_list = [
|
config_list = [
|
||||||
{
|
{
|
||||||
@@ -27,6 +30,11 @@ def verify_email_with_prove_api(domain :Annotated[str, "The domain name to verif
|
|||||||
return response.json() if response.status_code == 200 else None
|
return response.json() if response.status_code == 200 else None
|
||||||
|
|
||||||
|
|
||||||
|
def write_to_bank_file(data: Annotated[dict, "bank data that is provided by the user"]):
    """Serialize *data* as JSON into ``bank.json`` in the current directory.

    The file is opened in write mode, so any previous content is replaced.
    """
    with open('bank.json', mode='w') as bank_file:
        json.dump(data, bank_file)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
front_desk_assistant = autogen.AssistantAgent(
|
front_desk_assistant = autogen.AssistantAgent(
|
||||||
name="front_desk_assistant",
|
name="front_desk_assistant",
|
||||||
@@ -45,8 +53,7 @@ email_assistant = autogen.AssistantAgent(
|
|||||||
salary_slip_assistant = autogen.AssistantAgent(
|
salary_slip_assistant = autogen.AssistantAgent(
|
||||||
name="salary_slip_assistant",
|
name="salary_slip_assistant",
|
||||||
llm_config=llm_config,
|
llm_config=llm_config,
|
||||||
system_message="""You will ask user to upload a salary slip in pdf format. You will analyze it and gather following informations from the pdf.
|
system_message=salary_slip_assistant_prompt
|
||||||
account number, bank balance. the details should match with bank.json file. You will add additional keys in bank.json file and save it."""
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# assistant = autogen.AssistantAgent(
|
# assistant = autogen.AssistantAgent(
|
||||||
@@ -70,9 +77,15 @@ user_proxy = autogen.UserProxyAgent(
|
|||||||
otherwise, reply CONTINUE, or the reason why the task is not solved yet."""
|
otherwise, reply CONTINUE, or the reason why the task is not solved yet."""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
user_proxy.register_for_llm(name="write_to_bank_file", description="write to bank file")(write_to_bank_file)
|
||||||
|
user_proxy.register_for_execution("write_to_bank_file")(write_to_bank_file)
|
||||||
|
|
||||||
user_proxy.register_for_llm(name="verify_email_with_prove_api", description="verify email's dkim using prove api verify_email_with_prove_api")(verify_email_with_prove_api)
|
user_proxy.register_for_llm(name="verify_email_with_prove_api", description="verify email's dkim using prove api verify_email_with_prove_api")(verify_email_with_prove_api)
|
||||||
user_proxy.register_for_execution(name="verify_email_with_prove_api")(verify_email_with_prove_api)
|
user_proxy.register_for_execution(name="verify_email_with_prove_api")(verify_email_with_prove_api)
|
||||||
|
|
||||||
|
user_proxy.register_for_llm(name="process_pdf_from_url", description="process pdf from url using extract_pdf_skill")(process_pdf_from_url)
|
||||||
|
user_proxy.register_for_execution(name="process_pdf_from_url")(process_pdf_from_url)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# Register the verify_email_with_prove_api function for the email_assistant
|
# Register the verify_email_with_prove_api function for the email_assistant
|
||||||
email_assistant.register_function(
|
email_assistant.register_function(
|
||||||
|
|||||||
@@ -9,4 +9,8 @@ front_desk_assistant_prompt = """You have a personality of monopoly banker. You
|
|||||||
email_assistant_prompt = """You will have access to bank.json from front_desk_assistant.
|
email_assistant_prompt = """You will have access to bank.json from front_desk_assistant.
|
||||||
You will guide user to paste their raw email. Assume user has desktop and not on their mobile phone.
|
You will guide user to paste their raw email. Assume user has desktop and not on their mobile phone.
|
||||||
guide user to paste their raw email to you. Tell them to paste raw email in chunks, not the complete email in one go.
|
guide user to paste their raw email to you. Tell them to paste raw email in chunks, not the complete email in one go.
|
||||||
You will then analyze the email and check if it's valid and details matches with bank.json."""
|
You will then analyze the email and check if it's valid and details matches with bank.json."""
|
||||||
|
|
||||||
|
salary_slip_assistant_prompt = """You will ask user to upload a salary slip in pdf format and password for unlocking pdf(if pdf is password protected).
|
||||||
|
You will call process_pdf_from_url function and analyze it and gather following informations from the pdf.
|
||||||
|
account number, bank balance. the details should match with bank.json file. You will add additional keys in bank.json file and save it."""
|
||||||
Reference in New Issue
Block a user