Added support for EPUB files

This commit is contained in:
ethan
2023-05-16 14:16:35 -07:00
parent 4f5395d305
commit 51e06f9cfd
4 changed files with 16 additions and 6 deletions

View File

@@ -20,7 +20,9 @@ load_dotenv('test.env')
model_type = os.getenv('MODEL_TYPE')
model_path = os.getenv('MODEL_PATH')
print(model_path)
accepted_filetypes = ['.txt', '.pdf', '.epub']
# The model is initialized here. Configure it with your parameters and the path to your model.
@@ -44,7 +46,7 @@ def chat():
st.session_state.text_input = ''
directory = 'documents'
files = os.listdir(directory)
files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
selected_file = st.selectbox('Select a file', files)
st.write('You selected: ' + selected_file)
selected_file_path = os.path.join(directory, selected_file)

View File

@@ -19,6 +19,8 @@ load_dotenv('test.env')
st.set_page_config(page_title='BriefGPT')
accepted_filetypes = ['.txt', '.pdf', '.epub']
def summarize():
"""
The main function for the Streamlit app.
@@ -33,7 +35,7 @@ def summarize():
if input_method == 'Document':
directory = 'documents'
files = os.listdir(directory)
files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
if files:
selected_file = st.selectbox('Select a file', files)
st.write('You selected: ' + selected_file)
@@ -67,7 +69,7 @@ def chat():
st.session_state.text_input = ''
directory = 'documents'
files = os.listdir(directory)
files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
selected_file = st.selectbox('Select a file', files)
st.write('You selected: ' + selected_file)
selected_file_path = os.path.join(directory, selected_file)
@@ -96,7 +98,7 @@ def documents():
st.markdown('Documents are stored in the documents folder in the project directory.')
directory = 'documents'
files = os.listdir(directory)
files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
if files:
files_df = pd.DataFrame(files, columns=['File Name'], index=range(1, len(files) + 1))
st.dataframe(files_df, width=1000)

Binary file not shown.

View File

@@ -6,7 +6,7 @@ import tiktoken
from langchain import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import YoutubeLoader, TextLoader, PyPDFLoader
from langchain.document_loaders import YoutubeLoader, TextLoader, PyPDFLoader, UnstructuredEPubLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
@@ -31,6 +31,12 @@ def doc_loader(file_path: str):
loader = TextLoader(file_path, encoding='utf-8')
elif file_path.endswith('.pdf'):
loader = PyPDFLoader(file_path)
elif file_path.endswith('.epub'):
try:
loader = UnstructuredEPubLoader(file_path)
except Exception as e:
st.warning('Error loading file - ensure you have pandoc installed and added to PATH.')
return loader.load()