mirror of
https://github.com/e-johnstonn/BriefGPT.git
synced 2026-01-09 13:57:58 -05:00
Added support for EPUB files
This commit is contained in:
@@ -20,7 +20,9 @@ load_dotenv('test.env')
|
||||
|
||||
model_type = os.getenv('MODEL_TYPE')
|
||||
model_path = os.getenv('MODEL_PATH')
|
||||
print(model_path)
|
||||
|
||||
|
||||
accepted_filetypes = ['.txt', '.pdf', '.epub']
|
||||
|
||||
#Model is initialized here. Configure it with your parameters and the path to your model.
|
||||
|
||||
@@ -44,7 +46,7 @@ def chat():
|
||||
st.session_state.text_input = ''
|
||||
directory = 'documents'
|
||||
files = os.listdir(directory)
|
||||
files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
|
||||
files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
|
||||
selected_file = st.selectbox('Select a file', files)
|
||||
st.write('You selected: ' + selected_file)
|
||||
selected_file_path = os.path.join(directory, selected_file)
|
||||
|
||||
8
main.py
8
main.py
@@ -19,6 +19,8 @@ load_dotenv('test.env')
|
||||
|
||||
st.set_page_config(page_title='BriefGPT')
|
||||
|
||||
accepted_filetypes = ['.txt', '.pdf', '.epub']
|
||||
|
||||
def summarize():
|
||||
"""
|
||||
The main function for the Streamlit app.
|
||||
@@ -33,7 +35,7 @@ def summarize():
|
||||
if input_method == 'Document':
|
||||
directory = 'documents'
|
||||
files = os.listdir(directory)
|
||||
files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
|
||||
files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
|
||||
if files:
|
||||
selected_file = st.selectbox('Select a file', files)
|
||||
st.write('You selected: ' + selected_file)
|
||||
@@ -67,7 +69,7 @@ def chat():
|
||||
st.session_state.text_input = ''
|
||||
directory = 'documents'
|
||||
files = os.listdir(directory)
|
||||
files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
|
||||
files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
|
||||
selected_file = st.selectbox('Select a file', files)
|
||||
st.write('You selected: ' + selected_file)
|
||||
selected_file_path = os.path.join(directory, selected_file)
|
||||
@@ -96,7 +98,7 @@ def documents():
|
||||
st.markdown('Documents are stored in the documents folder in the project directory.')
|
||||
directory = 'documents'
|
||||
files = os.listdir(directory)
|
||||
files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
|
||||
files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
|
||||
if files:
|
||||
files_df = pd.DataFrame(files, columns=['File Name'], index=range(1, len(files) + 1))
|
||||
st.dataframe(files_df, width=1000)
|
||||
|
||||
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
@@ -6,7 +6,7 @@ import tiktoken
|
||||
from langchain import PromptTemplate
|
||||
from langchain.chains.summarize import load_summarize_chain
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.document_loaders import YoutubeLoader, TextLoader, PyPDFLoader
|
||||
from langchain.document_loaders import YoutubeLoader, TextLoader, PyPDFLoader, UnstructuredEPubLoader
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.schema import Document
|
||||
|
||||
@@ -31,6 +31,12 @@ def doc_loader(file_path: str):
|
||||
loader = TextLoader(file_path, encoding='utf-8')
|
||||
elif file_path.endswith('.pdf'):
|
||||
loader = PyPDFLoader(file_path)
|
||||
elif file_path.endswith('.epub'):
|
||||
try:
|
||||
loader = UnstructuredEPubLoader(file_path)
|
||||
except Exception as e:
|
||||
st.warning('Error loading file - ensure you have pandoc installed and added to PATH.')
|
||||
|
||||
return loader.load()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user