Added support for EPUB (.epub) files in the file pickers and document loader

2026-01-09 13:57:58 -05:00 · 2023-05-16 14:16:35 -07:00
parent 4f5395d305
commit 51e06f9cfd
4 changed files with 16 additions and 6 deletions
--- a/local_app.py
+++ b/local_app.py
@@ -20,7 +20,9 @@ load_dotenv('test.env')

 model_type = os.getenv('MODEL_TYPE')
 model_path = os.getenv('MODEL_PATH')
-print(model_path)
+
+
+accepted_filetypes = ['.txt', '.pdf', '.epub']

 #Model is initialized here. Configure it with your parameters and the path to your model.

@@ -44,7 +46,7 @@ def chat():
        st.session_state.text_input = ''
    directory = 'documents'
    files = os.listdir(directory)
-    files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
+    files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
    selected_file = st.selectbox('Select a file', files)
    st.write('You selected: ' + selected_file)
    selected_file_path = os.path.join(directory, selected_file)
--- a/main.py
+++ b/main.py
@@ -19,6 +19,8 @@ load_dotenv('test.env')

 st.set_page_config(page_title='BriefGPT')

+accepted_filetypes = ['.txt', '.pdf', '.epub']
+
 def summarize():
    """
    The main function for the Streamlit app.
@@ -33,7 +35,7 @@ def summarize():
    if input_method == 'Document':
        directory = 'documents'
        files = os.listdir(directory)
-        files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
+        files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
        if files:
            selected_file = st.selectbox('Select a file', files)
            st.write('You selected: ' + selected_file)
@@ -67,7 +69,7 @@ def chat():
        st.session_state.text_input = ''
    directory = 'documents'
    files = os.listdir(directory)
-    files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
+    files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
    selected_file = st.selectbox('Select a file', files)
    st.write('You selected: ' + selected_file)
    selected_file_path = os.path.join(directory, selected_file)
@@ -96,7 +98,7 @@ def documents():
    st.markdown('Documents are stored in the documents folder in the project directory.')
    directory = 'documents'
    files = os.listdir(directory)
-    files = [file for file in files if file.endswith('.txt') or file.endswith('.pdf')]
+    files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
    if files:
        files_df = pd.DataFrame(files, columns=['File Name'], index=range(1, len(files) + 1))
        st.dataframe(files_df, width=1000)
--- a/requirements.txt
+++ b/requirements.txt
--- a/summary_utils.py
+++ b/summary_utils.py
@@ -6,7 +6,7 @@ import tiktoken
 from langchain import PromptTemplate
 from langchain.chains.summarize import load_summarize_chain
 from langchain.chat_models import ChatOpenAI
-from langchain.document_loaders import YoutubeLoader, TextLoader, PyPDFLoader
+from langchain.document_loaders import YoutubeLoader, TextLoader, PyPDFLoader, UnstructuredEPubLoader
 from langchain.embeddings import OpenAIEmbeddings
 from langchain.schema import Document

@@ -31,6 +31,12 @@ def doc_loader(file_path: str):
        loader = TextLoader(file_path, encoding='utf-8')
    elif file_path.endswith('.pdf'):
        loader = PyPDFLoader(file_path)
+    elif file_path.endswith('.epub'):
+        try:
+            loader = UnstructuredEPubLoader(file_path)
+        except Exception as e:
+            st.warning('Error loading file - ensure you have pandoc installed and added to PATH.')
+
    return loader.load()