2025-12-23 13:18:36 -08:00
parent e3f9eb2a7c
commit e36ca7b0da
2 changed files with 47 additions and 49 deletions


@@ -21,7 +21,7 @@ class XTTSv2(TTSUtils, TTSRegistry, name='xtts'):
self.vtt_path = os.path.join(self.session['process_dir'],Path(self.session['final_name']).stem+'.vtt')
using_gpu = self.session['device'] != devices['CPU']['proc']
enough_vram = self.session['free_vram_gb'] > 4.0
-        seed = 123456
+        seed = 0
#random.seed(seed)
#np.random.seed(seed)
torch.manual_seed(seed)
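
Side note on the hunk above: only the torch generator is seeded here, while the random and numpy calls stay commented out. Below is a minimal sketch, not part of this commit, of what seeding all three RNG sources deterministically could look like; the helper name set_seed is hypothetical.

import random
import numpy as np
import torch

def set_seed(seed: int = 0) -> None:
    # Hypothetical helper, not in the commit: seed every RNG source
    # the synthesis path might touch, mirroring the lines above.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)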


@@ -511,7 +511,14 @@ def convert2epub(id:str)-> bool:
error = f'Unsupported file format: {file_ext}'
print(error)
return False
-    if file_ext == '.pdf':
+    if file_ext == '.txt':
+        with open(file_input, 'r', encoding='utf-8') as f:
+            text = f.read()
+        text = text.replace('\r\n', '\n')
+        text = text.replace('\n\n', '\n[[pause]]\n')
+        with open(tmp_txt_path, 'w', encoding='utf-8') as f:
+            f.write(text)
+    elif file_ext == '.pdf':
msg = 'File input is a PDF. flatten it in XHTML...'
print(msg)
doc = fitz.open(file_input)
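
For illustration, this is roughly what the new '.txt' branch above does to paragraph breaks; the sample string is invented, and the '[[pause]]' marker is taken from the diff.

sample = "First paragraph.\r\n\r\nSecond paragraph.\n"
sample = sample.replace('\r\n', '\n')             # normalize Windows line endings
sample = sample.replace('\n\n', '\n[[pause]]\n')  # blank line -> pause marker
print(sample)
# First paragraph.
# [[pause]]
# Second paragraph.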
@@ -774,6 +781,7 @@ YOU CAN IMPROVE IT OR ASK TO A TRAINING MODEL EXPERT.
return error, None
def filter_chapter(doc:EpubHtml, id:str, stanza_nlp:Pipeline, is_num2words_compat:bool)->list|None:
def tuple_row(node:Any, last_text_char:str|None=None)->Generator[tuple[str, Any], None, None]|None:
try:
for child in node.children:
@@ -789,7 +797,6 @@ def filter_chapter(doc:EpubHtml, id:str, stanza_nlp:Pipeline, is_num2words_compa
if title:
yield ("heading", title)
last_text_char = title[-1] if title else last_text_char
elif name == "table":
yield ("table", child)
else:
@@ -850,52 +857,43 @@ def filter_chapter(doc:EpubHtml, id:str, stanza_nlp:Pipeline, is_num2words_compa
error = 'No tuples_list from body created!'
print(error)
return None
-    if len(tuples_list) >= 2:
-        first_typ, first_payload = tuples_list[0]
-        second_typ, _ = tuples_list[1]
-        if second_typ in ("break", "pause"):
-            if isinstance(first_payload, str):
-                text = first_payload.rstrip()
-                if text and text[-1].isalnum():
-                    tuples_list[0] = (first_typ, text + ".")
-    text_list = []
-    handled_tables = set()
-    prev_typ = None
-    for typ, payload in tuples_list:
-        if typ == "heading":
-            text_list.append(payload.strip())
-        elif typ == "break":
-            if prev_typ != 'break':
-                text_list.append(TTS_SML['break'])
-        elif typ == 'pause':
-            if prev_typ != 'pause':
-                text_list.append(TTS_SML['pause'])
-        elif typ == "table":
-            table = payload
-            if table in handled_tables:
-                prev_typ = typ
-                continue
-            handled_tables.add(table)
-            rows = table.find_all("tr")
-            if not rows:
-                prev_typ = typ
-                continue
-            headers = [c.get_text(strip=True) for c in rows[0].find_all(["td", "th"])]
-            for row in rows[1:]:
-                cells = [c.get_text(strip=True).replace('\xa0', ' ') for c in row.find_all("td")]
-                if not cells:
-                    continue
-                if len(cells) == len(headers) and headers:
-                    line = "".join(f"{h}: {c}" for h, c in zip(headers, cells))
-                else:
-                    line = "".join(cells)
-                if line:
-                    text_list.append(line.strip())
-        else:
-            text = payload.strip()
-            if text:
-                text_list.append(text)
-        prev_typ = typ
+    text_list = []
+    handled_tables = set()
+    prev_typ = None
+    for typ, payload in tuples_list:
+        if typ == "heading":
+            text_list.append(payload.strip())
+        elif typ in ("break", "pause"):
+            if text_list and text_list[-1] and text_list[-1][-1].isalnum():
+                text_list[-1] += "."
+            if prev_typ != typ:
+                text_list.append(TTS_SML[typ])
+        elif typ == "table":
+            table = payload
+            if table in handled_tables:
+                prev_typ = typ
+                continue
+            handled_tables.add(table)
+            rows = table.find_all("tr")
+            if not rows:
+                prev_typ = typ
+                continue
+            headers = [c.get_text(strip=True) for c in rows[0].find_all(["td", "th"])]
+            for row in rows[1:]:
+                cells = [c.get_text(strip=True).replace('\xa0', ' ') for c in row.find_all("td")]
+                if not cells:
+                    continue
+                if len(cells) == len(headers) and headers:
+                    line = "".join(f"{h}: {c}" for h, c in zip(headers, cells))
+                else:
+                    line = "".join(cells)
+                if line:
+                    text_list.append(line.strip())
+        else:
+            text = payload.strip()
+            if text:
+                text_list.append(text)
+        prev_typ = typ
max_chars = int(language_mapping[lang]['max_chars'] / 2)
clean_list = []
i = 0