...

2026-01-08 05:23:55 -05:00 · 2025-12-30 15:48:49 -08:00
parent 1f1b5308b5
commit c6453a227c
4 changed files with 12 additions and 6 deletions
--- a/ebook2audiobook.egg-info/requires.txt
+++ b/ebook2audiobook.egg-info/requires.txt
@@ -8,8 +8,7 @@ ebooklib
 fastapi
 hf_xet
 beautifulsoup4
-sudachipy
-sudachidict-core
+nagisa
 pymupdf
 pymupdf-layout
 pytesseract
--- a/lib/core.py
+++ b/lib/core.py
@@ -1039,10 +1039,19 @@ def get_sentences(text:str, id:str)->list|None:
                        jieba.dt.cache_file = os.path.join(models_dir, 'jieba.cache')
                        result.extend([t for t in jieba.cut(segment) if t.strip()])
                    elif lang == 'jpn':
+                        """
                        from sudachipy import dictionary, tokenizer
                        sudachi = dictionary.Dictionary().create()
                        mode = tokenizer.Tokenizer.SplitMode.C
                        result.extend([m.surface() for m in sudachi.tokenize(segment, mode) if m.surface().strip()])
+                        """
+                        import nagisa
+                        tokens = nagisa.tagging(segment).words
+                        result.extend([
+                            token
+                            for token in tokens
+                            if token.strip()
+                        ])
                    elif lang == 'kor':
                        from soynlp.tokenizer import LTokenizer
                        ltokenizer = LTokenizer()
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,8 +27,7 @@ dependencies = [
 	"fastapi",
 	"hf_xet",
 	"beautifulsoup4",
-	"sudachipy",
-	"sudachidict-core",
+	"nagisa",
 	"pymupdf",
 	"pymupdf-layout",
 	"pytesseract",
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,8 +8,7 @@ ebooklib
 fastapi
 hf_xet
 beautifulsoup4
-sudachipy
-sudachidict-core
+nagisa
 pymupdf
 pymupdf-layout
 pytesseract