mirror of
https://github.com/DrewThomasson/ebook2audiobook.git
synced 2026-01-08 21:38:12 -05:00
...
This commit is contained in:
@@ -8,8 +8,7 @@ ebooklib
|
|||||||
fastapi
|
fastapi
|
||||||
hf_xet
|
hf_xet
|
||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
sudachipy
|
nagisa
|
||||||
sudachidict-core
|
|
||||||
pymupdf
|
pymupdf
|
||||||
pymupdf-layout
|
pymupdf-layout
|
||||||
pytesseract
|
pytesseract
|
||||||
|
|||||||
@@ -1039,10 +1039,19 @@ def get_sentences(text:str, id:str)->list|None:
|
|||||||
jieba.dt.cache_file = os.path.join(models_dir, 'jieba.cache')
|
jieba.dt.cache_file = os.path.join(models_dir, 'jieba.cache')
|
||||||
result.extend([t for t in jieba.cut(segment) if t.strip()])
|
result.extend([t for t in jieba.cut(segment) if t.strip()])
|
||||||
elif lang == 'jpn':
|
elif lang == 'jpn':
|
||||||
|
"""
|
||||||
from sudachipy import dictionary, tokenizer
|
from sudachipy import dictionary, tokenizer
|
||||||
sudachi = dictionary.Dictionary().create()
|
sudachi = dictionary.Dictionary().create()
|
||||||
mode = tokenizer.Tokenizer.SplitMode.C
|
mode = tokenizer.Tokenizer.SplitMode.C
|
||||||
result.extend([m.surface() for m in sudachi.tokenize(segment, mode) if m.surface().strip()])
|
result.extend([m.surface() for m in sudachi.tokenize(segment, mode) if m.surface().strip()])
|
||||||
|
"""
|
||||||
|
import nagisa
|
||||||
|
tokens = nagisa.tagging(segment).words
|
||||||
|
result.extend([
|
||||||
|
token
|
||||||
|
for token in tokens
|
||||||
|
if token.strip()
|
||||||
|
])
|
||||||
elif lang == 'kor':
|
elif lang == 'kor':
|
||||||
from soynlp.tokenizer import LTokenizer
|
from soynlp.tokenizer import LTokenizer
|
||||||
ltokenizer = LTokenizer()
|
ltokenizer = LTokenizer()
|
||||||
|
|||||||
@@ -27,8 +27,7 @@ dependencies = [
|
|||||||
"fastapi",
|
"fastapi",
|
||||||
"hf_xet",
|
"hf_xet",
|
||||||
"beautifulsoup4",
|
"beautifulsoup4",
|
||||||
"sudachipy",
|
"nagisa",
|
||||||
"sudachidict-core",
|
|
||||||
"pymupdf",
|
"pymupdf",
|
||||||
"pymupdf-layout",
|
"pymupdf-layout",
|
||||||
"pytesseract",
|
"pytesseract",
|
||||||
|
|||||||
@@ -8,8 +8,7 @@ ebooklib
|
|||||||
fastapi
|
fastapi
|
||||||
hf_xet
|
hf_xet
|
||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
sudachipy
|
nagisa
|
||||||
sudachidict-core
|
|
||||||
pymupdf
|
pymupdf
|
||||||
pymupdf-layout
|
pymupdf-layout
|
||||||
pytesseract
|
pytesseract
|
||||||
|
|||||||
Reference in New Issue
Block a user