This commit is contained in:
Jong Wook Kim
2023-05-05 01:40:00 -07:00
committed by GitHub
parent 248b6cb124
commit f53a161d55

View File

@@ -215,6 +215,8 @@ def find_alignment(
words, word_tokens = tokenizer.split_to_word_tokens(text_tokens + [tokenizer.eot])
word_boundaries = np.pad(np.cumsum([len(t) for t in word_tokens[:-1]]), (1, 0))
if len(word_boundaries) <= 1:
return []
jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool)
jump_times = time_indices[jumps] / TOKENS_PER_SECOND