fixing roberta add_prefix_space bug (#546)

* fixing roberta add_prefix_space bug
This commit is contained in:
Xueqing Liu
2022-05-12 10:57:25 -04:00
committed by GitHub
parent 2a8decdc50
commit 2ca9e41e4b

View File

@@ -542,7 +542,13 @@ class TransformersEstimator(BaseEstimator):
)
else:
return AutoTokenizer.from_pretrained(
self._training_args.model_path, use_fast=True
self._training_args.model_path,
use_fast=True,
add_prefix_space=True
if "roberta" in self._training_args.model_path
else False, # For RoBERTa models, add_prefix_space must be set to True to avoid the assertion error at
# https://github.com/huggingface/transformers/blob/main/src/transformers/models/roberta/tokenization_roberta_fast.py#L249
)
@property