fixing a bug in nlp/utils.py (#590)

* fixing a bug for NER: guard against `hf_args` being None when selecting padding and max_length in tokenize_and_align_labels
This commit is contained in:
Xueqing Liu
2022-06-14 17:31:12 -04:00
committed by GitHub
parent 1111d6d43a
commit 79a24d06a9

View File

@@ -84,10 +84,10 @@ def tokenize_and_align_labels(
tokenized_inputs = tokenizer(
[list(examples[X_sent_key])],
padding="max_length"
if hf_args.pad_to_max_length
if hf_args and hf_args.pad_to_max_length
else False, # to be consistent with https://github.com/huggingface/transformers/blob/main/examples/pytorch/token-classification/run_ner.py#L394
truncation=True,
max_length=hf_args.max_seq_length,
max_length=hf_args.max_seq_length if hf_args else None,
# We use this argument because the texts in our dataset are lists of words (with a label for each word).
is_split_into_words=True,
)