fixing a bug in nlp/utils.py (#590)

* fixing a bug for NER: guard against `hf_args` being None when selecting padding and max_length in tokenize_and_align_labels
This commit is contained in:
Xueqing Liu
2022-06-14 17:31:12 -04:00
committed by GitHub
parent 1111d6d43a
commit 79a24d06a9

View File

@@ -84,10 +84,10 @@ def tokenize_and_align_labels(
tokenized_inputs = tokenizer(
[list(examples[X_sent_key])],
padding="max_length"
if hf_args.pad_to_max_length
if hf_args and hf_args.pad_to_max_length
else False, # to be consistent with https://github.com/huggingface/transformers/blob/main/examples/pytorch/token-classification/run_ner.py#L394
truncation=True,
max_length=hf_args.max_seq_length,
max_length=hf_args.max_seq_length if hf_args else None,
# We use this argument because the texts in our dataset are lists of words (with a label for each word).
is_split_into_words=True,
)