mirror of
https://github.com/microsoft/autogen.git
synced 2026-04-20 03:02:16 -04:00
@@ -84,10 +84,10 @@ def tokenize_and_align_labels(
|
||||
tokenized_inputs = tokenizer(
|
||||
[list(examples[X_sent_key])],
|
||||
padding="max_length"
|
||||
if hf_args.pad_to_max_length
|
||||
if hf_args and hf_args.pad_to_max_length
|
||||
else False, # to be consistent with https://github.com/huggingface/transformers/blob/main/examples/pytorch/token-classification/run_ner.py#L394
|
||||
truncation=True,
|
||||
max_length=hf_args.max_seq_length,
|
||||
max_length=hf_args.max_seq_length if hf_args else None,
|
||||
# We use this argument because the texts in our dataset are lists of words (with a label for each word).
|
||||
is_split_into_words=True,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user