Merge branch 'main' into LexiFlow

Chi Wang authored this commit on 2022-10-14 11:04:18 -07:00; committed by GitHub.
14 changed files with 248 additions and 119 deletions

View File

@@ -2,6 +2,8 @@ import sys
 import pytest
 import requests
 from utils import get_toy_data_seqclassification, get_automl_settings
+import os
+import shutil
 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@@ -71,6 +73,9 @@ def test_hf_data():
     del automl
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_hf_data()
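
Every test file touched below gains the same trailing cleanup of test/data/output/. This is not part of the commit, but as a rough sketch, an autouse pytest fixture in a shared conftest.py (a hypothetical file here) could centralize that teardown:

# Hypothetical sketch (not part of this commit): an autouse fixture in a shared
# test/nlp/conftest.py that performs the same cleanup after every test.
import os
import shutil

import pytest


@pytest.fixture(autouse=True)
def cleanup_output_dir():
    # Run the test body first, then remove the shared output directory.
    yield
    if os.path.exists("test/data/output/"):
        shutil.rmtree("test/data/output/")

The commit instead inlines the cleanup in each test, which keeps every test file self-contained.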

View File

@@ -1,14 +1,105 @@
-from utils import get_toy_data_multiclassclassification, get_automl_settings
+from utils import (
+    get_toy_data_regression,
+    get_toy_data_binclassification,
+    get_toy_data_multiclassclassification,
+    get_automl_settings,
+)
 import sys
 import pytest
+import os
+import shutil
+data_list = [
+    "get_toy_data_regression",
+    "get_toy_data_binclassification",
+    "get_toy_data_multiclassclassification",
+]
+model_path_list = [
+    "textattack/bert-base-uncased-STS-B",
+    "textattack/bert-base-uncased-SST-2",
+    "textattack/bert-base-uncased-MNLI",
+]
-def test_classification_head():
+def test_switch_1_1():
+    data_idx, model_path_idx = 0, 0
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_1_2():
+    data_idx, model_path_idx = 0, 1
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_1_3():
+    data_idx, model_path_idx = 0, 2
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_2_1():
+    data_idx, model_path_idx = 1, 0
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_2_2():
+    data_idx, model_path_idx = 1, 1
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_2_3():
+    data_idx, model_path_idx = 1, 2
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_3_1():
+    data_idx, model_path_idx = 2, 0
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_3_2():
+    data_idx, model_path_idx = 2, 1
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_3_3():
+    data_idx, model_path_idx = 2, 2
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def _test_switch_classificationhead(each_data, each_model_path):
     from flaml import AutoML
     import requests
-    X_train, y_train, X_val, y_val = get_toy_data_multiclassclassification()
     automl = AutoML()
+    X_train, y_train, X_val, y_val = globals()[each_data]()
     automl_settings = get_automl_settings()
+    automl_settings["model_path"] = each_model_path
+    if each_data == "get_toy_data_regression":
+        automl_settings["task"] = "seq-regression"
+        automl_settings["metric"] = "pearsonr"
+    else:
+        automl_settings["task"] = "seq-classification"
+        automl_settings["metric"] = "accuracy"
     try:
         automl.fit(
@@ -21,6 +112,9 @@ def test_classification_head():
     except requests.exceptions.HTTPError:
         return
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
-    test_classification_head()
+    _test_switch_classificationhead(data_list[0], model_path_list[0])
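
The nine test_switch_*_* functions above spell out every combination of the three toy datasets and three checkpoints by hand. As a hypothetical alternative (not what the commit does), pytest.mark.parametrize could cover the same 3x3 grid, assuming the data_list, model_path_list, and _test_switch_classificationhead definitions from the diff above are in scope:

# Hypothetical sketch (not part of this commit): the same 3x3 grid expressed
# with pytest.mark.parametrize instead of nine near-identical test functions.
import pytest


@pytest.mark.parametrize("model_path_idx", range(3))
@pytest.mark.parametrize("data_idx", range(3))
def test_switch(data_idx, model_path_idx):
    # data_list, model_path_list and _test_switch_classificationhead are the
    # names introduced in the diff above.
    _test_switch_classificationhead(
        data_list[data_idx], model_path_list[model_path_idx]
    )

The hand-written variants do give each grid cell a stable, individually selectable test name, which can be convenient in CI logs; the parametrized form trades that for brevity.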

View File

@@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_seqclassification, get_automl_settings
+import os
+import shutil
 def custom_metric(
@@ -81,6 +83,9 @@ def test_custom_metric():
     del automl
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_custom_metric()

View File

@@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_seqclassification, get_automl_settings
+import os
+import shutil
 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@@ -19,6 +21,9 @@ def test_cv():
     except requests.exceptions.HTTPError:
         return
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_cv()

View File

@@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_multiplechoiceclassification, get_automl_settings
+import os
+import shutil
 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@@ -46,6 +48,9 @@ def test_mcc():
     accuracy = round(true_count / len(y_pred), 5)
     print("Accuracy: " + str(accuracy))
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_mcc()

View File

@@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_seqregression, get_automl_settings
+import os
+import shutil
 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@@ -32,6 +34,9 @@ def test_regression():
     )
     automl.predict(X_val)
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_regression()

View File

@@ -2,6 +2,8 @@ import sys
 import pytest
 import requests
 from utils import get_toy_data_summarization, get_automl_settings
+import os
+import shutil
 @pytest.mark.skipif(
@@ -48,6 +50,9 @@ def test_summarization():
     )
     automl.predict(X_test)
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_summarization()

View File

@@ -1,6 +1,8 @@
 import sys
 import pytest
 import requests
+import os
+import shutil
 from utils import (
     get_toy_data_tokenclassification_idlabel,
     get_toy_data_tokenclassification_tokenlabel,
@@ -62,6 +64,9 @@ def test_tokenclassification_idlabel():
     if min_inter_result != sys.maxsize:
         assert val_loss == min_inter_result
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 @pytest.mark.skipif(
     sys.platform == "darwin" or sys.version < "3.7",
@@ -106,6 +111,9 @@ def test_tokenclassification_tokenlabel():
     if min_inter_result != sys.maxsize:
         assert val_loss == min_inter_result
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_tokenclassification_idlabel()

View File

@@ -1,6 +1,8 @@
 from utils import get_toy_data_seqclassification, get_automl_settings
 import sys
 from flaml.default import portfolio
+import os
+import shutil
 def pop_args(fit_kwargs):
@@ -80,6 +82,9 @@ def test_starting_point_not_in_search_space():
         == "albert-base-v2"
     )
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 def test_points_to_evaluate():
     from flaml import AutoML
@@ -99,6 +104,9 @@ def test_points_to_evaluate():
     automl.fit(X_train, y_train, **automl_settings)
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 # TODO: implement _test_zero_shot_model
 def test_zero_shot_nomodel():
@@ -131,6 +139,9 @@ def test_zero_shot_nomodel():
     pop_args(fit_kwargs)
     model.fit(X_train, y_train, **fit_kwargs)
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 def test_build_error_portfolio(path="./test/nlp/default", strategy="greedy"):
     import os
@@ -159,3 +170,9 @@ def test_build_error_portfolio(path="./test/nlp/default", strategy="greedy"):
         )
     except ValueError:
         print("Feature not implemented")
+    import os
+    import shutil
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")

View File

@@ -70,23 +70,19 @@ def get_toy_data_seqclassification():
     return X_train, y_train, X_val, y_val, X_test
-def get_toy_data_multiclassclassification():
+def get_toy_data_binclassification():
     train_data = {
         "text": [
             "i didnt feel humiliated",
             "i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake",
-            "im grabbing a minute to post i feel greedy wrong",
             "i am ever feeling nostalgic about the fireplace i will know that it is still on the property",
-            "i am feeling grouchy",
             "ive been feeling a little burdened lately wasnt sure why that was",
-            "ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny",
-            "i feel as confused about life as a teenager or as jaded as a year old man",
             "i have been with petronas for years i feel that petronas has performed well and made a huge profit",
             "i feel romantic too",
             "i feel like i have to make the suffering i m seeing mean something",
             "i do feel that running is a divine experience and that i can expect to have some type of spiritual encounter",
         ],
-        "label": [0, 0, 3, 2, 3, 0, 5, 4, 1, 2, 0, 1],
+        "label": [0, 0, 1, 0, 1, 1, 0, 1],
     }
     train_dataset = pd.DataFrame(train_data)
@@ -95,9 +91,84 @@ def get_toy_data_multiclassclassification():
             "i think it s the easiest time of year to feel dissatisfied",
             "i feel low energy i m just thirsty",
             "i have immense sympathy with the general point but as a possible proto writer trying to find time to write in the corners of life and with no sign of an agent let alone a publishing contract this feels a little precious",
-            "i do not feel reassured anxiety is on each side",
         ],
-        "label": [3, 0, 1, 1],
+        "label": [0, 1, 1],
     }
     dev_dataset = pd.DataFrame(dev_data)
     custom_sent_keys = ["text"]
     label_key = "label"
     X_train = train_dataset[custom_sent_keys]
     y_train = train_dataset[label_key]
     X_val = dev_dataset[custom_sent_keys]
     y_val = dev_dataset[label_key]
     return X_train, y_train, X_val, y_val
+def get_toy_data_regression():
+    train_data = {
+        "text": [
+            "i didnt feel humiliated",
+            "i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake",
+            "i am ever feeling nostalgic about the fireplace i will know that it is still on the property",
+            "ive been feeling a little burdened lately wasnt sure why that was",
+            "i have been with petronas for years i feel that petronas has performed well and made a huge profit",
+            "i feel romantic too",
+            "i feel like i have to make the suffering i m seeing mean something",
+            "i do feel that running is a divine experience and that i can expect to have some type of spiritual encounter",
+        ],
+        "label": [1.0, 1.0, 3.0, 1.0, 5.0, 5.0, 1.0, 3.0],
+    }
+    train_dataset = pd.DataFrame(train_data)
+    dev_data = {
+        "text": [
+            "i think it s the easiest time of year to feel dissatisfied",
+            "i feel low energy i m just thirsty",
+            "i have immense sympathy with the general point but as a possible proto writer trying to find time to write in the corners of life and with no sign of an agent let alone a publishing contract this feels a little precious",
+        ],
+        "label": [1.0, 3.0, 3.0],
+    }
+    dev_dataset = pd.DataFrame(dev_data)
+    custom_sent_keys = ["text"]
+    label_key = "label"
+    X_train = train_dataset[custom_sent_keys]
+    y_train = train_dataset[label_key]
+    X_val = dev_dataset[custom_sent_keys]
+    y_val = dev_dataset[label_key]
+    return X_train, y_train, X_val, y_val
+def get_toy_data_multiclassclassification():
+    train_data = {
+        "text": [
+            "i didnt feel humiliated",
+            "i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake",
+            "i am ever feeling nostalgic about the fireplace i will know that it is still on the property",
+            "ive been feeling a little burdened lately wasnt sure why that was",
+            "i have been with petronas for years i feel that petronas has performed well and made a huge profit",
+            "i feel romantic too",
+            "i feel like i have to make the suffering i m seeing mean something",
+            "i do feel that running is a divine experience and that i can expect to have some type of spiritual encounter",
+        ],
+        "label": [0, 0, 2, 0, 1, 2, 0, 1],
+    }
+    train_dataset = pd.DataFrame(train_data)
+    dev_data = {
+        "text": [
+            "i think it s the easiest time of year to feel dissatisfied",
+            "i feel low energy i m just thirsty",
+            "i have immense sympathy with the general point but as a possible proto writer trying to find time to write in the corners of life and with no sign of an agent let alone a publishing contract this feels a little precious",
+        ],
+        "label": [0, 1, 1],
+    }
+    dev_dataset = pd.DataFrame(dev_data)