Merge branch 'main' into LexiFlow

Chi Wang authored this commit on 2022-10-14 11:04:18 -07:00; committed by GitHub.
14 changed files with 248 additions and 119 deletions

View File

@@ -2,6 +2,8 @@ import sys
 import pytest
 import requests
 from utils import get_toy_data_seqclassification, get_automl_settings
+import os
+import shutil
 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@@ -71,6 +73,9 @@ def test_hf_data():
     del automl
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_hf_data()
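
Every test file touched below gains the same trailing cleanup of test/data/output/. This is not part of the commit, but as a rough sketch, an autouse pytest fixture in a shared conftest.py (a hypothetical file here) could centralize that teardown:

# Hypothetical sketch (not part of this commit): an autouse fixture in a shared
# test/nlp/conftest.py that performs the same cleanup after every test.
import os
import shutil

import pytest


@pytest.fixture(autouse=True)
def cleanup_output_dir():
    # Run the test body first, then remove the shared output directory.
    yield
    if os.path.exists("test/data/output/"):
        shutil.rmtree("test/data/output/")

The commit instead inlines the cleanup in each test, which keeps every test file self-contained.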

View File

@@ -1,14 +1,105 @@
-from utils import get_toy_data_multiclassclassification, get_automl_settings
+from utils import (
+    get_toy_data_regression,
+    get_toy_data_binclassification,
+    get_toy_data_multiclassclassification,
+    get_automl_settings,
+)
 import sys
 import pytest
+import os
+import shutil
+data_list = [
+    "get_toy_data_regression",
+    "get_toy_data_binclassification",
+    "get_toy_data_multiclassclassification",
+]
+model_path_list = [
+    "textattack/bert-base-uncased-STS-B",
+    "textattack/bert-base-uncased-SST-2",
+    "textattack/bert-base-uncased-MNLI",
+]
-def test_classification_head():
+def test_switch_1_1():
+    data_idx, model_path_idx = 0, 0
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_1_2():
+    data_idx, model_path_idx = 0, 1
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_1_3():
+    data_idx, model_path_idx = 0, 2
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_2_1():
+    data_idx, model_path_idx = 1, 0
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_2_2():
+    data_idx, model_path_idx = 1, 1
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_2_3():
+    data_idx, model_path_idx = 1, 2
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_3_1():
+    data_idx, model_path_idx = 2, 0
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_3_2():
+    data_idx, model_path_idx = 2, 1
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def test_switch_3_3():
+    data_idx, model_path_idx = 2, 2
+    _test_switch_classificationhead(
+        data_list[data_idx], model_path_list[model_path_idx]
+    )
+def _test_switch_classificationhead(each_data, each_model_path):
     from flaml import AutoML
     import requests
-    X_train, y_train, X_val, y_val = get_toy_data_multiclassclassification()
     automl = AutoML()
+    X_train, y_train, X_val, y_val = globals()[each_data]()
     automl_settings = get_automl_settings()
+    automl_settings["model_path"] = each_model_path
+    if each_data == "get_toy_data_regression":
+        automl_settings["task"] = "seq-regression"
+        automl_settings["metric"] = "pearsonr"
+    else:
+        automl_settings["task"] = "seq-classification"
+        automl_settings["metric"] = "accuracy"
     try:
         automl.fit(
@@ -21,6 +112,9 @@ def test_classification_head():
     except requests.exceptions.HTTPError:
         return
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
-    test_classification_head()
+    _test_switch_classificationhead(data_list[0], model_path_list[0])
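
The nine test_switch_*_* functions above spell out every combination of the three toy datasets and three checkpoints by hand. As a hypothetical alternative (not what the commit does), pytest.mark.parametrize could cover the same 3x3 grid, assuming the data_list, model_path_list, and _test_switch_classificationhead definitions from the diff above are in scope:

# Hypothetical sketch (not part of this commit): the same 3x3 grid expressed
# with pytest.mark.parametrize instead of nine near-identical test functions.
import pytest


@pytest.mark.parametrize("model_path_idx", range(3))
@pytest.mark.parametrize("data_idx", range(3))
def test_switch(data_idx, model_path_idx):
    # data_list, model_path_list and _test_switch_classificationhead are the
    # names introduced in the diff above.
    _test_switch_classificationhead(
        data_list[data_idx], model_path_list[model_path_idx]
    )

The hand-written variants do give each grid cell a stable, individually selectable test name, which can be convenient in CI logs; the parametrized form trades that for brevity.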

View File

@@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_seqclassification, get_automl_settings
+import os
+import shutil
 def custom_metric(
@@ -81,6 +83,9 @@ def test_custom_metric():
     del automl
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_custom_metric()

View File

@@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_seqclassification, get_automl_settings
+import os
+import shutil
 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@@ -19,6 +21,9 @@ def test_cv():
     except requests.exceptions.HTTPError:
         return
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_cv()

View File

@@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_multiplechoiceclassification, get_automl_settings
+import os
+import shutil
 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@@ -46,6 +48,9 @@ def test_mcc():
     accuracy = round(true_count / len(y_pred), 5)
     print("Accuracy: " + str(accuracy))
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_mcc()

View File

@@ -1,6 +1,8 @@
 import sys
 import pytest
 from utils import get_toy_data_seqregression, get_automl_settings
+import os
+import shutil
 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@@ -32,6 +34,9 @@ def test_regression():
     )
     automl.predict(X_val)
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_regression()

View File

@@ -2,6 +2,8 @@ import sys
 import pytest
 import requests
 from utils import get_toy_data_summarization, get_automl_settings
+import os
+import shutil
 @pytest.mark.skipif(
@@ -48,6 +50,9 @@ def test_summarization():
     )
     automl.predict(X_test)
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_summarization()

View File

@@ -1,6 +1,8 @@
 import sys
 import pytest
 import requests
+import os
+import shutil
 from utils import (
     get_toy_data_tokenclassification_idlabel,
     get_toy_data_tokenclassification_tokenlabel,
@@ -62,6 +64,9 @@ def test_tokenclassification_idlabel():
     if min_inter_result != sys.maxsize:
         assert val_loss == min_inter_result
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 @pytest.mark.skipif(
     sys.platform == "darwin" or sys.version < "3.7",
@@ -106,6 +111,9 @@ def test_tokenclassification_tokenlabel():
     if min_inter_result != sys.maxsize:
         assert val_loss == min_inter_result
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 if __name__ == "__main__":
     test_tokenclassification_idlabel()

View File

@@ -1,6 +1,8 @@
 from utils import get_toy_data_seqclassification, get_automl_settings
 import sys
 from flaml.default import portfolio
+import os
+import shutil
 def pop_args(fit_kwargs):
@@ -80,6 +82,9 @@ def test_starting_point_not_in_search_space():
         == "albert-base-v2"
     )
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 def test_points_to_evaluate():
     from flaml import AutoML
@@ -99,6 +104,9 @@ def test_points_to_evaluate():
     automl.fit(X_train, y_train, **automl_settings)
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 # TODO: implement _test_zero_shot_model
 def test_zero_shot_nomodel():
@@ -131,6 +139,9 @@ def test_zero_shot_nomodel():
     pop_args(fit_kwargs)
     model.fit(X_train, y_train, **fit_kwargs)
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")
 def test_build_error_portfolio(path="./test/nlp/default", strategy="greedy"):
     import os
@@ -159,3 +170,9 @@ def test_build_error_portfolio(path="./test/nlp/default", strategy="greedy"):
         )
     except ValueError:
         print("Feature not implemented")
+    import os
+    import shutil
+    if os.path.exists("test/data/output/"):
+        shutil.rmtree("test/data/output/")

View File

@@ -70,23 +70,19 @@ def get_toy_data_seqclassification():
     return X_train, y_train, X_val, y_val, X_test
-def get_toy_data_multiclassclassification():
+def get_toy_data_binclassification():
     train_data = {
         "text": [
             "i didnt feel humiliated",
             "i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake",
-            "im grabbing a minute to post i feel greedy wrong",
             "i am ever feeling nostalgic about the fireplace i will know that it is still on the property",
-            "i am feeling grouchy",
             "ive been feeling a little burdened lately wasnt sure why that was",
-            "ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny",
-            "i feel as confused about life as a teenager or as jaded as a year old man",
             "i have been with petronas for years i feel that petronas has performed well and made a huge profit",
             "i feel romantic too",
             "i feel like i have to make the suffering i m seeing mean something",
             "i do feel that running is a divine experience and that i can expect to have some type of spiritual encounter",
         ],
-        "label": [0, 0, 3, 2, 3, 0, 5, 4, 1, 2, 0, 1],
+        "label": [0, 0, 1, 0, 1, 1, 0, 1],
     }
     train_dataset = pd.DataFrame(train_data)
@@ -95,9 +91,84 @@ def get_toy_data_multiclassclassification():
             "i think it s the easiest time of year to feel dissatisfied",
             "i feel low energy i m just thirsty",
             "i have immense sympathy with the general point but as a possible proto writer trying to find time to write in the corners of life and with no sign of an agent let alone a publishing contract this feels a little precious",
-            "i do not feel reassured anxiety is on each side",
         ],
-        "label": [3, 0, 1, 1],
+        "label": [0, 1, 1],
     }
     dev_dataset = pd.DataFrame(dev_data)
     custom_sent_keys = ["text"]
     label_key = "label"
     X_train = train_dataset[custom_sent_keys]
     y_train = train_dataset[label_key]
     X_val = dev_dataset[custom_sent_keys]
     y_val = dev_dataset[label_key]
     return X_train, y_train, X_val, y_val
+def get_toy_data_regression():
+    train_data = {
+        "text": [
+            "i didnt feel humiliated",
+            "i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake",
+            "i am ever feeling nostalgic about the fireplace i will know that it is still on the property",
+            "ive been feeling a little burdened lately wasnt sure why that was",
+            "i have been with petronas for years i feel that petronas has performed well and made a huge profit",
+            "i feel romantic too",
+            "i feel like i have to make the suffering i m seeing mean something",
+            "i do feel that running is a divine experience and that i can expect to have some type of spiritual encounter",
+        ],
+        "label": [1.0, 1.0, 3.0, 1.0, 5.0, 5.0, 1.0, 3.0],
+    }
+    train_dataset = pd.DataFrame(train_data)
+    dev_data = {
+        "text": [
+            "i think it s the easiest time of year to feel dissatisfied",
+            "i feel low energy i m just thirsty",
+            "i have immense sympathy with the general point but as a possible proto writer trying to find time to write in the corners of life and with no sign of an agent let alone a publishing contract this feels a little precious",
+        ],
+        "label": [1.0, 3.0, 3.0],
+    }
+    dev_dataset = pd.DataFrame(dev_data)
+    custom_sent_keys = ["text"]
+    label_key = "label"
+    X_train = train_dataset[custom_sent_keys]
+    y_train = train_dataset[label_key]
+    X_val = dev_dataset[custom_sent_keys]
+    y_val = dev_dataset[label_key]
+    return X_train, y_train, X_val, y_val
+def get_toy_data_multiclassclassification():
+    train_data = {
+        "text": [
+            "i didnt feel humiliated",
+            "i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake",
+            "i am ever feeling nostalgic about the fireplace i will know that it is still on the property",
+            "ive been feeling a little burdened lately wasnt sure why that was",
+            "i have been with petronas for years i feel that petronas has performed well and made a huge profit",
+            "i feel romantic too",
+            "i feel like i have to make the suffering i m seeing mean something",
+            "i do feel that running is a divine experience and that i can expect to have some type of spiritual encounter",
+        ],
+        "label": [0, 0, 2, 0, 1, 2, 0, 1],
+    }
+    train_dataset = pd.DataFrame(train_data)
+    dev_data = {
+        "text": [
+            "i think it s the easiest time of year to feel dissatisfied",
+            "i feel low energy i m just thirsty",
+            "i have immense sympathy with the general point but as a possible proto writer trying to find time to write in the corners of life and with no sign of an agent let alone a publishing contract this feels a little precious",
+        ],
+        "label": [0, 1, 1],
+    }
+    dev_dataset = pd.DataFrame(dev_data)