adding TODOs for NLP module, so students can implement other tasks more easily (#321)

* fixing ray pickle bug, skipping macosx bug, completing code for seqregression

* catching requests.exceptions.ConnectionError

* adding TODOs for NLP module
This commit is contained in:
Xueqing Liu
2021-12-03 12:45:16 -05:00
committed by GitHub
parent c57954fbbd
commit fb59bb9928
9 changed files with 214 additions and 63 deletions

View File

@@ -1,22 +1,25 @@
import os
import sys
import pytest
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_hf_data():
from flaml import AutoML
import requests
from datasets import load_dataset
train_dataset = (
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
)
dev_dataset = (
load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4]
)
test_dataset = (
load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4]
)
try:
train_dataset = (
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
)
dev_dataset = (
load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4]
)
test_dataset = (
load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4]
)
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["sentence1", "sentence2"]
label_key = "label"
@@ -75,12 +78,15 @@ def test_hf_data():
def _test_custom_data():
from flaml import AutoML
import requests
import pandas as pd
train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3)
dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3)
test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3)
try:
train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3)
dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3)
test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3)
except requests.exceptions.ConnectionError:
pass
custom_sent_keys = ["#1 String", "#2 String"]
label_key = "Quality"

View File

@@ -1,10 +1,17 @@
def test_classification_head():
from flaml import AutoML
import requests
from datasets import load_dataset
train_dataset = load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10]
dev_dataset = load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10]
try:
train_dataset = (
load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10]
)
dev_dataset = (
load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10]
)
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["text"]
label_key = "label"

View File

@@ -1,16 +1,19 @@
import os
import sys
import pytest
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_cv():
from flaml import AutoML
import requests
from datasets import load_dataset
train_dataset = (
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
)
try:
train_dataset = (
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
)
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["sentence1", "sentence2"]
label_key = "label"

View File

@@ -1,15 +1,20 @@
import os
import sys
import pytest
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_max_iter_1():
from flaml import AutoML
import requests
from datasets import load_dataset
train_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
try:
train_dataset = (
load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
)
dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["sentence1", "sentence2"]
label_key = "label"

View File

@@ -1,23 +1,26 @@
import os
import sys
import pytest
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_regression():
try:
import ray
except ImportError:
return
from flaml import AutoML
import requests
from datasets import load_dataset
train_dataset = (
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
)
dev_dataset = (
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
)
try:
train_dataset = (
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
)
dev_dataset = (
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
)
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["sentence1", "sentence2"]
label_key = "label"
@@ -50,3 +53,7 @@ def test_regression():
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
)
if __name__ == "__main__":
test_regression()