Update file paths (#1179)
@@ -12,7 +12,7 @@ iou = _mask.iou
 merge = _mask.merge
 frPyObjects = _mask.frPyObjects
 
-BASEDIR = pathlib.Path(__file__).parent.parent / "extra" / "datasets" / "COCO"
+BASEDIR = pathlib.Path(__file__).parent / "COCO"
 BASEDIR.mkdir(exist_ok=True)
 
 def create_dict(key_row, val_row, rows): return {row[key_row]:row[val_row] for row in rows}
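Note: the relocation keeps BASEDIR pointing at the same directory. A minimal sketch (file names hypothetical, assuming the script moved from <repo>/datasets/ into <repo>/extra/datasets/):

import pathlib

repo = pathlib.PurePosixPath("/repo")               # hypothetical checkout root
old_file = repo / "datasets" / "coco.py"            # assumed location before the move
new_file = repo / "extra" / "datasets" / "coco.py"  # assumed location after the move

# old spelling: climb to the repo root, then descend into extra/datasets/COCO
old_base = old_file.parent.parent / "extra" / "datasets" / "COCO"
# new spelling: the script already sits in extra/datasets, so one .parent suffices
new_base = new_file.parent / "COCO"
assert old_base == new_base == repo / "extra" / "datasets" / "COCO"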
@@ -5,7 +5,7 @@ import numpy as np
 from PIL import Image
 import functools, pathlib
 
-BASEDIR = pathlib.Path(__file__).parent.parent / "extra/datasets/imagenet"
+BASEDIR = pathlib.Path(__file__).parent / "imagenet"
 ci = json.load(open(BASEDIR / "imagenet_class_index.json"))
 cir = {v[0]: int(k) for k,v in ci.items()}
 
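Note: the old line joined a single "extra/datasets/imagenet" string while the new one joins a bare "imagenet"; both styles are interchangeable because pathlib splits slash-separated string arguments into components. A quick check:

import pathlib

base = pathlib.PurePosixPath("/repo/extra/datasets")
# a single "a/b" argument and chained / "a" / "b" build the same path
assert base / "imagenet/val" == base / "imagenet" / "val"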
@@ -14,38 +14,38 @@ def imagenet_extract(file, path, small=False):
 
 def imagenet_prepare_val():
   # Read in the labels file
-  with open(Path(__file__).parent.parent / "extra/datasets/imagenet/imagenet_2012_validation_synset_labels.txt", 'r') as f:
+  with open(Path(__file__).parent / "imagenet" / "imagenet_2012_validation_synset_labels.txt", 'r') as f:
     labels = f.read().splitlines()
   f.close()
   # Get a list of images
-  images = os.listdir(Path(__file__).parent.parent / "extra/datasets/imagenet/val")
+  images = os.listdir(Path(__file__).parent / "imagenet" / "val")
   images.sort()
   # Create folders and move files into those
   for co,dir in enumerate(labels):
-    os.makedirs(Path(__file__).parent.parent / "extra/datasets/imagenet/val" / dir, exist_ok=True)
-    os.replace(Path(__file__).parent.parent / "extra/datasets/imagenet/val" / images[co], Path(__file__).parent.parent / "extra/datasets/imagenet/val" / dir / images[co])
-  os.remove(Path(__file__).parent.parent / "extra/datasets/imagenet/imagenet_2012_validation_synset_labels.txt")
+    os.makedirs(Path(__file__).parent / "imagenet" / "val" / dir, exist_ok=True)
+    os.replace(Path(__file__).parent / "imagenet" / "val" / images[co], Path(__file__).parent / "imagenet" / "val" / dir / images[co])
+  os.remove(Path(__file__).parent / "imagenet" / "imagenet_2012_validation_synset_labels.txt")
 
 def imagenet_prepare_train():
-  images = os.listdir(Path(__file__).parent.parent / "extra/datasets/imagenet/train")
+  images = os.listdir(Path(__file__).parent / "imagenet" / "train")
   for co,tarf in enumerate(images):
     # for each tar file found. Create a folder with its name. Extract into that folder. Remove tar file
-    if Path(Path(__file__).parent.parent / "extra/datasets/imagenet/train" / images[co]).is_file():
+    if Path(Path(__file__).parent / "imagenet" / "train" / images[co]).is_file():
       images[co] = tarf[:-4] # remove .tar from extracted tar files
-      os.makedirs(Path(__file__).parent.parent / "extra/datasets/imagenet/train" / images[co], exist_ok=True)
-      imagenet_extract(Path(__file__).parent.parent / "extra/datasets/imagenet/train" / tarf, Path(__file__).parent.parent / "extra/datasets/imagenet/train" / images[co], small=True)
-      os.remove(Path(__file__).parent.parent / "extra/datasets/imagenet/train" / tarf)
+      os.makedirs(Path(__file__).parent / "imagenet" / "train" / images[co], exist_ok=True)
+      imagenet_extract(Path(__file__).parent / "imagenet" / "train" / tarf, Path(__file__).parent / "imagenet" / "train" / images[co], small=True)
+      os.remove(Path(__file__).parent / "imagenet" / "train" / tarf)
 
 if __name__ == "__main__":
-  os.makedirs(Path(__file__).parent.parent / "extra/datasets/imagenet", exist_ok=True)
-  os.makedirs(Path(__file__).parent.parent / "extra/datasets/imagenet/val", exist_ok=True)
-  os.makedirs(Path(__file__).parent.parent / "extra/datasets/imagenet/train", exist_ok=True)
-  download_file("https://raw.githubusercontent.com/raghakot/keras-vis/master/resources/imagenet_class_index.json", Path(__file__).parent.parent / "extra/datasets/imagenet/imagenet_class_index.json")
-  download_file("https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_2012_validation_synset_labels.txt", Path(__file__).parent.parent / "extra/datasets/imagenet/imagenet_2012_validation_synset_labels.txt")
-  download_file("https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar", Path(__file__).parent.parent / "extra/datasets/imagenet/ILSVRC2012_img_val.tar") # 7GB
-  imagenet_extract(Path(__file__).parent.parent / "extra/datasets/imagenet/ILSVRC2012_img_val.tar", Path(__file__).parent.parent / "extra/datasets/imagenet/val")
+  os.makedirs(Path(__file__).parent / "imagenet", exist_ok=True)
+  os.makedirs(Path(__file__).parent / "imagenet" / "val", exist_ok=True)
+  os.makedirs(Path(__file__).parent / "imagenet" / "train", exist_ok=True)
+  download_file("https://raw.githubusercontent.com/raghakot/keras-vis/master/resources/imagenet_class_index.json", Path(__file__).parent / "imagenet" / "imagenet_class_index.json")
+  download_file("https://raw.githubusercontent.com/tensorflow/models/master/research/slim/datasets/imagenet_2012_validation_synset_labels.txt", Path(__file__).parent / "imagenet" / "imagenet_2012_validation_synset_labels.txt")
+  download_file("https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar", Path(__file__).parent / "imagenet" / "ILSVRC2012_img_val.tar") # 7GB
+  imagenet_extract(Path(__file__).parent / "imagenet" / "ILSVRC2012_img_val.tar", Path(__file__).parent / "imagenet" / "val")
   imagenet_prepare_val()
   if os.getenv('IMGNET_TRAIN', None) is not None:
-    download_file("https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar", Path(__file__).parent.parent / "extra/datasets/imagenet/ILSVRC2012_img_train.tar") #138GB!
-    imagenet_extract(Path(__file__).parent.parent / "extra/datasets/imagenet/ILSVRC2012_img_train.tar", Path(__file__).parent.parent / "extra/datasets/imagenet/train")
+    download_file("https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar", Path(__file__).parent / "imagenet" / "ILSVRC2012_img_train.tar") #138GB!
+    imagenet_extract(Path(__file__).parent / "imagenet" / "ILSVRC2012_img_train.tar", Path(__file__).parent / "imagenet" / "train")
   imagenet_prepare_train()
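Note: a condensed sketch (hypothetical helper, not part of the diff) of what imagenet_prepare_val does. The i-th line of the synset-labels file names the class of the i-th validation image in sorted order, so each image is moved into a folder named after its synset:

import os
from pathlib import Path

def sort_val_into_synsets(val_dir: Path, labels_file: Path) -> None:
  labels = labels_file.read_text().splitlines()   # one synset id per line
  images = sorted(os.listdir(val_dir))            # sorted to line up with labels
  for image, synset in zip(images, labels):
    (val_dir / synset).mkdir(exist_ok=True)       # one folder per class
    os.replace(val_dir / image, val_dir / synset / image)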
@@ -9,7 +9,7 @@ import torch
 import torch.nn.functional as F
 from tinygrad.tensor import Tensor
 
-BASEDIR = Path(__file__).parent.parent.resolve() / "extra" / "datasets" / "kits19" / "data"
+BASEDIR = Path(__file__).parent / "kits19" / "data"
 
 """
 To download the dataset:
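Note: this line also drops .resolve(), which only pins the path to an absolute form (expanding symlinks); the target directory is unchanged. Illustration (script path hypothetical):

from pathlib import Path

p = Path("extra/datasets/kits19.py")             # relative __file__, e.g. when run from the repo root
print(p.parent / "kits19" / "data")              # extra/datasets/kits19/data (relative)
print((p.parent / "kits19" / "data").resolve())  # absolute, resolved against the cwd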
@@ -15,7 +15,7 @@ for file in $(find * | grep flac); do ffmpeg -i $file -ar 16k "$(dirname $file)/
 
 Then this [file](https://github.com/mlcommons/inference/blob/master/speech_recognition/rnnt/dev-clean-wav.json) has to also be put in `extra/datasets/librispeech`.
 """
-BASEDIR = pathlib.Path(__file__).parent.parent / "extra/datasets/librispeech"
+BASEDIR = pathlib.Path(__file__).parent / "librispeech"
 with open(BASEDIR / "dev-clean-wav.json") as f:
   ci = json.load(f)
 
@@ -11,7 +11,7 @@ from tqdm import tqdm
 import pandas as pd
 import concurrent.futures
 
-BASEDIR = pathlib.Path(__file__).parent.parent / "extra/datasets/open-images-v6-mlperf"
+BASEDIR = pathlib.Path(__file__).parent / "open-images-v6-mlperf"
 BUCKET_NAME = "open-images-dataset"
 BBOX_ANNOTATIONS_URL = "https://storage.googleapis.com/openimages/v5/validation-annotations-bbox.csv"
 MAP_CLASSES_URL = "https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv"
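Note: as a usage sketch (not from the diff), both CSVs can be read straight from their URLs since pandas.read_csv accepts them; treating class-descriptions-boxable.csv as header-less is an assumption here:

import pandas as pd

# the validation bbox annotations ship a header row (ImageID, LabelName, ...)
annots = pd.read_csv("https://storage.googleapis.com/openimages/v5/validation-annotations-bbox.csv")
# the class map is assumed header-less: machine id -> display name
classes = pd.read_csv("https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv",
                      header=None, names=["LabelName", "DisplayName"])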
@@ -5,7 +5,7 @@ from transformers import BertTokenizer
 import numpy as np
 from extra.utils import download_file
 
-BASEDIR = Path(__file__).parent.parent / "extra/datasets/squad"
+BASEDIR = Path(__file__).parent / "squad"
 def init_dataset():
   os.makedirs(BASEDIR, exist_ok=True)
   download_file("https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json", BASEDIR / "dev-v1.1.json")
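Note: with BASEDIR anchored to the script's own directory, init_dataset() lands the dev set next to the module regardless of the caller's working directory. A hypothetical usage (module path assumed to be extra.datasets.squad after the move):

from extra.datasets.squad import init_dataset
init_dataset()  # creates <module dir>/squad/ and downloads dev-v1.1.json into it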
@@ -141,7 +141,7 @@ def iterate(tokenizer, start=0):
   yield features, example
 
 if __name__ == "__main__":
-  tokenizer = BertTokenizer(str(Path(__file__).parent.parent / "weights/bert_vocab.txt"))
+  tokenizer = BertTokenizer(str(Path(__file__).parent.parent.parent / "weights" / "bert_vocab.txt"))
 
   X, Y = next(iterate(tokenizer))
   print(" ".join(X[0]["tokens"]))
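Note: unlike the BASEDIR lines, this one gains a .parent instead of losing one, because bert_vocab.txt lives under <repo>/weights/, outside the datasets tree. A quick check (file location assumed):

from pathlib import Path

new_file = Path("/repo/extra/datasets/squad.py")  # hypothetical location after the move
vocab = new_file.parent.parent.parent / "weights" / "bert_vocab.txt"
assert vocab == Path("/repo/weights/bert_vocab.txt")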