set PAGE_SIZE=1 and generate new dataset (#7559)

13080 rows in total. Both generating and loading this are pretty broken right now; the filters are wrong, for example.
chenyu
2024-11-05 11:25:01 -05:00
committed by GitHub
parent 7581a57aac
commit 207bca6cea
6 changed files with 6 additions and 8 deletions

@@ -1,11 +1,8 @@
 #!/usr/bin/env python3
 # extract asts from process replay artifacts
 import os
 from tinygrad.helpers import db_connection, VERSION
 from test.external.process_replay.process_replay import _pmap
-PAGE_SIZE = 100
 TABLE_NAME = f"kernel_process_replay_{VERSION}"
 LOGOPS = os.getenv("LOGOPS", "/tmp/sops")
 def extract_ast(*args) -> bool:
@@ -13,6 +10,4 @@ def extract_ast(*args) -> bool:
   return args[-1]
 if __name__ == "__main__":
   conn = db_connection()
-  row_count = conn.execute(f"SELECT COUNT(*) FROM '{TABLE_NAME}'").fetchone()[0]
   _pmap("kernel", extract_ast)

@@ -1,4 +1,5 @@
 #!/bin/bash
+export PAGE_SIZE=1
 export LOGOPS=/tmp/ops
 export RUN_PROCESS_REPLAY=1
 rm $LOGOPS
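
For the new export PAGE_SIZE=1 to take effect, the Python side has to read the page size from the environment instead of hard-coding it. A minimal sketch of that pattern, assuming process replay picks the value up via tinygrad.helpers.getenv (how it actually reads it is not shown in this diff):

# Read the page size from the environment, falling back to a default.
# The exact variable name and default used inside process_replay are assumptions here.
from tinygrad.helpers import getenv

PAGE_SIZE = getenv("PAGE_SIZE", 100)  # PAGE_SIZE=1 -> one kernel per page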

@@ -24,14 +24,16 @@ def kern_str_to_lin(kern_str:str, opts=None):
 import gzip
 from pathlib import Path
 import random
-from tinygrad.helpers import dedup
+from tinygrad.helpers import dedup, DEBUG
 def load_worlds(filter_reduce=True, filter_noimage=True, filter_novariable=True):
   fn = Path(__file__).parent.parent / "datasets/sops.gz"
   ast_strs = dedup(gzip.open(fn).read().decode('utf-8').strip().split("\n"))
   assert len(ast_strs) > 5000, f"dataset size = {len(ast_strs)} is too small"
+  if DEBUG >= 1: print(f"loaded {len(ast_strs)=} before filters")
   if filter_reduce: ast_strs = [x for x in ast_strs if "REDUCE_AXIS" in x]
   if filter_noimage: ast_strs = [x for x in ast_strs if "dtypes.image" not in x]
   if filter_novariable: ast_strs = [x for x in ast_strs if "Variable" not in x]
+  if DEBUG >= 1: print(f"loaded {len(ast_strs)=} after filters")
   random.seed(1337)
   random.shuffle(ast_strs)
   return ast_strs
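
With the new DEBUG prints in place, the effect of each filter on the regenerated dataset can be checked directly. A usage sketch, assuming load_worlds lives in extra/optimization/helpers.py as in the tinygrad repo (the module path is not visible in this diff):

# Hypothetical usage: run with DEBUG=1 in the environment to see the
# before/after filter counts added in the diff above.
from extra.optimization.helpers import load_worlds

ast_strs = load_worlds(filter_reduce=True, filter_noimage=True, filter_novariable=True)
print(f"{len(ast_strs)} kernel ASTs left after filtering and shuffling")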