mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-10 07:28:15 -05:00
rename lazydata to uop (#10698)
This commit is contained in:
@@ -240,7 +240,6 @@ class LLaMa:
|
||||
#elif k.endswith('.weight'): v.shard_(device, axis=-1)
|
||||
#elif 'norm.' in k: v.shard_(device, axis=-1)
|
||||
else: v.shard_(device, axis=None)
|
||||
#print(k, v.shape, v.lazydata.axis)
|
||||
|
||||
# replace weights in model
|
||||
load_state_dict(model, weights, strict=False, consume=True)
|
||||
@@ -446,7 +445,7 @@ After you are done speaking, output [EOS]. You are not Chad.
|
||||
print(f"using LLaMA{LLAMA_SUFFIX}-{args.size} model")
|
||||
device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT
|
||||
llama = LLaMa.build(MODEL_PATH, TOKENIZER_PATH, model_gen=args.gen, model_size=args.size, quantize=args.quantize, device=device)
|
||||
param_bytes = sum(x.lazydata.size * x.dtype.itemsize for x in get_parameters(llama.model))
|
||||
param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(llama.model))
|
||||
|
||||
outputted = pre_prompt if chatbot else args.prompt
|
||||
start_pos, toks = 0, [llama.tokenizer.bos_id()] + llama.tokenizer.encode(outputted)
|
||||
|
||||
@@ -284,7 +284,7 @@ if __name__ == "__main__":
|
||||
|
||||
device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT
|
||||
model = build_transformer(args.model, model_size=args.size, quantize=args.quantize, device=device)
|
||||
param_bytes = sum(x.lazydata.size * x.dtype.itemsize for x in get_parameters(model))
|
||||
param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(model))
|
||||
|
||||
if not args.no_api and not args.benchmark:
|
||||
from bottle import Bottle, request, response, HTTPResponse, abort, static_file
|
||||
|
||||
@@ -16,7 +16,7 @@ if __name__ == "__main__":
|
||||
#model.load_pretrained()
|
||||
for p in nn.state.get_parameters(model): p.replace(Tensor.empty(p.shape, dtype=p.dtype)) # fake load pretrained
|
||||
|
||||
#early_sched = create_schedule([x.lazydata for x in nn.state.get_parameters(model)])
|
||||
#early_sched = create_schedule([x.uop for x in nn.state.get_parameters(model)])
|
||||
#print(f"built model {len(early_sched)}")
|
||||
|
||||
#B, T = Variable("B", 1, 128).bind(4), 64 #Variable("T", 1, 1024).bind(64)
|
||||
@@ -56,7 +56,7 @@ if __name__ == "__main__":
|
||||
state_dict.update({'X': X, 'Y': Y, 'loss': loss})
|
||||
grad_state_dict = {}
|
||||
for k,v in state_dict.items():
|
||||
if v.lazydata.base.buffer not in used_buffers: print(f"UNUSED: {k}")
|
||||
if v.uop.base.buffer not in used_buffers: print(f"UNUSED: {k}")
|
||||
if v.grad is not None: grad_state_dict['grad_'+k] = v.grad
|
||||
state_dict.update(grad_state_dict)
|
||||
state_dict.update({'adam_b1_t': optimizer.b1_t, 'adam_b2_t': optimizer.b2_t, 'adam_lr': optimizer.lr})
|
||||
@@ -65,7 +65,7 @@ if __name__ == "__main__":
|
||||
nm = inverse_state_dict[p]
|
||||
state_dict["adam_m_"+nm] = m
|
||||
state_dict["adam_v_"+nm] = v
|
||||
named_buffers = {v.lazydata.base.buffer:k.replace(".", "_") for k,v in state_dict.items()}
|
||||
named_buffers = {v.uop.base.buffer:k.replace(".", "_") for k,v in state_dict.items()}
|
||||
|
||||
c_code = ["#include <stdlib.h>", "#include <tgmath.h>", "#include <stdbool.h>"]
|
||||
if TIMING: c_code += ["#include <stdio.h>", "#include <time.h>"]
|
||||
|
||||
@@ -71,7 +71,7 @@ def loader_process(q_in, q_out, X:Tensor, seed):
|
||||
#storage_tensor._copyin(img_tensor.numpy())
|
||||
|
||||
# faster
|
||||
X[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes()
|
||||
X[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes()
|
||||
|
||||
# ideal
|
||||
#X[idx].assign(img.tobytes()) # NOTE: this is slow!
|
||||
@@ -262,8 +262,8 @@ def load_unet3d_data(preprocessed_dataset_dir, seed, queue_in, queue_out, X:Tens
|
||||
x = random_brightness_augmentation(x)
|
||||
x = gaussian_noise(x)
|
||||
|
||||
X[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = x.tobytes()
|
||||
Y[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = y.tobytes()
|
||||
X[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = x.tobytes()
|
||||
Y[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = y.tobytes()
|
||||
|
||||
queue_out.put(idx)
|
||||
queue_out.put(None)
|
||||
@@ -377,12 +377,12 @@ def load_retinanet_data(base_dir:Path, val:bool, queue_in:Queue, queue_out:Queue
|
||||
clipped_match_idxs = np.clip(match_idxs, 0, None)
|
||||
clipped_boxes, clipped_labels = tgt["boxes"][clipped_match_idxs], tgt["labels"][clipped_match_idxs]
|
||||
|
||||
boxes[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_boxes.tobytes()
|
||||
labels[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_labels.tobytes()
|
||||
matches[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = match_idxs.tobytes()
|
||||
anchors[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = anchor.tobytes()
|
||||
boxes[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_boxes.tobytes()
|
||||
labels[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = clipped_labels.tobytes()
|
||||
matches[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = match_idxs.tobytes()
|
||||
anchors[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = anchor.tobytes()
|
||||
|
||||
imgs[idx].contiguous().realize().lazydata.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes()
|
||||
imgs[idx].contiguous().realize().uop.base.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes()
|
||||
|
||||
queue_out.put(idx)
|
||||
queue_out.put(None)
|
||||
|
||||
@@ -19,8 +19,8 @@ if __name__ == "__main__":
|
||||
|
||||
inputs = run_onnx.get_empty_input_data("npy")
|
||||
out: Tensor = next(iter(run_onnx({k:v.to(None) for k,v in inputs.items()}).values())).to('cpu')
|
||||
root = out.lazydata
|
||||
targets = [x.lazydata for x in inputs.values()]
|
||||
root = out.uop
|
||||
targets = [x.uop for x in inputs.values()]
|
||||
print(targets)
|
||||
|
||||
# TODO: abstract this from gradient?
|
||||
@@ -42,7 +42,7 @@ if __name__ == "__main__":
|
||||
|
||||
print("**** real ****")
|
||||
GlobalCounters.reset()
|
||||
out.lazydata = root.substitute(kernelized).substitute(becomes_map)
|
||||
out.uop = root.substitute(kernelized).substitute(becomes_map)
|
||||
out.kernelize()
|
||||
|
||||
# realize
|
||||
|
||||
@@ -66,7 +66,7 @@ if __name__ == "__main__":
|
||||
model_path = Path(args.weights) if args.weights else download_weights(model_info["total_num_weights"])
|
||||
transformer = load_model(model_path, model_info["model_params"])
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_info["tokenizer"])
|
||||
param_bytes = sum(x.lazydata.size * x.dtype.itemsize for x in get_parameters(transformer))
|
||||
param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(transformer))
|
||||
|
||||
outputted = args.prompt
|
||||
start_pos, toks = 0, tokenizer(outputted)["input_ids"]
|
||||
|
||||
@@ -13,8 +13,8 @@ def prepare_browser_chunks(model):
|
||||
chunk_size = 16 * 1024 * 1024 # small chunks based on iphone browser constraints
|
||||
metadata = {}
|
||||
# We won't export cache_kv bytes (because we start inference on client at start_pos=0), but we will tell the client how big cache_kv needs to be
|
||||
t_infos = [(v.lazydata.base.realized.nbytes, k, v.dtype) for k,v in state_dict.items() if "cache_kv" not in k]
|
||||
empty_t_infos = [(v.lazydata.base.realized.nbytes, k, v.dtype) for k,v in state_dict.items() if "cache_kv" in k]
|
||||
t_infos = [(v.uop.base.realized.nbytes, k, v.dtype) for k,v in state_dict.items() if "cache_kv" not in k]
|
||||
empty_t_infos = [(v.uop.base.realized.nbytes, k, v.dtype) for k,v in state_dict.items() if "cache_kv" in k]
|
||||
|
||||
split_t_infos = []
|
||||
for size, name, dtype in t_infos:
|
||||
@@ -48,7 +48,7 @@ def prepare_browser_chunks(model):
|
||||
weight_metadata = metadata.get(name, default)
|
||||
weight_metadata["parts"][part_num] = {"file": i, "file_start_pos": cursor, "size": size}
|
||||
metadata[name] = weight_metadata
|
||||
data = bytes(state_dict[name].lazydata.base.realized.as_buffer())
|
||||
data = bytes(state_dict[name].uop.base.realized.as_buffer())
|
||||
data = data if not offsets else data[offsets[0]:offsets[1]]
|
||||
writer.write(data)
|
||||
cursor += size
|
||||
|
||||
@@ -114,7 +114,7 @@ if __name__ == "__main__":
|
||||
run, special_names = jit_model(step, *step.input)
|
||||
functions, statements, bufs, _ = compile_net(run, special_names)
|
||||
state = get_state_dict(model)
|
||||
weights = {id(x.lazydata.base.realized): name for name, x in state.items()}
|
||||
weights = {id(x.uop.base.realized): name for name, x in state.items()}
|
||||
kernel_code = '\n\n'.join([f"const {key} = `{fixup_code(code, key)}`;" for key, code in functions.items()])
|
||||
kernel_names = ', '.join([name for (name, _, _, _) in statements])
|
||||
input_names = [name for _,name in special_names.items() if "input" in name]
|
||||
|
||||
Reference in New Issue
Block a user