diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8f20b2d1cc..83f137e024 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -23,7 +23,7 @@ jobs:
     - name: Repo line count
      run: python3 sz.py
    - name: Lint with pylint
-      run: python -m pylint --disable=all -e W0311 --jobs=0 --indent-string='  ' **/*.py
+      run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string='  ' **/*.py
    - name: Lint with flake8
      run: flake8 tinygrad/ --indent-size=2 --select=F,E112,E113,E203,E304,E502,E702,E703,E71,E72,E731,W191,W6 --statistics -j4
    - name: Lint tinygrad with pylint
diff --git a/examples/compile_efficientnet.py b/examples/compile_efficientnet.py
index 4d7c0a7b84..10cfd9fcb6 100644
--- a/examples/compile_efficientnet.py
+++ b/examples/compile_efficientnet.py
@@ -69,7 +69,7 @@ if __name__ == "__main__":
   # the functions
   cprog += list(functions.values())
 
-  # the net 
+  # the net
   cprog += ["void net() {"] + [f"{name}({', '.join(args)});" for (name, args, _global_size) in statements] + ["}"]
 
   cprog += ["""
diff --git a/examples/deep_deterministic_policy_gradient.py b/examples/deep_deterministic_policy_gradient.py
index c162aab391..906069f0db 100644
--- a/examples/deep_deterministic_policy_gradient.py
+++ b/examples/deep_deterministic_policy_gradient.py
@@ -114,8 +114,8 @@ class DeepDeterministicPolicyGradient:
       noise_stddev: The standard deviation of the exploration noise.
 
     Note:
-      In contrast to the original paper, actions are already included in the first layer 
-      of the Critic and we use a Gaussian distribution instead of an Ornstein Uhlenbeck 
+      In contrast to the original paper, actions are already included in the first layer
+      of the Critic and we use a Gaussian distribution instead of an Ornstein Uhlenbeck
       process for exploration noise.
""" @@ -203,7 +203,7 @@ class DeepDeterministicPolicyGradient: next_state_batch, done_batch, ) = self.memory.sample() - + target_actions = self.target_actor.forward(next_state_batch, self.max_action) y = reward_batch + self.gamma * self.target_critic.forward( next_state_batch, target_actions.detach() diff --git a/examples/yolov8-onnx.py b/examples/yolov8-onnx.py index 53c2c0a39c..3bab3c2956 100644 --- a/examples/yolov8-onnx.py +++ b/examples/yolov8-onnx.py @@ -7,7 +7,7 @@ from tinygrad.tensor import Tensor os.chdir("/tmp") if not os.path.isfile("yolov8n-seg.onnx"): - model = YOLO("yolov8n-seg.pt") + model = YOLO("yolov8n-seg.pt") model.export(format="onnx", imgsz=[480,640]) onnx_model = onnx.load(open("yolov8n-seg.onnx", "rb")) # TODO: move get example inputs to onnx diff --git a/examples/yolov8.py b/examples/yolov8.py index 956884a592..71da6bf25b 100644 --- a/examples/yolov8.py +++ b/examples/yolov8.py @@ -48,10 +48,10 @@ def box_area(box): def box_iou(box1, box2): lt = np.maximum(box1[:, None, :2], box2[:, :2]) rb = np.minimum(box1[:, None, 2:], box2[:, 2:]) - wh = np.clip(rb - lt, 0, None) - inter = wh[:, :, 0] * wh[:, :, 1] - area1 = box_area(box1)[:, None] - area2 = box_area(box2)[None, :] + wh = np.clip(rb - lt, 0, None) + inter = wh[:, :, 0] * wh[:, :, 1] + area1 = box_area(box1)[:, None] + area2 = box_area(box2)[None, :] iou = inter / (area1 + area2 - inter) return iou @@ -66,7 +66,7 @@ def compute_nms(boxes, scores, iou_threshold): inds = np.where(iou.squeeze() <= iou_threshold)[0] order = order[inds + 1] return np.array(keep) - + def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, agnostic=False, max_det=300, nc=0, max_wh=7680): prediction = prediction[0] if isinstance(prediction, (list, tuple)) else prediction bs, nc = prediction.shape[0], nc or (prediction.shape[1] - 4) @@ -86,7 +86,7 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, agnostic=Fa c = x[:, 5:6] * (0 if agnostic else max_wh) boxes, scores = x[:, :4] + c, x[:, 4] i = compute_nms(boxes, scores, iou_thres)[:max_det] - output[xi] = x[i] + output[xi] = x[i] return output def postprocess(preds, img, orig_imgs): @@ -102,7 +102,7 @@ def postprocess(preds, img, orig_imgs): pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape) all_preds.append(pred) return all_preds - + def draw_bounding_boxes_and_save(orig_img_paths, output_img_paths, all_predictions, class_labels, iou_threshold=0.5): color_dict = {label: tuple((((i+1) * 50) % 256, ((i+1) * 100) % 256, ((i+1) * 150) % 256)) for i, label in enumerate(class_labels)} font = cv2.FONT_HERSHEY_SIMPLEX @@ -159,7 +159,7 @@ def draw_bounding_boxes_and_save(orig_img_paths, output_img_paths, all_predictio cv2.imwrite(output_img_path, orig_img) print(f'saved detections at {output_img_path}') -# utility functions for forward pass. +# utility functions for forward pass. 
 def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
   lt, rb = distance.chunk(2, dim)
   x1y1 = anchor_points - lt
@@ -167,7 +167,7 @@ def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
   if xywh:
     c_xy = (x1y1 + x2y2) / 2
     wh = x2y2 - x1y1
-    return c_xy.cat(wh, dim=1) 
+    return c_xy.cat(wh, dim=1)
   return x1y1.cat(x2y2, dim=1)
 
 def make_anchors(feats, strides, grid_cell_offset=0.5):
@@ -175,13 +175,13 @@ def make_anchors(feats, strides, grid_cell_offset=0.5):
   assert feats is not None
   for i, stride in enumerate(strides):
     _, _, h, w = feats[i].shape
-    sx = Tensor.arange(w) + grid_cell_offset 
-    sy = Tensor.arange(h) + grid_cell_offset 
-    
-    # this is np.meshgrid but in tinygrad 
+    sx = Tensor.arange(w) + grid_cell_offset
+    sy = Tensor.arange(h) + grid_cell_offset
+
+    # this is np.meshgrid but in tinygrad
     sx = sx.reshape(1, -1).repeat([h, 1]).reshape(-1)
     sy = sy.reshape(-1, 1).repeat([1, w]).reshape(-1)
-    
+
     anchor_points.append(Tensor.stack((sx, sy), -1).reshape(-1, 2))
     stride_tensor.append(Tensor.full((h * w), stride))
   anchor_points = anchor_points[0].cat(anchor_points[1], anchor_points[2])
@@ -244,32 +244,32 @@ class Upsample:
     (b, c), _lens = x.shape[:2], len(x.shape[2:])
     tmp = x.reshape([b, c, -1] + [1] * _lens) * Tensor.ones(*[1, 1, 1] + [self.scale_factor] * _lens)
     return tmp.reshape(list(x.shape) + [self.scale_factor] * _lens).permute([0, 1] + list(chain.from_iterable([[y+2, y+2+_lens] for y in range(_lens)]))).reshape([b, c] + [x * self.scale_factor for x in x.shape[2:]])
-  
+
 class Conv_Block():
   def __init__(self, c1, c2, kernel_size=1, stride=1, groups=1, dilation=1, padding=None):
     self.conv = Conv2d(c1,c2, kernel_size, stride, padding=autopad(kernel_size, padding, dilation), bias=False, groups=groups, dilation=dilation)
     self.bn = BatchNorm2d(c2, eps=0.001)
-  
+
   def __call__(self, x):
     return self.bn(self.conv(x)).silu()
-  
+
 class Bottleneck:
   def __init__(self, c1, c2 , shortcut: bool, g=1, kernels: list = (3,3), channel_factor=0.5):
     c_ = int(c2 * channel_factor)
     self.cv1 = Conv_Block(c1, c_, kernel_size=kernels[0], stride=1, padding=None)
     self.cv2 = Conv_Block(c_, c2, kernel_size=kernels[1], stride=1, padding=None, groups=g)
     self.residual = c1 == c2 and shortcut
-  
+
   def __call__(self, x):
     return x + self.cv2(self.cv1(x)) if self.residual else self.cv2(self.cv1(x))
 
 class C2f:
-  def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): 
-    self.c = int(c2 * e) 
+  def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
+    self.c = int(c2 * e)
     self.cv1 = Conv_Block(c1, 2 * self.c, 1,)
     self.cv2 = Conv_Block((2 + n) * self.c, c2, 1)
     self.bottleneck = [Bottleneck(self.c, self.c, shortcut, g, kernels=[(3, 3), (3, 3)], channel_factor=1.0) for _ in range(n)]
-  
+
   def __call__(self, x):
     y= list(self.cv1(x).chunk(2, 1))
     y.extend(m(y[-1]) for m in self.bottleneck)
@@ -282,17 +282,17 @@ class SPPF:
     c_ = c1 // 2 # hidden channels
     self.cv1 = Conv_Block(c1, c_, 1, 1, padding=None)
     self.cv2 = Conv_Block(c_ * 4, c2, 1, 1, padding=None)
-    
-    # TODO: this pads with 0s, whereas torch function pads with -infinity. This results in a < 2% difference in prediction which does not make a difference visually. 
+
+    # TODO: this pads with 0s, whereas torch function pads with -infinity. This results in a < 2% difference in prediction which does not make a difference visually.
     self.maxpool = lambda x : x.pad2d((k // 2, k // 2, k // 2, k // 2)).max_pool2d(kernel_size=k, stride=1)
-  
+
   def __call__(self, x):
     x = self.cv1(x)
     x2 = self.maxpool(x)
     x3 = self.maxpool(x2)
     x4 = self.maxpool(x3)
     return self.cv2(x.cat(x2, x3, x4, dim=1))
-  
+
 class DFL:
   def __init__(self, c1=16):
     self.conv = Conv2d(c1, 1, 1, bias=False)
@@ -303,19 +303,19 @@ class DFL:
   def __call__(self, x):
     b, c, a = x.shape # batch, channels, anchors
     return self.conv(x.reshape(b, 4, self.c1, a).transpose(2, 1).softmax(1)).reshape(b, 4, a)
-  
-#backbone 
+
+#backbone
 class Darknet:
-  def __init__(self, w, r, d): 
+  def __init__(self, w, r, d):
     self.b1 = [Conv_Block(c1=3, c2= int(64*w), kernel_size=3, stride=2, padding=1), Conv_Block(int(64*w), int(128*w), kernel_size=3, stride=2, padding=1)]
     self.b2 = [C2f(c1=int(128*w), c2=int(128*w), n=round(3*d), shortcut=True), Conv_Block(int(128*w), int(256*w), 3, 2, 1), C2f(int(256*w), int(256*w), round(6*d), True)]
     self.b3 = [Conv_Block(int(256*w), int(512*w), kernel_size=3, stride=2, padding=1), C2f(int(512*w), int(512*w), round(6*d), True)]
     self.b4 = [Conv_Block(int(512*w), int(512*w*r), kernel_size=3, stride=2, padding=1), C2f(int(512*w*r), int(512*w*r), round(3*d), True)]
     self.b5 = [SPPF(int(512*w*r), int(512*w*r), 5)]
-  
+
   def return_modules(self):
     return [*self.b1, *self.b2, *self.b3, *self.b4, *self.b5]
-  
+
   def __call__(self, x):
     x1 = x.sequential(self.b1)
     x2 = x1.sequential(self.b2)
@@ -334,10 +334,10 @@ class Yolov8NECK:
     self.n4 = C2f(c1=int(768*w), c2=int(512*w), n=round(3*d), shortcut=False)
     self.n5 = Conv_Block(c1=int(512* w), c2=int(512 * w), kernel_size=3, stride=2, padding=1)
     self.n6 = C2f(c1=int(512*w*(1+r)), c2=int(512*w*r), n=round(3*d), shortcut=False)
-  
+
   def return_modules(self):
     return [self.n1, self.n2, self.n3, self.n4, self.n5, self.n6]
-  
+
   def __call__(self, p3, p4, p5):
     x = self.n1(self.up(p5).cat(p4, dim=1))
     head_1 = self.n2(self.up(x).cat(p3, dim=1))
@@ -345,20 +345,20 @@ class Yolov8NECK:
     head_3 = self.n6(self.n5(head_2).cat(p5, dim=1))
     return [head_1, head_2, head_3]
 
-#task specific head. 
+#task specific head.
 class DetectionHead:
   def __init__(self, nc=80, filters=()):
-    self.ch = 16 
+    self.ch = 16
     self.nc = nc # number of classes
-    self.nl = len(filters) 
+    self.nl = len(filters)
     self.no = nc + self.ch * 4 #
     self.stride = [8, 16, 32]
     c1 = max(filters[0], self.nc)
     c2 = max((filters[0] // 4, self.ch * 4))
-    self.dfl = DFL(self.ch) 
+    self.dfl = DFL(self.ch)
     self.cv3 = [[Conv_Block(x, c1, 3), Conv_Block(c1, c1, 3), Conv2d(c1, self.nc, 1)] for x in filters]
     self.cv2 = [[Conv_Block(x, c2, 3), Conv_Block(c2, c2, 3), Conv2d(c2, 4 * self.ch, 1)] for x in filters]
-  
+
   def __call__(self, x):
     for i in range(self.nl):
       x[i] = (x[i].sequential(self.cv2[i]).cat(x[i].sequential(self.cv3[i]), dim=1))
@@ -369,7 +369,7 @@ class DetectionHead:
     dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
     z = dbox.cat(cls.sigmoid(), dim=1)
     return z
-  
+
 class YOLOv8:
   def __init__(self, w, r, d, num_classes): #width_multiple, ratio_multiple, depth_multiple
     self.net = Darknet(w, r, d)
@@ -386,9 +386,9 @@ class YOLOv8:
     yolov8neck_modules = [12, 15, 16, 18, 19, 21]
     yolov8_head_weights = [(22, self.head)]
     return [*zip(backbone_modules, self.net.return_modules()), *zip(yolov8neck_modules, self.fpn.return_modules()), *yolov8_head_weights]
-  
+
 if __name__ == '__main__':
-  
+
   # usage : python3 yolov8.py "image_URL OR image_path" "v8 variant" (optional, n is default)
   if len(sys.argv) < 2:
     print("Error: Image URL or path not provided.")
@@ -397,7 +397,7 @@ if __name__ == '__main__':
   img_path = sys.argv[1]
   yolo_variant = sys.argv[2] if len(sys.argv) >= 3 else (print("No variant given, so choosing 'n' as the default. Yolov8 has different variants, you can choose from ['n', 's', 'm', 'l', 'x']") or 'n')
   print(f'running inference for YOLO version {yolo_variant}')
-  
+
   output_folder_path = './outputs_yolov8'
   if not os.path.exists(output_folder_path):
     os.makedirs(output_folder_path)
@@ -409,31 +409,31 @@ if __name__ == '__main__':
     print('Error in image loading. Check your image file.')
     sys.exit(1)
   pre_processed_image = preprocess(image)
-  
+
   # Different YOLOv8 variants use different w , r, and d multiples. For a list , refer to this yaml file (the scales section) https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/v8/yolov8.yaml
-  depth, width, ratio = get_variant_multiples(yolo_variant) 
-  yolo_infer = YOLOv8(w=width, r=ratio, d=depth, num_classes=80) 
-  
+  depth, width, ratio = get_variant_multiples(yolo_variant)
+  yolo_infer = YOLOv8(w=width, r=ratio, d=depth, num_classes=80)
+
   weights_location = Path(__file__).parent.parent / "weights" / f'yolov8{yolo_variant}.safetensors'
   download_file(f'https://gitlab.com/r3sist/yolov8_weights/-/raw/master/yolov8{yolo_variant}.safetensors', weights_location)
-  
+
   state_dict = safe_load(weights_location)
   load_state_dict(yolo_infer, state_dict)
-  
+
   st = time.time()
   predictions = yolo_infer(pre_processed_image)
   print(f'did inference in {int(round(((time.time() - st) * 1000)))}ms')
   post_predictions = postprocess(preds=predictions, img=pre_processed_image, orig_imgs=image)
-  
+
   #v8 and v3 have same 80 class names for Object Detection
   class_labels = fetch('https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names')
   class_labels = class_labels.decode('utf-8').split('\n')
   draw_bounding_boxes_and_save(orig_img_paths=image_location, output_img_paths=out_paths, all_predictions=post_predictions, class_labels=class_labels)
 
-# TODO for later: 
-# 1. Fix SPPF minor difference due to maxpool 
-# 2. AST exp overflow warning while on cpu 
-# 3. Make NMS faster 
+# TODO for later:
+# 1. Fix SPPF minor difference due to maxpool
+# 2. AST exp overflow warning while on cpu
+# 3. Make NMS faster
 # 4. Add video inference and webcam support
\ No newline at end of file
diff --git a/extra/lr_scheduler.py b/extra/lr_scheduler.py
index 997d92236e..25badc1743 100644
--- a/extra/lr_scheduler.py
+++ b/extra/lr_scheduler.py
@@ -7,9 +7,9 @@ class LR_Scheduler:
   def __init__(self, optimizer: Optimizer):
     self.optimizer = optimizer
     self.epoch_counter = Tensor([0], requires_grad=False)
-  
+
   def get_lr(self): pass
-  
+
   def step(self) -> None:
     self.epoch_counter.assign(self.epoch_counter + 1).realize()
     self.optimizer.lr.assign(self.get_lr()).realize()
@@ -19,7 +19,7 @@ class MultiStepLR(LR_Scheduler):
     super().__init__(optimizer)
     self.milestones = milestones
     self.gamma = gamma
-  
+
   def get_lr(self) -> Tensor:
     if self.epoch_counter.numpy()[0] not in self.milestones:
       return self.optimizer.lr
@@ -34,13 +34,13 @@ class ReduceLROnPlateau(LR_Scheduler):
     self.bad_epoch = 0
     if mode == "min": self.threshold *= -1
-  
+
   def is_better(self, current: float) -> bool:
     dynamic_threshold = self.best*(1+self.threshold) if self.threshold_mode == "rel" else self.best+self.threshold
     if self.mode == "min": return current < dynamic_threshold
     return current > dynamic_threshold
-  
+
   def step(self, current: float) -> None:
     self.epoch_counter.assign(self.epoch_counter + 1).realize()
     if self.is_better(current):
@@ -48,7 +48,7 @@ class ReduceLROnPlateau(LR_Scheduler):
       self.best = current
     else:
      self.bad_epoch += 1
-    
+
     if self.bad_epoch > self.patience:
       self.optimizer.lr *= self.factor
       self.bad_epoch = 0
@@ -74,12 +74,12 @@ class OneCycleLR(LR_Scheduler):
     self.pct_start = pct_start
     assert anneal_strategy == 'linear', 'only linear annealing supported'
     assert not cycle_momentum, 'cycle momentum not supported'
-    self.optimizer.lr.assign(self.get_lr()).realize() # update the initial LR 
+    self.optimizer.lr.assign(self.get_lr()).realize() # update the initial LR
 
   @staticmethod
   def _annealing_linear(start: Tensor, end: Tensor, pct: Tensor) -> Tensor: return ((end - start) * pct + start)
 
-  def get_lr(self) -> Tensor: 
+  def get_lr(self) -> Tensor:
     return (self.epoch_counter < self.total_steps*self.pct_start).where(
       self._annealing_linear(self.initial_lr, self.max_lr, self.epoch_counter/(self.total_steps*self.pct_start)),
       self._annealing_linear(self.max_lr, self.min_lr, (self.epoch_counter-(self.total_steps*self.pct_start))/(self.total_steps*(1-self.pct_start)))
diff --git a/extra/onnx.py b/extra/onnx.py
index 3dc64fea8d..7477771570 100644
--- a/extra/onnx.py
+++ b/extra/onnx.py
@@ -78,7 +78,7 @@ def get_run_onnx(onnx_model: ModelProto):
   attribute_dict = {}
   for num,n in enumerate(onnx_model.graph.node):
     attribute_dict[num] = attribute_to_dict(n.attribute)
-  
+
   onnx_model_version = onnx_model.opset_import[0].version
 
   def run_onnx(inputs={}, debug=False):
@@ -204,7 +204,7 @@ def get_run_onnx(onnx_model: ModelProto):
       assert len(n.output) <= len(ret), f"expected output size must be less than {len(ret)}, it's {n.output}"
      if debug: print([x.shape if isinstance(x, Tensor) else None for x in ret])
      if debug: print("outputs:")
-      for i in range(len(n.output)): 
+      for i in range(len(n.output)):
        if debug: print(f"\t{n.output[i]} - {ret[i]}")
        intermediate_tensors[n.output[i]] = ret[i]
      #print(ret[0].numpy().mean())
diff --git a/extra/onnx_ops.py b/extra/onnx_ops.py
index 377f340f65..b57a5ab4a4 100644
--- a/extra/onnx_ops.py
+++ b/extra/onnx_ops.py
@@ -209,7 +209,7 @@ def Or(x:Tensor, y:Tensor): return Where((x==y), x, Tensor.ones(*x.shape)).cast(
 def Xor(x:Tensor, y:Tensor): return Where((x==y), Tensor.zeros(*x.shape), Tensor.ones(*x.shape)).cast(dtypes.bool)
 def Not(x:Tensor): return Where((x==1), Tensor.zeros(*x.shape), Tensor.ones(*x.shape)).cast(dtypes.bool)
 
-def Trilu(x: Tensor, k: Union[Tensor, int]=0, upper=1): 
+def Trilu(x: Tensor, k: Union[Tensor, int]=0, upper=1):
   k = int(k.numpy().item()) if k is not 0 else 0 # onnx passes k as a tensor int64 with one element, default is 0
   return x.triu(k) if upper else x.tril(k)
 
@@ -242,13 +242,13 @@ def NegativeLogLikelihoodLoss(input, target, weight=None, ignore_index=None, red
   input = input.reshape((N, C, -1))
   target = target.reshape((N, -1))
   if weight is not None:
-    mask = target.unsqueeze(-1) == Tensor.arange(C,dtype=dtypes.int64).repeat((N, 1, 1)) 
+    mask = target.unsqueeze(-1) == Tensor.arange(C,dtype=dtypes.int64).repeat((N, 1, 1))
     weight = (mask * weight).sum(axis=-1)
   if ignore_index is not None:
     cond = (target == ignore_index)
-    weight = cond.where(0, weight) if weight is not None else cond.where(Tensor.zeros(*target.shape), 1) 
-  mask = target[:, None, :] == Tensor.arange(C).reshape([1, C] + [1]*(len(input.shape) -2)) 
-  loss = (-mask * input).sum(axis=1) * (1 if weight is None else weight) 
+    weight = cond.where(0, weight) if weight is not None else cond.where(Tensor.zeros(*target.shape), 1)
+  mask = target[:, None, :] == Tensor.arange(C).reshape([1, C] + [1]*(len(input.shape) -2))
+  loss = (-mask * input).sum(axis=1) * (1 if weight is None else weight)
   if reduction == "mean": return loss.mean() if weight is None else loss.sum() / weight.sum()
   elif reduction == "sum": return loss.sum()
   return loss.reshape(t_shape) if len(i_shape) != 3 else loss
@@ -259,7 +259,7 @@ def OneHot(indices, depth, values, axis=-1):
   if axis < 0: axis += rank + 1
   ls, rs = indices.shape[0:axis], indices.shape[axis: rank]
   cond = indices[:,None] == Tensor.arange(depth).reshape((1,) * len(ls) + (depth,) + (1,) * len(rs))
-  return cond.where(values[1], values[0]).cast(values.dtype) 
+  return cond.where(values[1], values[0]).cast(values.dtype)
 
 def Floor(x:Tensor): return x.floor()
 def Ceil(x:Tensor): return x.ceil()
diff --git a/extra/training.py b/extra/training.py
index f2dae8f620..4deace07a1 100644
--- a/extra/training.py
+++ b/extra/training.py
@@ -13,7 +13,7 @@ def sparse_categorical_crossentropy(out, Y):
   y = Tensor(y)
   return out.mul(y).mean()
 
-def train(model, X_train, Y_train, optim, steps, BS=128, lossfn=sparse_categorical_crossentropy, 
+def train(model, X_train, Y_train, optim, steps, BS=128, lossfn=sparse_categorical_crossentropy,
           transform=lambda x: x, target_transform=lambda x: x, noloss=False):
   Tensor.training = True
   losses, accuracies = [], []
@@ -41,9 +41,9 @@ def train(model, X_train, Y_train, optim, steps, BS=128, lossfn=sparse_categoric
       accuracies.append(accuracy)
       t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))
   return [losses, accuracies]
-  
-def evaluate(model, X_test, Y_test, num_classes=None, BS=128, return_predict=False, transform=lambda x: x, 
+
+def evaluate(model, X_test, Y_test, num_classes=None, BS=128, return_predict=False, transform=lambda x: x,
              target_transform=lambda y: y):
   Tensor.training = False
   def numpy_eval(Y_test, num_classes):
diff --git a/models/mask_rcnn.py b/models/mask_rcnn.py
index 33ea5fb18d..e91f70c66f 100644
--- a/models/mask_rcnn.py
+++ b/models/mask_rcnn.py
@@ -40,7 +40,7 @@ def topk(input_, k, dim=-1, largest=True, sorted=False):
   ind_part = np.argsort(input_, axis=dim)
   ind = np.take_along_axis(ind, ind_part, axis=dim)
   if largest: input_ *= -1
-  val = np.take_along_axis(input_, ind_part, axis=dim) 
+  val = np.take_along_axis(input_, ind_part, axis=dim)
   return Tensor(val), ind
 
 # This is very slow for large arrays, or indices
@@ -48,12 +48,12 @@ def _gather(array, indices):
   indices = indices.float().to(array.device)
   reshape_arg = [1]*array.ndim + [array.shape[-1]]
   return Tensor.where(
-    indices.unsqueeze(indices.ndim).expand(*indices.shape, array.shape[-1]) == Tensor.arange(array.shape[-1]).reshape(*reshape_arg).expand(*indices.shape, array.shape[-1]), 
+    indices.unsqueeze(indices.ndim).expand(*indices.shape, array.shape[-1]) == Tensor.arange(array.shape[-1]).reshape(*reshape_arg).expand(*indices.shape, array.shape[-1]),
     array, 0,
   ).sum(indices.ndim)
 
 # TODO: replace npgather with a faster gather using tinygrad only
-# NOTE: this blocks the gradient 
+# NOTE: this blocks the gradient
 def npgather(array,indices):
   if isinstance(array, Tensor): array = array.numpy()
   if isinstance(indices, Tensor): indices = indices.numpy()
@@ -98,7 +98,7 @@ def tensor_gather(tensor, indices):
 
   return ret
 
-class LastLevelMaxPool: 
+class LastLevelMaxPool:
   def __call__(self, x): return [Tensor.max_pool2d(x, 1, 2)]
 
@@ -853,7 +853,7 @@ def _bilinear_interpolate(
   w2 = outer_prod(hy, lx)
   w3 = outer_prod(ly, hx)
   w4 = outer_prod(ly, lx)
-  
+
   val = w1*v1 + w2*v2 + w3*v3 + w4*v4
   return val
 
@@ -861,41 +861,41 @@ def _bilinear_interpolate(
 def _roi_align(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned):
   orig_dtype = input.dtype
   _, _, height, width = input.shape
-  ph = Tensor.arange(pooled_height, device=input.device) 
-  pw = Tensor.arange(pooled_width, device=input.device) 
+  ph = Tensor.arange(pooled_height, device=input.device)
+  pw = Tensor.arange(pooled_width, device=input.device)
 
-  roi_batch_ind = rois[:, 0].cast(dtypes.int32).contiguous() 
+  roi_batch_ind = rois[:, 0].cast(dtypes.int32).contiguous()
   offset = 0.5 if aligned else 0.0
   roi_start_w = rois[:, 1] * spatial_scale - offset
   roi_start_h = rois[:, 2] * spatial_scale - offset
-  roi_end_w = rois[:, 3] * spatial_scale - offset 
+  roi_end_w = rois[:, 3] * spatial_scale - offset
   roi_end_h = rois[:, 4] * spatial_scale - offset
 
-  roi_width = roi_end_w - roi_start_w 
-  roi_height = roi_end_h - roi_start_h 
+  roi_width = roi_end_w - roi_start_w
+  roi_height = roi_end_h - roi_start_h
   if not aligned:
-    roi_width = roi_width.maximum(1.0) 
-    roi_height = roi_height.maximum(1.0) 
+    roi_width = roi_width.maximum(1.0)
+    roi_height = roi_height.maximum(1.0)
 
-  bin_size_h = roi_height / pooled_height 
-  bin_size_w = roi_width / pooled_width 
+  bin_size_h = roi_height / pooled_height
+  bin_size_w = roi_width / pooled_width
 
   exact_sampling = sampling_ratio > 0
 
-  roi_bin_grid_h = sampling_ratio if exact_sampling else (roi_height / pooled_height).ceil() 
+  roi_bin_grid_h = sampling_ratio if exact_sampling else (roi_height / pooled_height).ceil()
   roi_bin_grid_w = sampling_ratio if exact_sampling else (roi_width / pooled_width).ceil()
 
   if exact_sampling:
-    count = max(roi_bin_grid_h * roi_bin_grid_w, 1) 
-    iy = Tensor.arange(roi_bin_grid_h, device=input.device) 
-    ix = Tensor.arange(roi_bin_grid_w, device=input.device) 
+    count = max(roi_bin_grid_h * roi_bin_grid_w, 1)
+    iy = Tensor.arange(roi_bin_grid_h, device=input.device)
+    ix = Tensor.arange(roi_bin_grid_w, device=input.device)
     ymask = None
     xmask = None
   else:
     count = (roi_bin_grid_h * roi_bin_grid_w).maximum(1)
-    iy = Tensor.arange(height, device=input.device) 
-    ix = Tensor.arange(width, device=input.device) 
-    ymask = iy[None, :] < roi_bin_grid_h[:, None] 
-    xmask = ix[None, :] < roi_bin_grid_w[:, None] 
+    iy = Tensor.arange(height, device=input.device)
+    ix = Tensor.arange(width, device=input.device)
+    ymask = iy[None, :] < roi_bin_grid_h[:, None]
+    xmask = ix[None, :] < roi_bin_grid_w[:, None]
 
   def from_K(t): return t[:, None, None]
diff --git a/models/unet3d.py b/models/unet3d.py
index 289ed4c86b..555c622214 100644
--- a/models/unet3d.py
+++ b/models/unet3d.py
@@ -30,7 +30,7 @@ class UNet3D:
     self.input_block = DownsampleBlock(in_channels, filters[0], stride=1)
     self.downsample = [DownsampleBlock(i, o) for i, o in zip(inp, out)]
     self.bottleneck = DownsampleBlock(filters[-1], filters[-1])
-    self.upsample = [UpsampleBlock(filters[-1], filters[-1])] + [UpsampleBlock(i, o) for i, o in zip(out[::-1], inp[::-1])] 
+    self.upsample = [UpsampleBlock(filters[-1], filters[-1])] + [UpsampleBlock(i, o) for i, o in zip(out[::-1], inp[::-1])]
     self.output = {"conv": nn.Conv2d(filters[0], n_class, kernel_size=(1, 1, 1))}
 
   def __call__(self, x):
@@ -44,7 +44,7 @@ class UNet3D:
      x = upsample(x, skip)
    x = self.output["conv"](x)
    return x
-  
+
   def load_from_pretrained(self):
     fn = Path(__file__).parent.parent / "weights" / "unet-3d.ckpt"
     download_file("https://zenodo.org/record/5597155/files/3dunet_kits19_pytorch.ptc?download=1", fn)
diff --git a/test/test_conv.py b/test/test_conv.py
index a735613f9e..433a705345 100644
--- a/test/test_conv.py
+++ b/test/test_conv.py
@@ -104,7 +104,7 @@ class TestConv(unittest.TestCase):
     x = x.conv2d(w, groups=32)
     out = x.numpy()
     Tensor.no_grad = False
-    
+
  def test_multiadd(self):
    w = Tensor.ones(32)
    x = Tensor.ones(32).relu()
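--
The only behavioral change in this patch is the first hunk, which enables pylint's C0303 (trailing-whitespace) check in the CI lint step alongside the existing W0311 (bad-indentation) check; every other hunk strips the trailing whitespace that C0303 would otherwise flag. As a sketch of reproducing the check locally before committing (assuming pylint is installed and the shell expands **/*.py recursively, e.g. bash with globstar), the same command the workflow runs is:

  python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string='  ' **/*.py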