From 1043fa067afc986ed667739ed4592f400a333fac Mon Sep 17 00:00:00 2001 From: George Hotz Date: Mon, 5 Sep 2022 15:52:14 -0700 Subject: [PATCH] it renders something --- examples/stable_diffusion.py | 37 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/examples/stable_diffusion.py b/examples/stable_diffusion.py index b5fbb75035..45c8b97e1e 100644 --- a/examples/stable_diffusion.py +++ b/examples/stable_diffusion.py @@ -5,6 +5,7 @@ import os import math import numpy as np import traceback +from tqdm import tqdm from collections import namedtuple from extra.utils import fake_torch_load_zipped, get_child from tinygrad.nn import Conv2d @@ -545,8 +546,8 @@ REAL = int(os.getenv("REAL", 0)) if __name__ == "__main__": Tensor.no_init = True - # WTF!! no_grad brakes it - #Tensor.no_grad = True + # WTF!! no_grad breaks it + Tensor.no_grad = True model = StableDiffusion() # load in weights @@ -572,9 +573,9 @@ if __name__ == "__main__": unconditional_context = model.cond_stage_model.transformer.text_model(phrase) print("got unconditional CLIP context", unconditional_context.shape) - def get_model_output(latent): + def get_model_output(latent, t): # put into diffuser - timesteps = Tensor([1]) + timesteps = Tensor([t]) unconditional_latent = model.model.diffusion_model(latent, timesteps, unconditional_context) latent = model.model.diffusion_model(latent, timesteps, context) @@ -582,9 +583,14 @@ if __name__ == "__main__": e_t = unconditional_latent + unconditional_guidance_scale * (latent - unconditional_latent) return e_t + alphas = [0.9983, 0.6722, 0.2750, 0.0557] + alphas_prev = [0.9991499781608582, 0.9982960224151611, 0.6721514463424683, 0.27499905228614807] + sigmas = [0,0,0,0] + sqrt_one_minus_alphas = [0.0413, 0.5726, 0.8515, 0.9717] + def get_x_prev_and_pred_x0(x, e_t, index): temperature = 1 - a_t, a_prev, sigma_t, sqrt_one_minus_at = 0.9983, 0.9991, 0., 0.0413 + a_t, a_prev, sigma_t, sqrt_one_minus_at = alphas[index], alphas_prev[index], sigmas[index], sqrt_one_minus_alphas[index] pred_x0 = (x - sqrt_one_minus_at * e_t) / math.sqrt(a_t) # direction pointing to x_t @@ -598,15 +604,16 @@ if __name__ == "__main__": latent = Tensor.randn(1,4,64,64) # is this the diffusion? - index = 0 - e_t = get_model_output(latent) - print(e_t.numpy()) - x_prev, pred_x0 = get_x_prev_and_pred_x0(latent, e_t, index) - #e_t_next = get_model_output(x_prev) - #e_t_prime = (e_t + e_t_next) / 2 - #x_prev, pred_x0 = get_x_prev_and_pred_x0(latent, e_t_prime, index) - - latent = x_prev + for index, timestep in tqdm(list(enumerate([1, 251, 501, 751]))[::-1]): + print(index, timestep) + e_t = get_model_output(latent, timestep) + #print(e_t.numpy()) + x_prev, pred_x0 = get_x_prev_and_pred_x0(latent, e_t, index) + #e_t_next = get_model_output(x_prev) + #e_t_prime = (e_t + e_t_next) / 2 + #x_prev, pred_x0 = get_x_prev_and_pred_x0(latent, e_t_prime, index) + latent = x_prev + latent.realize() print(latent.numpy()) #exit(0) @@ -615,7 +622,7 @@ if __name__ == "__main__": #latent = Tensor(np.load("datasets/stable_diffusion_apple.npy")) # upsample latent space to image with autoencoder - x = model.first_stage_model.post_quant_conv(latent) + x = model.first_stage_model.post_quant_conv(1/0.18215 * latent) x = model.first_stage_model.decoder(x) # make image correct size and scale