Compare commits

...

4 Commits

3 changed files with 9 additions and 12 deletions

View File

@@ -334,6 +334,8 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
     dtype=inference_dtype,
 )
+# activities = [torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA]
+# with torch.profiler.profile(activities=activities, record_shapes=True, with_stack=True) as prof:
 x = denoise(
     model=transformer,
     img=x,
@@ -353,6 +355,7 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
     pos_ip_adapter_extensions=pos_ip_adapter_extensions,
     neg_ip_adapter_extensions=neg_ip_adapter_extensions,
 )
+# prof.export_chrome_trace("trace.json")
 x = unpack(x.float(), self.height, self.width)
 return x

View File

@@ -16,20 +16,17 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:
 def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
     assert dim % 2 == 0
-    scale = (
-        torch.arange(0, dim, 2, dtype=torch.float32 if pos.device.type == "mps" else torch.float64, device=pos.device)
-        / dim
-    )
+    scale = torch.arange(0, dim, 2, dtype=pos.dtype, device=pos.device) / dim
     omega = 1.0 / (theta**scale)
     out = torch.einsum("...n,d->...nd", pos, omega)
     out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1)
     out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
-    return out.float()
+    return out
 def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]:
-    xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2)
-    xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2)
+    xq_ = xq.view(*xq.shape[:-1], -1, 1, 2)
+    xk_ = xk.view(*xk.shape[:-1], -1, 1, 2)
     xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
     xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
-    return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk)
+    return xq_out.view(*xq.shape), xk_out.view(*xk.shape)

View File

@@ -66,10 +66,7 @@ class RMSNorm(torch.nn.Module):
         self.scale = nn.Parameter(torch.ones(dim))
     def forward(self, x: Tensor):
-        x_dtype = x.dtype
-        x = x.float()
-        rrms = torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + 1e-6)
-        return (x * rrms).to(dtype=x_dtype) * self.scale
+        return torch.nn.functional.rms_norm(x, self.scale.shape, self.scale, eps=1e-6)
 class QKNorm(torch.nn.Module):