Mirror of https://github.com/tinygrad/tinygrad.git, synced 2026-01-09 15:08:02 -05:00
Remove f16 decompression lib from SD compile.py (#8121)
* Remove f16-to-f32-gpu lib, use tinygrad exported decompression
* No need to create new instance
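Context for the change: instead of pulling the f16-to-f32-gpu package at runtime, compile.py now exports an extra "f16tof32" WebGPU step built from tinygrad's own extra.f16_decompress.u32_to_f16, and the browser page calls that step to expand the packed half-precision weights. A rough NumPy sketch of the intended semantics (illustrative only; the two-halves-per-uint32 packing and the float32 output are my reading of the diff below, not the actual kernel):

    import numpy as np

    def u32_to_f16_reference(u32: np.ndarray) -> np.ndarray:
      # each 32-bit word holds two packed float16 values; expand them to float32
      assert u32.dtype == np.uint32
      halves = u32.view(np.uint16)              # twice as many 16-bit words
      return halves.view(np.float16).astype(np.float32)

    packed = np.frombuffer(np.array([0.5, -1.25, 3.0, 65504.0], dtype=np.float16).tobytes(), dtype=np.uint32)
    print(u32_to_f16_reference(packed))         # the four float16 values, now as float32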
@@ -1,9 +1,10 @@
 import os
 from extra.export_model import compile_net, jit_model, dtype_to_js_type
+from extra.f16_decompress import u32_to_f16
 from examples.stable_diffusion import StableDiffusion
 from tinygrad.nn.state import get_state_dict, safe_save, safe_load_metadata, torch_load, load_state_dict
 from tinygrad.tensor import Tensor
-from tinygrad import Device
+from tinygrad import Device, dtypes
 from tinygrad.helpers import fetch
 from typing import NamedTuple, Any, List
 import requests
@@ -95,7 +96,8 @@ if __name__ == "__main__":
   sub_steps = [
     Step(name = "textModel", input = [Tensor.randn(1, 77)], forward = model.cond_stage_model.transformer.text_model),
     Step(name = "diffusor", input = [Tensor.randn(1, 77, 768), Tensor.randn(1, 77, 768), Tensor.randn(1,4,64,64), Tensor.rand(1), Tensor.randn(1), Tensor.randn(1), Tensor.randn(1)], forward = model),
-    Step(name = "decoder", input = [Tensor.randn(1,4,64,64)], forward = model.decode)
+    Step(name = "decoder", input = [Tensor.randn(1,4,64,64)], forward = model.decode),
+    Step(name = "f16tof32", input = [Tensor.randn(2097120, dtype=dtypes.uint32)], forward = u32_to_f16)
   ]
 
   prg = ""
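A note on the 2097120 figure (my reading of the numbers, not stated in the commit): the browser decompresses the weights in fixed-size chunks of 8388480 bytes (see the decodeChunkSize change further down), and 8388480 bytes of packed data is exactly 2097120 uint32 words, i.e. 4194240 float16 values per launch of the exported step:

    decode_chunk_bytes = 8388480          # decodeChunkSize in the browser-side diff below
    u32_words = decode_chunk_bytes // 4   # uint32 inputs per launch of the f16tof32 step
    f16_values = u32_words * 2            # float16 weights decoded per chunk
    f32_bytes = f16_values * 4            # bytes written back: twice the input size
    assert (u32_words, f16_values, f32_bytes) == (2097120, 4194240, 16776960)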
@@ -130,7 +132,7 @@ if __name__ == "__main__":
   return {{
     "setup": async (device, safetensor) => {{
-      const metadata = getTensorMetadata(safetensor[0]);
+      const metadata = safetensor ? getTensorMetadata(safetensor[0]) : null;
 
       {exported_bufs}
 
 
@@ -165,10 +165,6 @@
     import ClipTokenizer from './clip_tokenizer.js';
     window.clipTokenizer = new ClipTokenizer();
   </script>
-  <script type="module">
-    import { f16tof32GPU } from 'https://unpkg.com/f16-to-f32-gpu@0.1.0/src/index.js';
-    window.f16tof32GPU = f16tof32GPU;
-  </script>
   <script src="./net.js"></script>
 </head>
 <body>
@@ -214,6 +210,8 @@
     <canvas id="canvas" width="512" height="512"></canvas>
 
     <script>
+      let f16decomp = null;
+
       function initDb() {
         return new Promise((resolve, reject) => {
           let db;
@@ -416,7 +414,7 @@
       const metadata = JSON.parse(new TextDecoder("utf8").decode(combinedBuffer.subarray(8, 8 + metadataLength)));
 
       const allToDecomp = combinedBuffer.byteLength - (8 + metadataLength);
-      const decodeChunkSize = 67107840;
+      const decodeChunkSize = 8388480;
       const numChunks = Math.ceil(allToDecomp/decodeChunkSize);
 
       console.log(allToDecomp + " bytes to decompress");
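The chunk size drops from 67107840 to 8388480 bytes, presumably because the tinygrad-exported step is compiled for a fixed input shape (2097120 uint32 words), whereas the old f16-to-f32-gpu library sized its own buffers per call. A quick consistency check of the two constants (the reasoning is an assumption on my part):

    exported_input_words = 2097120                  # input size of the exported f16tof32 step
    new_chunk_bytes = exported_input_words * 4
    old_chunk_bytes = 67107840
    assert new_chunk_bytes == 8388480
    assert old_chunk_bytes == 8 * new_chunk_bytes   # the old chunks were 8x larger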
@@ -440,7 +438,8 @@
         let chunkStartF16 = 8 + metadataLength + (decodeChunkSize * i);
         let chunkEndF16 = chunkStartF16 + decodeChunkSize;
         let chunk = combinedBuffer.subarray(chunkStartF16, chunkEndF16);
-        let result = await f16tof32GPU(chunk);
+        let uint32Chunk = new Uint32Array(chunk.buffer, chunk.byteOffset, chunk.byteLength / 4);
+        let result = await f16decomp(uint32Chunk);
         let resultUint8 = new Uint8Array(result.buffer);
         let chunkStartF32 = 8 + metadataLength + (decodeChunkSize * i * 2);
         let chunkEndF32 = chunkStartF32 + resultUint8.byteLength;
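Here the Uint32Array view reinterprets the raw chunk bytes as the uint32 words the exported step expects, and the destination offsets advance twice as fast because each 2-byte float16 becomes a 4-byte float32. Roughly, for chunk i (Python, illustrative only; metadataLength is whatever the file header actually says):

    metadata_length = 1024                                                # placeholder for illustration
    decode_chunk_size = 8388480
    i = 3
    chunk_start_f16 = 8 + metadata_length + decode_chunk_size * i         # source offset into the packed file
    chunk_start_f32 = 8 + metadata_length + decode_chunk_size * i * 2     # destination offset in the f32 buffer
    # decompressed data is exactly twice as large, so the destination stride doubles
    assert chunk_start_f32 - (8 + metadata_length) == 2 * (chunk_start_f16 - (8 + metadata_length))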
@@ -483,6 +482,7 @@
       }
 
       const device = await getDevice();
+      f16decomp = await f16tof32().setup(device, safetensorParts),
       safetensorParts = await getAndDecompressF16Safetensors(device, progress);
 
       modelDlTitle.innerHTML = "Compiling model"
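Note how this ties back to the compile.py change above: setup(device, safetensorParts) appears to run before safetensorParts is filled in by getAndDecompressF16Safetensors, which is exactly what the "safetensor ? getTensorMetadata(safetensor[0]) : null" guard in the generated setup allows, since the f16tof32 step has no weights of its own to load. The single f16decomp instance is then reused for every chunk, which is the "no need to create new instance" part of the commit message.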