Mirror of https://github.com/invoke-ai/InvokeAI.git
Performance optimizations for LoRAs applied on top of GGML-quantized tensors.
@@ -52,7 +52,9 @@ class CustomModuleMixin:
         if type(param) is torch.nn.Parameter and type(param.data) is torch.Tensor:
             pass
         elif type(param) is GGMLTensor:
-            pass
+            # Move to device and dequantize here. Doing it in the patch layer can result in redundant casts /
+            # dequantizations.
+            orig_params[param_name] = param.to(device=device).get_dequantized_tensor()
         else:
             orig_params[param_name] = torch.empty(get_param_shape(param), device="meta")
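For context, the change hoists the expensive move-and-dequantize step out of the per-patch code path: the GGML-quantized parameter is moved to the target device and dequantized once, when it is collected into orig_params, instead of each time a patch layer touches it. The sketch below illustrates the pattern with a toy int8 quantizer. ToyGGMLTensor and both patched_weight_* helpers are hypothetical stand-ins, not InvokeAI's actual classes; only the .to(device) / .get_dequantized_tensor() method names mirror the diff.

    import torch


    class ToyGGMLTensor:
        """Hypothetical stand-in for InvokeAI's GGMLTensor (not the real class):
        stores int8-quantized data plus a scale and dequantizes on request."""

        def __init__(self, weight: torch.Tensor):
            self.scale = weight.abs().max().clamp(min=1e-8) / 127.0
            self.q = torch.round(weight / self.scale).to(torch.int8)

        def to(self, device) -> "ToyGGMLTensor":
            moved = object.__new__(ToyGGMLTensor)
            moved.scale = self.scale.to(device)
            moved.q = self.q.to(device)
            return moved

        def get_dequantized_tensor(self) -> torch.Tensor:
            # The expensive decode step that the commit hoists out of the patch layer.
            return self.q.to(torch.float32) * self.scale


    def patched_weight_naive(param: ToyGGMLTensor, loras, device) -> torch.Tensor:
        # Pre-commit pattern (sketch): every LoRA application re-moves and
        # re-dequantizes the quantized base weight -- the redundant casts /
        # dequantizations that the diff's comment refers to.
        weight = None
        for down, up, scale in loras:
            base = param.to(device).get_dequantized_tensor()  # repeated per patch
            weight = (base if weight is None else weight) + scale * (up @ down)
        return weight


    def patched_weight_hoisted(param: ToyGGMLTensor, loras, device) -> torch.Tensor:
        # Post-commit pattern: move and dequantize exactly once, mirroring the
        # diff's `param.to(device=device).get_dequantized_tensor()`, then reuse it.
        weight = param.to(device).get_dequantized_tensor()
        for down, up, scale in loras:
            weight = weight + scale * (up @ down)
        return weight


    if __name__ == "__main__":
        torch.manual_seed(0)
        base = ToyGGMLTensor(torch.randn(64, 64))
        loras = [(torch.randn(8, 64), torch.randn(64, 8), 0.5) for _ in range(3)]
        slow = patched_weight_naive(base, loras, "cpu")
        fast = patched_weight_hoisted(base, loras, "cpu")
        assert torch.allclose(slow, fast)  # identical result, 1 dequantize vs. 3

Both versions produce the same patched weight; the hoisted one simply pays for the device transfer and dequantization once rather than once per patch. The else branch in the diff appears to serve a related goal: a tensor on the "meta" device carries only shape metadata, so no real allocation or copy happens when the actual values are not needed.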