don't pass model in convert_from_huggingface and convert_from_gguf (#10094)

they only need n_layers, not the whole model
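The definition side of the change (presumably in extra/models/llama.py, not shown in this view) would swap len(model.layers) for the explicit n_layers argument. A minimal sketch of the assumed new signatures follows; the key maps are illustrative subsets, not the full tables, and the permute_layers default is inferred from the call sites below:

def convert_from_huggingface(weights, n_layers: int, n_heads: int, n_kv_heads: int, permute_layers: bool = True):
  # remap HuggingFace key names to tinygrad's; only two entries shown here.
  # n_heads/n_kv_heads drive the q/k weight permutation, omitted in this sketch.
  keymap = {"model.embed_tokens.weight": "tok_embeddings.weight",
            **{f"model.layers.{l}.input_layernorm.weight": f"layers.{l}.attention_norm.weight" for l in range(n_layers)}}
  return {keymap.get(k, k): v for k, v in weights.items()}

def convert_from_gguf(weights, n_layers: int):
  # same idea for GGUF key names
  keymap = {"token_embd.weight": "tok_embeddings.weight",
            **{f"blk.{l}.attn_norm.weight": f"layers.{l}.attention_norm.weight" for l in range(n_layers)}}
  return {keymap.get(k, k): v for k, v in weights.items()}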
commit 3eba3d6ee9 (parent a2d0684fc1)
Author: chenyu
Date: 2025-04-28 20:11:19 -04:00
Committed by: GitHub
6 changed files with 22 additions and 22 deletions


@@ -34,8 +34,8 @@ if __name__ == "__main__":
     part2 = nn.state.torch_load(fetch("https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B/resolve/main/pytorch_model-00002-of-00002.bin?download=true"))
   with Timing("weights -> model: "):
-    nn.state.load_state_dict(model, fix_bf16(convert_from_huggingface(part1, model, 32, 8)), strict=False)
-    nn.state.load_state_dict(model, fix_bf16(convert_from_huggingface(part2, model, 32, 8)), strict=False)
+    nn.state.load_state_dict(model, fix_bf16(convert_from_huggingface(part1, 32, 32, 8)), strict=False)
+    nn.state.load_state_dict(model, fix_bf16(convert_from_huggingface(part2, 32, 32, 8)), strict=False)
   if not os.path.isfile("/tmp/tokenizer.model"): create_fixed_tokenizer("/tmp/tokenizer.model")
   spp = SentencePieceProcessor(model_file="/tmp/tokenizer.model")


@@ -211,7 +211,7 @@ class LLaMa:
     else:
       weights = load(str(model_path))
     if "model.embed_tokens.weight" in weights:
-      weights = convert_from_huggingface(weights, model, params["args"]["n_heads"], params["args"].get("n_kv_heads", params["args"]["n_heads"]))
+      weights = convert_from_huggingface(weights, params["args"]["n_layers"], params["args"]["n_heads"], params["args"].get("n_kv_heads", params["args"]["n_heads"]))
     weights = fix_bf16(weights)


@@ -173,9 +173,9 @@ def build_transformer(model_path: Path, model_size="8B", quantize=None, scale_dt
   else:
     weights = load(str(model_path))
   if "model.embed_tokens.weight" in weights:
-    weights = convert_from_huggingface(weights, model, MODEL_PARAMS[model_size]["args"]["n_heads"], MODEL_PARAMS[model_size]["args"]["n_kv_heads"])
+    weights = convert_from_huggingface(weights, MODEL_PARAMS[model_size]["args"]["n_layers"], MODEL_PARAMS[model_size]["args"]["n_heads"], MODEL_PARAMS[model_size]["args"]["n_kv_heads"])
   elif "token_embd.weight" in weights:
-    weights = convert_from_gguf(weights, model)
+    weights = convert_from_gguf(weights, MODEL_PARAMS[model_size]["args"]["n_layers"])
   weights = fix_bf16(weights)
   with Context(BEAM=0):


@@ -55,7 +55,7 @@ if __name__ == "__main__":
   del model_state_dict['freqs_cis']
   with Timing("load weights to GPU: "):
-    nhf_state = convert_from_huggingface(fetch_weights(), model, 16, 16)
+    nhf_state = convert_from_huggingface(fetch_weights(), 16, 16, 16)
     # NOTE: i'm not sure this actually needs float32, it may just change the type of things downstream from it. but doesn't match torch w/o this
     for needs_float32 in ['tok_embeddings.weight']: nhf_state[needs_float32] = nhf_state[needs_float32].float()
   print(f"ram used: {GlobalCounters.mem_used/1e9:.2f} GB")


@@ -44,7 +44,7 @@ def load_model(model_path:Path, model_params:Dict[str, Union[int, float]]) -> Tr
   model.layers = updated_layers
   # load weights
-  weights = fix_bf16(convert_from_huggingface(load(str(model_path / "model.safetensors.index.json")), model, model_params["n_heads"], model_params["n_kv_heads"], permute_layers=False))
+  weights = fix_bf16(convert_from_huggingface(load(str(model_path / "model.safetensors.index.json")), model_params["n_layers"], model_params["n_heads"], model_params["n_kv_heads"], permute_layers=False))
   # replace weights in model
   load_state_dict(model, weights, strict=False, consume=True)
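A call site that has the model in hand but no params dict can presumably recover the count the same way the old signature did internally (hypothetical caller; weights, model, n_heads, and n_kv_heads stand in for whatever is in scope):

weights = fix_bf16(convert_from_huggingface(weights, len(model.layers), n_heads, n_kv_heads))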