fix(memory): use QAT variant of embedding model for better quality

Switch default local embedding model from embeddinggemma-300M to
embeddinggemma-300m-qat (Quantization-Aware Training). QAT models have
quantization applied during training, so they retain better embedding
quality after quantization at the same size (Q8_0).
This commit is contained in:
Azade 🐐
2026-02-13 12:47:53 +00:00
committed by Peter Steinberger
parent 2b154e0458
commit 5219f74615

View File

@@ -56,7 +56,8 @@ export type EmbeddingProviderOptions = {
};
};
const DEFAULT_LOCAL_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
const DEFAULT_LOCAL_MODEL =
"hf:ggml-org/embeddinggemma-300m-qat-q8_0-GGUF/embeddinggemma-300m-qat-Q8_0.gguf";
function canAutoSelectLocal(options: EmbeddingProviderOptions): boolean {
const modelPath = options.local?.modelPath?.trim();