fix(memory): use QAT variant of embedding model for better quality

Switch default local embedding model from embeddinggemma-300M to
embeddinggemma-300m-qat (Quantization-Aware Training). QAT models have
quantization applied during training, so they retain better embedding
quality after quantization at the same size (Q8_0).
This commit is contained in:
Azade 🐐
2026-02-13 12:47:53 +00:00
committed by Peter Steinberger
parent 2b154e0458
commit 5219f74615

View File

@@ -56,7 +56,8 @@ export type EmbeddingProviderOptions = {
};
};
const DEFAULT_LOCAL_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
const DEFAULT_LOCAL_MODEL =
"hf:ggml-org/embeddinggemma-300m-qat-q8_0-GGUF/embeddinggemma-300m-qat-Q8_0.gguf";
function canAutoSelectLocal(options: EmbeddingProviderOptions): boolean {
const modelPath = options.local?.modelPath?.trim();