fix(copilot): scope dedup fingerprint by user message ID instead of text

Using user message text as the context key caused the deduplicator to
drop the second assistant reply when a user asked the same question twice
in one session and received the same answer both times. Switching to user
message ID (which is unique per turn) fixes the false positive while still
preventing SSE-replayed duplicates.

Adds a regression test covering the same-question-twice scenario.
This commit is contained in:
majdyz
2026-04-13 09:55:54 +00:00
parent 7cadce4c7b
commit a17f05f2b1
2 changed files with 27 additions and 11 deletions

View File

@@ -336,6 +336,20 @@ describe("deduplicateMessages", () => {
expect(result).toHaveLength(4);
});
it("keeps second answer when same question is asked twice in one session", () => {
  // Regression guard: if the dedup context were keyed on the user's message
  // TEXT rather than its ID, the two turns below would share one context and
  // the second (identical) assistant reply would be discarded.
  const question = "What is 2+2?";
  const answer = "4";
  const conversation = [
    makeMsgWithId("u1", "user", question),
    makeMsgWithId("a1", "assistant", answer),
    makeMsgWithId("u2", "user", question), // repeated question under a new ID
    makeMsgWithId("a2", "assistant", answer), // repeated answer — must survive
  ];

  const deduped = deduplicateMessages(conversation);
  const survivingIds = deduped.map((message) => message.id);

  // All four messages remain, in their original order.
  expect(deduped).toHaveLength(4);
  expect(survivingIds).toEqual(["u1", "a1", "u2", "a2"]);
});
it("removes adjacent assistant duplicates", () => {
const msgs = [
makeMsgWithId("u1", "user", "hello"),

View File

@@ -157,27 +157,29 @@ export function shouldSuppressDuplicateSend(
* Deduplicate messages by ID and by content fingerprint.
*
* ID dedup catches exact duplicates within the same source.
* Content dedup uses a composite key of `role + preceding-user-text +
* Content dedup uses a composite key of `role + preceding-user-message-id +
* content-fingerprint` to detect replayed messages that arrive with new
* IDs after an SSE reconnection replays from the beginning of the Redis
* stream. The preceding-user-text component prevents false positives when
* the assistant legitimately gives the same answer to different questions.
* stream. Scoping by user message ID (not text) preserves the second
* assistant reply when the user asks the same question twice and gets the
* same answer — two different user messages produce two different IDs even
* when their text is identical.
*/
export function deduplicateMessages(messages: UIMessage[]): UIMessage[] {
const seenIds = new Set<string>();
const seenFingerprints = new Set<string>();
let lastUserText = "";
let lastUserMsgId = "";
return messages.filter((msg) => {
if (seenIds.has(msg.id)) return false;
seenIds.add(msg.id);
if (msg.role === "user") {
// Track the latest user message text so we can scope assistant
// fingerprints to their conversational context.
lastUserText = msg.parts
.map((p) => ("text" in p ? p.text : ""))
.join("|");
// Track the ID (not text) of the latest user message so we can scope
// assistant fingerprints to their conversational turn. Using the ID
// means two user messages with identical text are still treated as
// distinct turns, preventing false-positive deduplication.
lastUserMsgId = msg.id;
}
if (msg.role === "assistant") {
@@ -191,9 +193,9 @@ export function deduplicateMessages(messages: UIMessage[]): UIMessage[] {
.join("|");
if (contentFingerprint) {
// Scope to the preceding user message so that identical assistant
// Scope to the preceding user message turn so that identical assistant
// replies to *different* user prompts are preserved.
const contextKey = `assistant:${lastUserText}:${contentFingerprint}`;
const contextKey = `assistant:${lastUserMsgId}:${contentFingerprint}`;
if (seenFingerprints.has(contextKey)) return false;
seenFingerprints.add(contextKey);
}