fix(copilot): deduplicate SSE-replayed messages by content fingerprint

When the SSE connection reconnects, resume_session_stream replays from
"0-0" and the replayed UIMessage objects get new IDs from useChat,
bypassing the adjacent-only content dedup. Switch deduplicateMessages
to track all seen role+context+content fingerprints globally, scoped
by the preceding user message to avoid false positives when the
assistant legitimately gives identical answers to different prompts.
This commit is contained in:
majdyz
2026-04-13 08:03:51 +00:00
parent e0ddb7d4d4
commit 00a20bdfe6
2 changed files with 127 additions and 14 deletions

View File

@@ -2,6 +2,7 @@ import type { UIMessage } from "ai";
import { describe, expect, it } from "vitest";
import {
ORIGINAL_TITLE,
deduplicateMessages,
extractSendMessageText,
formatNotificationTitle,
getSendSuppressionReason,
@@ -291,3 +292,105 @@ describe("getSendSuppressionReason", () => {
).toBeNull();
});
});
/**
 * Test helper for the dedup suite: builds a message with a caller-chosen ID
 * whose only content is a single text part.
 *
 * @param id - Explicit message ID, so tests can control ID collisions.
 * @param role - Message author role.
 * @param text - Text carried by the message's one text part.
 * @returns A message with the given `id` and `role` and one text part.
 */
function makeMsgWithId(
  id: string,
  role: "user" | "assistant",
  text: string,
): UIMessage {
  // Build the lone text part first; `as const` keeps the literal "text" tag.
  const textPart = { type: "text" as const, text };
  return { id, role, parts: [textPart] };
}
/**
 * Tests for `deduplicateMessages`.
 *
 * Each case asserts not only how many messages survive but also *which* ones,
 * by ID and order, so that a regression dropping the wrong copy of a
 * duplicate (or reordering messages) fails loudly instead of slipping past a
 * length-only check.
 */
describe("deduplicateMessages", () => {
  // Surviving message IDs, in order — the shape every case below asserts on.
  const idsOf = (messages: UIMessage[]): string[] =>
    messages.map((m) => m.id);

  it("removes messages with duplicate IDs", () => {
    const msgs = [
      makeMsgWithId("1", "user", "hello"),
      makeMsgWithId("1", "user", "hello"),
    ];
    const result = deduplicateMessages(msgs);
    expect(result).toHaveLength(1);
    expect(idsOf(result)).toEqual(["1"]);
  });

  it("removes non-adjacent assistant duplicates with different IDs (SSE replay)", () => {
    const msgs = [
      makeMsgWithId("u1", "user", "hello"),
      makeMsgWithId("a1", "assistant", "Plan of Attack"),
      makeMsgWithId("a2", "assistant", "Next step"),
      // SSE replay appends the same content with new IDs
      makeMsgWithId("a3", "assistant", "Plan of Attack"),
      makeMsgWithId("a4", "assistant", "Next step"),
    ];
    const result = deduplicateMessages(msgs);
    expect(result).toHaveLength(3); // user + 2 unique assistant msgs
    expect(idsOf(result)).toEqual(["u1", "a1", "a2"]);
  });

  it("keeps identical assistant replies to different user prompts", () => {
    const msgs = [
      makeMsgWithId("u1", "user", "What is 2+2?"),
      makeMsgWithId("a1", "assistant", "4"),
      makeMsgWithId("u2", "user", "What is 1+3?"),
      makeMsgWithId("a2", "assistant", "4"),
    ];
    const result = deduplicateMessages(msgs);
    expect(result).toHaveLength(4);
    // Same answer, different preceding prompt: both replies must survive in order.
    expect(idsOf(result)).toEqual(["u1", "a1", "u2", "a2"]);
  });

  it("removes adjacent assistant duplicates", () => {
    const msgs = [
      makeMsgWithId("u1", "user", "hello"),
      makeMsgWithId("a1", "assistant", "hi there"),
      makeMsgWithId("a2", "assistant", "hi there"),
    ];
    const result = deduplicateMessages(msgs);
    expect(result).toHaveLength(2);
    // The first copy is the one that is kept; the later copy is dropped.
    expect(idsOf(result)).toEqual(["u1", "a1"]);
  });

  it("handles empty message list", () => {
    expect(deduplicateMessages([])).toEqual([]);
  });

  it("passes through unique messages unchanged", () => {
    const msgs = [
      makeMsgWithId("u1", "user", "question 1"),
      makeMsgWithId("a1", "assistant", "answer 1"),
      makeMsgWithId("u2", "user", "question 2"),
      makeMsgWithId("a2", "assistant", "answer 2"),
    ];
    const result = deduplicateMessages(msgs);
    expect(result).toHaveLength(4);
    // "Unchanged" means same messages, same order, same content — not just same count.
    expect(result).toEqual(msgs);
  });

  it("deduplicates by toolCallId for tool-call parts", () => {
    // Two assistant messages that differ only in message ID and carry the
    // same tool call (toolCallId "tc-1").
    const toolCallMsg = (id: string): UIMessage => ({
      id,
      role: "assistant",
      parts: [
        {
          type: "tool-invocation",
          toolCallId: "tc-1",
          toolName: "test",
          args: {},
          state: "call",
        },
      ],
    });
    const msgs: UIMessage[] = [
      makeMsgWithId("u1", "user", "run tool"),
      toolCallMsg("a1"),
      toolCallMsg("a2"),
    ];
    const result = deduplicateMessages(msgs);
    expect(result).toHaveLength(2); // user + first tool call
    expect(idsOf(result)).toEqual(["u1", "a1"]);
  });
});

View File

@@ -154,24 +154,34 @@ export function shouldSuppressDuplicateSend(
}
/**
* Deduplicate messages by ID and by consecutive content fingerprint.
* Deduplicate messages by ID and by content fingerprint.
*
* ID dedup catches exact duplicates within the same source.
* Content dedup only compares each assistant message to its **immediate
* predecessor** — this catches hydration/stream boundary duplicates (where
* the same content appears under different IDs) without accidentally
* removing legitimately repeated assistant responses that are far apart.
* Content dedup uses a composite key of `role + preceding-user-text +
* content-fingerprint` to detect replayed messages that arrive with new
* IDs after an SSE reconnection replays from the beginning of the Redis
* stream. The preceding-user-text component prevents false positives when
* the assistant legitimately gives the same answer to different questions.
*/
export function deduplicateMessages(messages: UIMessage[]): UIMessage[] {
const seenIds = new Set<string>();
let lastAssistantFingerprint = "";
const seenFingerprints = new Set<string>();
let lastUserText = "";
return messages.filter((msg) => {
if (seenIds.has(msg.id)) return false;
seenIds.add(msg.id);
if (msg.role === "user") {
// Track the latest user message text so we can scope assistant
// fingerprints to their conversational context.
lastUserText = msg.parts
.map((p) => ("text" in p ? p.text : ""))
.join("|");
}
if (msg.role === "assistant") {
const fingerprint = msg.parts
const contentFingerprint = msg.parts
.map(
(p) =>
("text" in p && p.text) ||
@@ -180,13 +190,13 @@ export function deduplicateMessages(messages: UIMessage[]): UIMessage[] {
)
.join("|");
// Only dedup if this assistant message is identical to the previous one
if (fingerprint && fingerprint === lastAssistantFingerprint) return false;
if (fingerprint) lastAssistantFingerprint = fingerprint;
} else {
// Reset on non-assistant messages so that identical assistant responses
// separated by a user message (e.g. "Done!" → user → "Done!") are kept.
lastAssistantFingerprint = "";
if (contentFingerprint) {
// Scope to the preceding user message so that identical assistant
// replies to *different* user prompts are preserved.
const contextKey = `assistant:${lastUserText}:${contentFingerprint}`;
if (seenFingerprints.has(contextKey)) return false;
seenFingerprints.add(contextKey);
}
}
return true;