fix(platform): restore compaction code lost in merge, address remaining review comments

The merge of feat/tracking-cost-block reverted transcript compaction code
and several review fixes from 90b7edf1f. This commit restores the lost
code and applies additional improvements requested in review:

- Restore transcript compaction functions (_transcript_to_messages,
  _messages_to_transcript, compact_transcript, _flatten_* helpers)
- Restore _maybe_compact_and_upload helper in service.py to flatten
  deep nesting (5 levels -> 2) in transcript compaction block
- Restore CLI session file reading (read_cli_session_file) for
  mid-stream compaction sync
- Restore total_tokens DRY fix (compute once, reuse in finally)
- Extract _run_compression() helper to eliminate nested try blocks
- Add STOP_REASON_END_TURN, COMPACT_MSG_ID_PREFIX, ENTRY_TYPE_MESSAGE
  named constants replacing magic strings
- Add MS_PER_MINUTE and MS_PER_HOUR constants in UsageLimits.tsx
- Add docstring explaining Monday edge case in _weekly_reset_time
This commit is contained in:
Zamil Majdy
2026-03-13 22:56:19 +07:00
parent 628b779128
commit d1b8766fa4
4 changed files with 94 additions and 65 deletions

View File

@@ -77,7 +77,11 @@ def _daily_reset_time(now: datetime | None = None) -> datetime:
def _weekly_reset_time(now: datetime | None = None) -> datetime:
"""Calculate when the current weekly window resets (next Monday 00:00 UTC)."""
"""Calculate when the current weekly window resets (next Monday 00:00 UTC).
On Monday itself, ``(7 - weekday) % 7`` is 0; the ``or 7`` fallback
pushes to *next* Monday so the current week's window stays open.
"""
if now is None:
now = datetime.now(UTC)
days_until_monday = (7 - now.weekday()) % 7 or 7

View File

@@ -79,6 +79,7 @@ from .tool_adapter import (
)
from .transcript import (
COMPACT_THRESHOLD_BYTES,
TranscriptDownload,
cleanup_cli_project_dir,
compact_transcript,
download_transcript,
@@ -630,6 +631,56 @@ async def _prepare_file_attachments(
return PreparedAttachments(hint=hint, image_blocks=image_blocks)
async def _maybe_compact_and_upload(
    dl: TranscriptDownload,
    user_id: str,
    session_id: str,
    log_prefix: str = "[Transcript]",
) -> str:
    """Compact an oversized transcript and upload the compacted version.

    Returns the (possibly compacted) transcript content, or an empty string
    if compaction was needed but failed.
    """
    raw = dl.content
    size = len(raw)
    # Small transcripts pass through untouched.
    if size <= COMPACT_THRESHOLD_BYTES:
        return raw

    logger.warning(
        "%s Transcript oversized (%dB > %dB), compacting",
        log_prefix,
        size,
        COMPACT_THRESHOLD_BYTES,
    )
    compacted = await compact_transcript(raw, log_prefix=log_prefix)
    if compacted:
        # Keep the original message_count: it reflects the number of
        # session.messages covered by this transcript, which the gap-fill
        # logic uses as a slice index. Counting JSONL lines would give a
        # smaller number (compacted messages != session message count) and
        # cause already-covered messages to be re-injected.
        try:
            await upload_transcript(
                user_id=user_id,
                session_id=session_id,
                content=compacted,
                message_count=dl.message_count,
                log_prefix=log_prefix,
            )
        except Exception:
            # Upload is best-effort; the compacted content is still usable
            # for this turn even if persisting it fails.
            logger.warning(
                "%s Failed to upload compacted transcript",
                log_prefix,
                exc_info=True,
            )
        return compacted

    logger.warning(
        "%s Compaction failed, skipping resume for this turn", log_prefix
    )
    return ""
async def stream_chat_completion_sdk(
session_id: str,
message: str | None = None,
@@ -841,50 +892,12 @@ async def stream_chat_completion_sdk(
is_valid,
)
if is_valid:
transcript_content = dl.content
# Compact oversized transcripts to prevent "Prompt is too long"
if len(transcript_content) > COMPACT_THRESHOLD_BYTES:
logger.warning(
"%s Transcript oversized (%dB > %dB), compacting",
log_prefix,
len(transcript_content),
COMPACT_THRESHOLD_BYTES,
)
compacted = await compact_transcript(
transcript_content, log_prefix=log_prefix
)
if compacted:
transcript_content = compacted
# Keep the original message_count: it reflects the
# number of session.messages covered by this transcript,
# which the gap-fill logic uses as a slice index.
# Counting JSONL lines would give a smaller number
# (compacted messages != session message count) and
# cause already-covered messages to be re-injected.
# Best-effort upload of compacted version
try:
await upload_transcript(
user_id=user_id or "",
session_id=session_id,
content=transcript_content,
message_count=dl.message_count,
log_prefix=log_prefix,
)
except Exception:
logger.warning(
"%s Failed to upload compacted transcript",
log_prefix,
exc_info=True,
)
else:
# Compaction failed — skip resume to avoid
# "Prompt is too long" on an oversized transcript.
logger.warning(
"%s Compaction failed, skipping resume for this turn",
log_prefix,
)
transcript_content = ""
transcript_content = await _maybe_compact_and_upload(
dl,
user_id=user_id or "",
session_id=session_id,
log_prefix=log_prefix,
)
# Load previous context into builder (empty string is a no-op)
if transcript_content:
transcript_builder.load_previous(

View File

@@ -23,7 +23,7 @@ import openai
from backend.copilot.config import ChatConfig
from backend.util import json
from backend.util.prompt import compress_context
from backend.util.prompt import CompressResult, compress_context
logger = logging.getLogger(__name__)
@@ -41,6 +41,11 @@ STRIPPABLE_TYPES = frozenset(
{"progress", "file-history-snapshot", "queue-operation", "summary", "pr-link"}
)
# JSONL protocol values used in transcript serialization.
STOP_REASON_END_TURN = "end_turn"
COMPACT_MSG_ID_PREFIX = "msg_compact_"
ENTRY_TYPE_MESSAGE = "message"
@dataclass
class TranscriptDownload:
@@ -560,10 +565,10 @@ def _messages_to_transcript(messages: list[dict]) -> str:
message: dict = {
"role": "assistant",
"model": "",
"id": f"msg_compact_{uuid4().hex[:24]}",
"type": "message",
"id": f"{COMPACT_MSG_ID_PREFIX}{uuid4().hex[:24]}",
"type": ENTRY_TYPE_MESSAGE,
"content": [{"type": "text", "text": content}] if content else [],
"stop_reason": "end_turn",
"stop_reason": STOP_REASON_END_TURN,
"stop_sequence": None,
}
else:
@@ -579,6 +584,23 @@ def _messages_to_transcript(messages: list[dict]) -> str:
return "\n".join(lines) + "\n" if lines else ""
async def _run_compression(
    messages: list[dict],
    model: str,
    cfg: ChatConfig,
    log_prefix: str,
) -> CompressResult:
    """Run LLM-based compression with truncation fallback.

    Attempts a real LLM pass first; if the client call fails for any
    reason, falls back to client-less (truncation-based) compression.
    """
    try:
        async with openai.AsyncOpenAI(
            api_key=cfg.api_key,
            base_url=cfg.base_url,
            timeout=30.0,
        ) as client:
            return await compress_context(
                messages=messages, model=model, client=client
            )
    except Exception as exc:
        # Any LLM-side failure degrades gracefully to truncation.
        logger.warning(
            "%s LLM compaction failed, using truncation: %s", log_prefix, exc
        )
        return await compress_context(messages=messages, model=model, client=None)
async def compact_transcript(
content: str,
log_prefix: str = "[Transcript]",
@@ -596,20 +618,7 @@ async def compact_transcript(
logger.warning("%s Too few messages to compact (%d)", log_prefix, len(messages))
return None
try:
try:
async with openai.AsyncOpenAI(
api_key=cfg.api_key, base_url=cfg.base_url, timeout=30.0
) as client:
result = await compress_context(
messages=messages, model=cfg.model, client=client
)
except Exception as e:
logger.warning(
"%s LLM compaction failed, using truncation: %s", log_prefix, e
)
result = await compress_context(
messages=messages, model=cfg.model, client=None
)
result = await _run_compression(messages, cfg.model, cfg, log_prefix)
if not result.was_compacted:
logger.info("%s Transcript already within token budget", log_prefix)
return content

View File

@@ -8,6 +8,9 @@ import { Button } from "@/components/ui/button";
import { ChartBar } from "@phosphor-icons/react";
import { useUsageLimits } from "./useUsageLimits";
const MS_PER_MINUTE = 60_000;
const MS_PER_HOUR = 3_600_000;
function formatResetTime(resetsAt: Date | string): string {
const resetDate =
typeof resetsAt === "string" ? new Date(resetsAt) : resetsAt;
@@ -15,11 +18,11 @@ function formatResetTime(resetsAt: Date | string): string {
const diffMs = resetDate.getTime() - now.getTime();
if (diffMs <= 0) return "now";
const hours = Math.floor(diffMs / (1000 * 60 * 60));
const hours = Math.floor(diffMs / MS_PER_HOUR);
// Under 24h: show relative time ("in 4h 23m")
if (hours < 24) {
const minutes = Math.floor((diffMs % (1000 * 60 * 60)) / (1000 * 60));
const minutes = Math.floor((diffMs % MS_PER_HOUR) / MS_PER_MINUTE);
if (hours > 0) return `in ${hours}h ${minutes}m`;
return `in ${minutes}m`;
}