Compare commits

..

1 Commits

Author SHA1 Message Date
teknium1 d63d7a58fe feat: Codex-style handoff prefix for compressed context summaries
Replace the old '[CONTEXT SUMMARY]:' prefix on compressed summaries
with a Codex-inspired handoff framing that tells the model what happened
and how to use the summary.

What changes:

1. New SUMMARY_PREFIX constant — the text prepended to every
   compressed summary:

   [CONTEXT COMPACTION] An earlier part of this conversation was
   summarized to preserve context space. Below is the summary — use
   it to build on the work already done and avoid duplicating effort:

2. _with_summary_prefix() helper — normalizes model output by stripping
   any legacy '[CONTEXT SUMMARY]:' prefix the summarization model may
   have produced, then prepends the new SUMMARY_PREFIX.

3. System message annotation updated — the note appended to the system
   prompt on first compression now says 'compacted into a handoff
   summary' and instructs 'build on that summary rather than re-doing
   work' instead of the old generic note.

Why this is better:

The old prefix ('[CONTEXT SUMMARY]: <raw text>') gave the model no
context about what the summary is or how to use it. The new prefix
explicitly frames it as a context compaction event and instructs the
model to build on prior work rather than re-doing it. This reduces
redundant tool calls and file re-reads after compression.

What does NOT change:

- The compression algorithm (positional protection, boundary alignment)
- The role alternation logic (summary role adapts to avoid consecutive
  same-role messages)
- The summarization model or trigger thresholds
- Recognition of the old prefix: it stays available as the exported
  LEGACY_SUMMARY_PREFIX constant for backward compatibility

Inspired by PR #776 by @kshitijk4poor and the research in #499.
2026-03-11 05:41:05 -07:00
3 changed files with 44 additions and 28 deletions
+37 -22
View File
@@ -17,6 +17,13 @@ from agent.model_metadata import (
logger = logging.getLogger(__name__)
# Handoff framing prepended to every compressed summary: it tells the model
# that earlier turns were compacted and that it should build on the summary.
SUMMARY_PREFIX = (
    "[CONTEXT COMPACTION] "
    "An earlier part of this conversation was summarized to preserve "
    "context space. "
    "Below is the summary — use it to build on the work already done "
    "and avoid duplicating effort:"
)

# Marker used before the handoff framing existed; the summarization model may
# still emit it, so it is kept for detection and backward compatibility.
LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
class ContextCompressor:
"""Compresses conversation context when approaching the model's context limit.
@@ -103,24 +110,22 @@ class ContextCompressor:
parts.append(f"[{role.upper()}]: {content}")
content_to_summarize = "\n\n".join(parts)
prompt = (
"You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff "
"summary for the AI assistant that will resume this conversation.\n\n"
"Include:\n"
"- Current progress and key decisions made\n"
"- Important context, constraints, or user preferences discovered\n"
"- What remains to be done (clear next steps)\n"
"- Any critical data: file paths, variable names, URLs, error messages, "
"or code snippets needed to continue\n"
"- Tool calls made and their key results\n\n"
"Be concise, structured, and focused on helping the assistant seamlessly "
"continue the work without re-doing what's already been done.\n\n"
f"Target roughly {self.summary_target_tokens} tokens.\n\n"
"---\n"
f"TURNS TO SUMMARIZE:\n{content_to_summarize}\n"
"---\n\n"
'Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.'
)
prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
Write from a neutral perspective describing:
1. What actions were taken (tool calls, searches, file operations)
2. Key information or results obtained
3. Important decisions or findings
4. Relevant data, file names, or outputs
Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
---
TURNS TO SUMMARIZE:
{content_to_summarize}
---
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
# 1. Try the auxiliary model (cheap/fast)
if self.client:
@@ -168,9 +173,19 @@ class ContextCompressor:
raise
summary = response.choices[0].message.content.strip()
if not summary.startswith("[CONTEXT SUMMARY]:"):
summary = "[CONTEXT SUMMARY]: " + summary
return summary
return self._with_summary_prefix(summary)
@staticmethod
def _with_summary_prefix(summary: str) -> str:
    """Return *summary* carrying exactly one ``SUMMARY_PREFIX`` header.

    The raw model output is stripped of surrounding whitespace and of any
    leading prefix it already carries — the legacy ``[CONTEXT SUMMARY]:``
    marker or the current ``SUMMARY_PREFIX`` text — before the current
    prefix is prepended. Stripping the current prefix as well makes the
    helper idempotent, so an already-normalized summary is not
    double-prefixed.

    Args:
        summary: Raw summary text; a falsy value is treated as empty.

    Returns:
        ``SUMMARY_PREFIX``, a newline, then the cleaned summary body.
    """
    text = (summary or "").strip()
    known_prefixes = (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX)
    # Loop so stacked prefixes (e.g. legacy marker followed by the current
    # header) are all removed; each pass shortens ``text``, so it terminates.
    while text.startswith(known_prefixes):
        for prefix in known_prefixes:
            if text.startswith(prefix):
                text = text[len(prefix):].lstrip()
    return f"{SUMMARY_PREFIX}\n{text}"
def _get_fallback_client(self):
"""Try to build a fallback client from the main model's endpoint config.
@@ -340,7 +355,7 @@ class ContextCompressor:
for i in range(compress_start):
msg = messages[i].copy()
if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. Build on that summary rather than re-doing work.]"
compressed.append(msg)
if summary:
+5 -5
View File
@@ -3,7 +3,7 @@
import pytest
from unittest.mock import patch, MagicMock
from agent.context_compressor import ContextCompressor
from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX
@pytest.fixture()
@@ -141,7 +141,7 @@ class TestGenerateSummaryNoneContent:
summary = c._generate_summary(messages)
assert isinstance(summary, str)
assert "CONTEXT SUMMARY" in summary
assert summary.startswith(SUMMARY_PREFIX)
def test_none_content_in_system_message_compress(self):
"""System message with content=None should not crash during compress."""
@@ -174,7 +174,7 @@ class TestCompressWithClient:
# Should have summary message in the middle
contents = [m.get("content", "") for m in result]
assert any("CONTEXT SUMMARY" in c for c in contents)
assert any(c.startswith(SUMMARY_PREFIX) for c in contents)
assert len(result) < len(msgs)
def test_summarization_does_not_split_tool_call_pairs(self):
@@ -246,7 +246,7 @@ class TestCompressWithClient:
{"role": "assistant", "content": "msg 5"},
]
result = c.compress(msgs)
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
summary_msg = [m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)]
assert len(summary_msg) == 1
assert summary_msg[0]["role"] == "user"
@@ -274,7 +274,7 @@ class TestCompressWithClient:
{"role": "assistant", "content": "msg 7"},
]
result = c.compress(msgs)
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
summary_msg = [m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)]
assert len(summary_msg) == 1
assert summary_msg[0]["role"] == "assistant"
+2 -1
View File
@@ -12,6 +12,7 @@ from unittest.mock import MagicMock, patch
import pytest
from agent.context_compressor import SUMMARY_PREFIX
from run_agent import AIAgent
@@ -335,7 +336,7 @@ class TestPreflightCompression:
# Simulate compression reducing messages
mock_compress.return_value = (
[
{"role": "user", "content": "[CONTEXT SUMMARY]: Previous conversation"},
{"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
{"role": "user", "content": "hello"},
],
"new system prompt",