feat: Codex-style handoff prefix for compressed context summaries

Replace the old '[CONTEXT SUMMARY]:' prefix on compressed summaries with a Codex-inspired handoff framing that tells the model what happened and how to use the summary.

What changes:

1. New SUMMARY_PREFIX constant — the text prepended to every compressed summary:

       [CONTEXT COMPACTION] An earlier part of this conversation was summarized to preserve context space. Below is the summary — use it to build on the work already done and avoid duplicating effort:

2. _with_summary_prefix() helper — normalizes model output by stripping any legacy '[CONTEXT SUMMARY]:' prefix the summarization model may have produced, then prepends the new SUMMARY_PREFIX.

3. System message annotation updated — the note appended to the system prompt on first compression now says 'compacted into a handoff summary' and instructs 'build on that summary rather than re-doing work' instead of the old generic note.

Why this is better:

The old prefix ('[CONTEXT SUMMARY]: <raw text>') gave the model no context about what the summary is or how to use it. The new prefix explicitly frames it as a context compaction event and instructs the model to build on prior work rather than re-doing it. This reduces redundant tool calls and file re-reads after compression.

What does NOT change:

- The compression algorithm (positional protection, boundary alignment)
- The role alternation logic (summary role adapts to avoid consecutive same-role messages)
- The summarization model or trigger thresholds
- LEGACY_SUMMARY_PREFIX is exported for backward compatibility

Inspired by PR #776 by @kshitijk4poor and the research in #499.
2026-03-11 05:41:05 -07:00
3 changed files with 44 additions and 28 deletions
@@ -17,6 +17,13 @@ from agent.model_metadata import (

 logger = logging.getLogger(__name__)

+SUMMARY_PREFIX = (
+    "[CONTEXT COMPACTION] An earlier part of this conversation was "
+    "summarized to preserve context space. Below is the summary — use it "
+    "to build on the work already done and avoid duplicating effort:"
+)
+LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
+

 class ContextCompressor:
    """Compresses conversation context when approaching the model's context limit.
@@ -103,24 +110,22 @@ class ContextCompressor:
            parts.append(f"[{role.upper()}]: {content}")

        content_to_summarize = "\n\n".join(parts)
-        prompt = (
-            "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff "
-            "summary for the AI assistant that will resume this conversation.\n\n"
-            "Include:\n"
-            "- Current progress and key decisions made\n"
-            "- Important context, constraints, or user preferences discovered\n"
-            "- What remains to be done (clear next steps)\n"
-            "- Any critical data: file paths, variable names, URLs, error messages, "
-            "or code snippets needed to continue\n"
-            "- Tool calls made and their key results\n\n"
-            "Be concise, structured, and focused on helping the assistant seamlessly "
-            "continue the work without re-doing what's already been done.\n\n"
-            f"Target roughly {self.summary_target_tokens} tokens.\n\n"
-            "---\n"
-            f"TURNS TO SUMMARIZE:\n{content_to_summarize}\n"
-            "---\n\n"
-            'Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.'
-        )
+        prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
+
+Write from a neutral perspective describing:
+1. What actions were taken (tool calls, searches, file operations)
+2. Key information or results obtained
+3. Important decisions or findings
+4. Relevant data, file names, or outputs
+
+Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
+
+---
+TURNS TO SUMMARIZE:
+{content_to_summarize}
+---
+
+Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""

        # 1. Try the auxiliary model (cheap/fast)
        if self.client:
@@ -168,9 +173,19 @@ class ContextCompressor:
                raise

        summary = response.choices[0].message.content.strip()
-        if not summary.startswith("[CONTEXT SUMMARY]:"):
-            summary = "[CONTEXT SUMMARY]: " + summary
-        return summary
+        return self._with_summary_prefix(summary)
+
+    @staticmethod
+    def _with_summary_prefix(summary: str) -> str:
+        """Normalize the summary prefix to the current standard.
+
+        Strips any legacy ``[CONTEXT SUMMARY]:`` prefix the model may have
+        produced and prepends the current ``SUMMARY_PREFIX`` handoff text.
+        """
+        text = (summary or "").strip()
+        if text.startswith(LEGACY_SUMMARY_PREFIX):
+            text = text[len(LEGACY_SUMMARY_PREFIX):].lstrip()
+        return f"{SUMMARY_PREFIX}\n{text}"

    def _get_fallback_client(self):
        """Try to build a fallback client from the main model's endpoint config.
@@ -340,7 +355,7 @@ class ContextCompressor:
        for i in range(compress_start):
            msg = messages[i].copy()
            if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
-                msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
+                msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. Build on that summary rather than re-doing work.]"
            compressed.append(msg)

        if summary:
@@ -3,7 +3,7 @@
 import pytest
 from unittest.mock import patch, MagicMock

-from agent.context_compressor import ContextCompressor
+from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX


@pytest.fixture()
@@ -141,7 +141,7 @@ class TestGenerateSummaryNoneContent:

        summary = c._generate_summary(messages)
        assert isinstance(summary, str)
-        assert "CONTEXT SUMMARY" in summary
+        assert summary.startswith(SUMMARY_PREFIX)

    def test_none_content_in_system_message_compress(self):
        """System message with content=None should not crash during compress."""
@@ -174,7 +174,7 @@ class TestCompressWithClient:

        # Should have summary message in the middle
        contents = [m.get("content", "") for m in result]
-        assert any("CONTEXT SUMMARY" in c for c in contents)
+        assert any(c.startswith(SUMMARY_PREFIX) for c in contents)
        assert len(result) < len(msgs)

    def test_summarization_does_not_split_tool_call_pairs(self):
@@ -246,7 +246,7 @@ class TestCompressWithClient:
            {"role": "assistant", "content": "msg 5"},
        ]
        result = c.compress(msgs)
-        summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
+        summary_msg = [m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)]
        assert len(summary_msg) == 1
        assert summary_msg[0]["role"] == "user"

@@ -274,7 +274,7 @@ class TestCompressWithClient:
            {"role": "assistant", "content": "msg 7"},
        ]
        result = c.compress(msgs)
-        summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
+        summary_msg = [m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)]
        assert len(summary_msg) == 1
        assert summary_msg[0]["role"] == "assistant"

@@ -12,6 +12,7 @@ from unittest.mock import MagicMock, patch

 import pytest

+from agent.context_compressor import SUMMARY_PREFIX
 from run_agent import AIAgent


@@ -335,7 +336,7 @@ class TestPreflightCompression:
            # Simulate compression reducing messages
            mock_compress.return_value = (
                [
-                    {"role": "user", "content": "[CONTEXT SUMMARY]: Previous conversation"},
+                    {"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
                    {"role": "user", "content": "hello"},
                ],
                "new system prompt",