Compare commits

..

1 Commits

Author SHA1 Message Date
teknium1 d9122ac936 feat: use Codex-style compaction prompt for context compression
Replace the generic summarization prompt ('Summarize these conversation
turns concisely') with a task-oriented handoff prompt inspired by
OpenAI's Codex CLI compaction flow (researched in #499).

The new prompt frames compression as a 'CONTEXT CHECKPOINT COMPACTION'
and instructs the summarization model to produce a structured handoff
summary that includes:
- Current progress and key decisions
- User preferences and constraints discovered
- Clear next steps remaining
- Critical data (file paths, URLs, error messages, code snippets)
- Tool calls made and their key results

This produces better summaries because the model understands the summary
will be used by another LLM to continue the work, rather than treating
it as a generic text compression task.

No behavioral change to the compression algorithm itself — same
positional protection, same role alternation, same [CONTEXT SUMMARY]:
prefix. Only the prompt sent to the summarization model changes.

Inspired by PR #776 by @kshitijk4poor.
2026-03-11 05:38:20 -07:00
3 changed files with 28 additions and 44 deletions
+22 -37
View File
@@ -17,13 +17,6 @@ from agent.model_metadata import (
logger = logging.getLogger(__name__)
SUMMARY_PREFIX = (
"[CONTEXT COMPACTION] An earlier part of this conversation was "
"summarized to preserve context space. Below is the summary — use it "
"to build on the work already done and avoid duplicating effort:"
)
LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
class ContextCompressor:
"""Compresses conversation context when approaching the model's context limit.
@@ -110,22 +103,24 @@ class ContextCompressor:
parts.append(f"[{role.upper()}]: {content}")
content_to_summarize = "\n\n".join(parts)
prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
Write from a neutral perspective describing:
1. What actions were taken (tool calls, searches, file operations)
2. Key information or results obtained
3. Important decisions or findings
4. Relevant data, file names, or outputs
Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
---
TURNS TO SUMMARIZE:
{content_to_summarize}
---
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
prompt = (
"You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff "
"summary for the AI assistant that will resume this conversation.\n\n"
"Include:\n"
"- Current progress and key decisions made\n"
"- Important context, constraints, or user preferences discovered\n"
"- What remains to be done (clear next steps)\n"
"- Any critical data: file paths, variable names, URLs, error messages, "
"or code snippets needed to continue\n"
"- Tool calls made and their key results\n\n"
"Be concise, structured, and focused on helping the assistant seamlessly "
"continue the work without re-doing what's already been done.\n\n"
f"Target roughly {self.summary_target_tokens} tokens.\n\n"
"---\n"
f"TURNS TO SUMMARIZE:\n{content_to_summarize}\n"
"---\n\n"
'Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.'
)
# 1. Try the auxiliary model (cheap/fast)
if self.client:
@@ -173,19 +168,9 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
raise
summary = response.choices[0].message.content.strip()
return self._with_summary_prefix(summary)
@staticmethod
def _with_summary_prefix(summary: str) -> str:
"""Normalize the summary prefix to the current standard.
Strips any legacy ``[CONTEXT SUMMARY]:`` prefix the model may have
produced and prepends the current ``SUMMARY_PREFIX`` handoff text.
"""
text = (summary or "").strip()
if text.startswith(LEGACY_SUMMARY_PREFIX):
text = text[len(LEGACY_SUMMARY_PREFIX):].lstrip()
return f"{SUMMARY_PREFIX}\n{text}"
if not summary.startswith("[CONTEXT SUMMARY]:"):
summary = "[CONTEXT SUMMARY]: " + summary
return summary
def _get_fallback_client(self):
"""Try to build a fallback client from the main model's endpoint config.
@@ -355,7 +340,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
for i in range(compress_start):
msg = messages[i].copy()
if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. Build on that summary rather than re-doing work.]"
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
compressed.append(msg)
if summary:
+5 -5
View File
@@ -3,7 +3,7 @@
import pytest
from unittest.mock import patch, MagicMock
from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX
from agent.context_compressor import ContextCompressor
@pytest.fixture()
@@ -141,7 +141,7 @@ class TestGenerateSummaryNoneContent:
summary = c._generate_summary(messages)
assert isinstance(summary, str)
assert summary.startswith(SUMMARY_PREFIX)
assert "CONTEXT SUMMARY" in summary
def test_none_content_in_system_message_compress(self):
"""System message with content=None should not crash during compress."""
@@ -174,7 +174,7 @@ class TestCompressWithClient:
# Should have summary message in the middle
contents = [m.get("content", "") for m in result]
assert any(c.startswith(SUMMARY_PREFIX) for c in contents)
assert any("CONTEXT SUMMARY" in c for c in contents)
assert len(result) < len(msgs)
def test_summarization_does_not_split_tool_call_pairs(self):
@@ -246,7 +246,7 @@ class TestCompressWithClient:
{"role": "assistant", "content": "msg 5"},
]
result = c.compress(msgs)
summary_msg = [m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)]
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
assert len(summary_msg) == 1
assert summary_msg[0]["role"] == "user"
@@ -274,7 +274,7 @@ class TestCompressWithClient:
{"role": "assistant", "content": "msg 7"},
]
result = c.compress(msgs)
summary_msg = [m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)]
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
assert len(summary_msg) == 1
assert summary_msg[0]["role"] == "assistant"
+1 -2
View File
@@ -12,7 +12,6 @@ from unittest.mock import MagicMock, patch
import pytest
from agent.context_compressor import SUMMARY_PREFIX
from run_agent import AIAgent
@@ -336,7 +335,7 @@ class TestPreflightCompression:
# Simulate compression reducing messages
mock_compress.return_value = (
[
{"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
{"role": "user", "content": "[CONTEXT SUMMARY]: Previous conversation"},
{"role": "user", "content": "hello"},
],
"new system prompt",