From e52f1446716fa2bf353123e3350e330de65d8931 Mon Sep 17 00:00:00 2001
From: Teknium <teknium1@gmail.com>
Date: Sun, 5 Apr 2026 23:45:07 -0700
Subject: [PATCH] =?UTF-8?q?feat:=20phase=202=20=E2=80=94=20instrument=20AP?=
 =?UTF-8?q?I=20calls,=20tools,=20sessions,=20and=20credential=20pool?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add structured INFO-level logging to the key code paths so agent.log
captures actionable debugging data:

API calls (run_agent.py):
  - Model, provider, input/output tokens, total tokens, latency
  - Cache hit rate (cache_read_tokens / prompt_tokens percentage)
  - Logged after each successful API call with usage data

Tool execution (run_agent.py):
  - Tool name, duration, result size for successful calls
  - Tool name, duration, error preview for failures
  - Both sequential and concurrent execution paths instrumented

Session lifecycle (run_agent.py):
  - Conversation turn start: session ID, model, provider, platform,
    history size, message preview
  - Context compression: before (message count, token estimate) and
    after (compressed count, post-compression tokens)

Credential pool (agent/credential_pool.py):
  - Pool exhaustion: which credential was marked exhausted and why
  - Rotation: which credential was selected next
  - Empty pool: when all credentials are exhausted

Example agent.log output after this change:
  INFO run_agent: conversation turn: session=20260405_223500_abc model=claude-opus provider=openrouter platform=cli history=12 msg='Fix the logging...'
  INFO run_agent: tool terminal completed (2.34s, 1847 chars)
  INFO run_agent: tool read_file completed (0.01s, 3204 chars)
  INFO run_agent: API call #3: model=claude-opus provider=openrouter in=45231 out=892 total=46123 latency=4.2s cache=38102/45231 (84%)
---
 agent/credential_pool.py | 12 +++++++++++-
 run_agent.py             | 40 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index 311abea98f..740fc59d4a 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -660,6 +660,7 @@ class CredentialPool:
         available = self._available_entries(clear_expired=True, refresh=True)
         if not available:
             self._current_id = None
+            logger.info("credential pool: no available entries (all exhausted or empty)")
             return None
 
         if self._strategy == STRATEGY_RANDOM:
@@ -702,9 +703,18 @@ class CredentialPool:
             entry = self.current() or self._select_unlocked()
             if entry is None:
                 return None
+            _label = entry.label or entry.id[:8]
+            logger.info(
+                "credential pool: marking %s exhausted (status=%s), rotating",
+                _label, status_code,
+            )
             self._mark_exhausted(entry, status_code, error_context)
             self._current_id = None
-            return self._select_unlocked()
+            next_entry = self._select_unlocked()
+            if next_entry:
+                _next_label = next_entry.label or next_entry.id[:8]
+                logger.info("credential pool: rotated to %s", _next_label)
+            return next_entry
 
     def try_refresh_current(self) -> Optional[PooledCredential]:
         with self._lock:
diff --git a/run_agent.py b/run_agent.py
index d8355d7421..d54e085a0a 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5809,6 +5809,12 @@ class AIAgent:
         Returns:
             (compressed_messages, new_system_prompt) tuple
         """
+        _pre_msg_count = len(messages)
+        logger.info(
+            "context compression started: session=%s messages=%d tokens=~%s model=%s",
+            self.session_id or "none", _pre_msg_count,
+            f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
+        )
         # Pre-compression memory flush: let the model save memories before they're lost
         self.flush_memories(messages, min_turns=0)
 
@@ -5885,6 +5891,11 @@ class AIAgent:
         except Exception:
             pass
 
+        logger.info(
+            "context compression done: session=%s messages=%d->%d tokens=~%s",
+            self.session_id or "none", _pre_msg_count, len(compressed),
+            f"{_compressed_est:,}",
+        )
         return compressed, new_system_prompt
 
     def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
@@ -6085,6 +6096,10 @@ class AIAgent:
                 logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
             duration = time.time() - start
             is_error, _ = _detect_tool_failure(function_name, result)
+            if is_error:
+                logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
+            else:
+                logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
             results[index] = (function_name, function_args, result, duration, is_error)
 
         # Start spinner for CLI mode (skip when TUI handles tool progress)
@@ -6430,6 +6445,8 @@ class AIAgent:
             _is_error_result, _ = _detect_tool_failure(function_name, function_result)
             if _is_error_result:
                 logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
+            else:
+                logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, len(function_result))
 
             if self.tool_progress_callback:
                 try:
@@ -6807,7 +6824,17 @@ class AIAgent:
         # They are initialized in __init__ and must persist across run_conversation
         # calls so that nudge logic accumulates correctly in CLI mode.
         self.iteration_budget = IterationBudget(self.max_iterations)
-        
+
+        # Log conversation turn start for debugging/observability
+        _msg_preview = (user_message[:80] + "...") if len(user_message) > 80 else user_message
+        _msg_preview = _msg_preview.replace("\n", " ")
+        logger.info(
+            "conversation turn: session=%s model=%s provider=%s platform=%s history=%d msg=%r",
+            self.session_id or "none", self.model, self.provider or "unknown",
+            self.platform or "unknown", len(conversation_history or []),
+            _msg_preview,
+        )
+
         # Initialize conversation (copy to avoid mutating the caller's list)
         messages = list(conversation_history) if conversation_history else []
 
@@ -7581,6 +7608,17 @@ class AIAgent:
                         self.session_cache_write_tokens += canonical_usage.cache_write_tokens
                         self.session_reasoning_tokens += canonical_usage.reasoning_tokens
 
+                        # Log API call details for debugging/observability
+                        _cache_pct = ""
+                        if canonical_usage.cache_read_tokens and prompt_tokens:
+                            _cache_pct = f" cache={canonical_usage.cache_read_tokens}/{prompt_tokens} ({100*canonical_usage.cache_read_tokens/prompt_tokens:.0f}%)"
+                        logger.info(
+                            "API call #%d: model=%s provider=%s in=%d out=%d total=%d latency=%.1fs%s",
+                            self.session_api_calls, self.model, self.provider or "unknown",
+                            prompt_tokens, completion_tokens, total_tokens,
+                            api_duration, _cache_pct,
+                        )
+
                         cost_result = estimate_usage_cost(
                             self.model,
                             canonical_usage,