From e52f1446716fa2bf353123e3350e330de65d8931 Mon Sep 17 00:00:00 2001 From: Teknium Date: Sun, 5 Apr 2026 23:45:07 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20phase=202=20=E2=80=94=20instrument=20AP?= =?UTF-8?q?I=20calls,=20tools,=20sessions,=20and=20credential=20pool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add structured INFO-level logging to the key code paths so agent.log captures actionable debugging data: API calls (run_agent.py): - Model, provider, input/output tokens, total tokens, latency - Cache hit rate (cache_read_tokens / prompt_tokens percentage) - Logged after each successful API call with usage data Tool execution (run_agent.py): - Tool name, duration, result size for successful calls - Tool name, duration, error preview for failures - Both sequential and concurrent execution paths instrumented Session lifecycle (run_agent.py): - Conversation turn start: session ID, model, provider, platform, history size, message preview - Context compression: before (message count, token estimate) and after (compressed count, post-compression tokens) Credential pool (agent/credential_pool.py): - Pool exhaustion: which credential was marked exhausted and why - Rotation: which credential was selected next - Empty pool: when all credentials are exhausted Example agent.log output after this change: INFO run_agent: conversation turn: session=20260405_223500_abc model=claude-opus provider=openrouter platform=cli history=12 msg='Fix the logging...' INFO run_agent: tool terminal completed (2.34s, 1847 chars) INFO run_agent: tool read_file completed (0.01s, 3204 chars) INFO run_agent: API call #3: model=claude-opus provider=openrouter in=45231 out=892 total=46123 latency=4.2s cache=38102/45231 (84%) --- agent/credential_pool.py | 12 +++++++++++- run_agent.py | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 311abea98f..740fc59d4a 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -660,6 +660,7 @@ class CredentialPool: available = self._available_entries(clear_expired=True, refresh=True) if not available: self._current_id = None + logger.info("credential pool: no available entries (all exhausted or empty)") return None if self._strategy == STRATEGY_RANDOM: @@ -702,9 +703,18 @@ class CredentialPool: entry = self.current() or self._select_unlocked() if entry is None: return None + _label = entry.label or entry.id[:8] + logger.info( + "credential pool: marking %s exhausted (status=%s), rotating", + _label, status_code, + ) self._mark_exhausted(entry, status_code, error_context) self._current_id = None - return self._select_unlocked() + next_entry = self._select_unlocked() + if next_entry: + _next_label = next_entry.label or next_entry.id[:8] + logger.info("credential pool: rotated to %s", _next_label) + return next_entry def try_refresh_current(self) -> Optional[PooledCredential]: with self._lock: diff --git a/run_agent.py b/run_agent.py index d8355d7421..d54e085a0a 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5809,6 +5809,12 @@ class AIAgent: Returns: (compressed_messages, new_system_prompt) tuple """ + _pre_msg_count = len(messages) + logger.info( + "context compression started: session=%s messages=%d tokens=~%s model=%s", + self.session_id or "none", _pre_msg_count, + f"{approx_tokens:,}" if approx_tokens else "unknown", self.model, + ) # Pre-compression memory flush: let the model save memories before they're lost self.flush_memories(messages, min_turns=0) @@ -5885,6 +5891,11 @@ class AIAgent: except Exception: pass + logger.info( + "context compression done: session=%s messages=%d->%d tokens=~%s", + self.session_id or "none", _pre_msg_count, len(compressed), + f"{_compressed_est:,}", + ) return compressed, new_system_prompt def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None: @@ -6085,6 +6096,10 @@ class AIAgent: logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True) duration = time.time() - start is_error, _ = _detect_tool_failure(function_name, result) + if is_error: + logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200]) + else: + logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result)) results[index] = (function_name, function_args, result, duration, is_error) # Start spinner for CLI mode (skip when TUI handles tool progress) @@ -6430,6 +6445,8 @@ class AIAgent: _is_error_result, _ = _detect_tool_failure(function_name, function_result) if _is_error_result: logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview) + else: + logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, len(function_result)) if self.tool_progress_callback: try: @@ -6807,7 +6824,17 @@ class AIAgent: # They are initialized in __init__ and must persist across run_conversation # calls so that nudge logic accumulates correctly in CLI mode. self.iteration_budget = IterationBudget(self.max_iterations) - + + # Log conversation turn start for debugging/observability + _msg_preview = (user_message[:80] + "...") if len(user_message) > 80 else user_message + _msg_preview = _msg_preview.replace("\n", " ") + logger.info( + "conversation turn: session=%s model=%s provider=%s platform=%s history=%d msg=%r", + self.session_id or "none", self.model, self.provider or "unknown", + self.platform or "unknown", len(conversation_history or []), + _msg_preview, + ) + # Initialize conversation (copy to avoid mutating the caller's list) messages = list(conversation_history) if conversation_history else [] @@ -7581,6 +7608,17 @@ class AIAgent: self.session_cache_write_tokens += canonical_usage.cache_write_tokens self.session_reasoning_tokens += canonical_usage.reasoning_tokens + # Log API call details for debugging/observability + _cache_pct = "" + if canonical_usage.cache_read_tokens and prompt_tokens: + _cache_pct = f" cache={canonical_usage.cache_read_tokens}/{prompt_tokens} ({100*canonical_usage.cache_read_tokens/prompt_tokens:.0f}%)" + logger.info( + "API call #%d: model=%s provider=%s in=%d out=%d total=%d latency=%.1fs%s", + self.session_api_calls, self.model, self.provider or "unknown", + prompt_tokens, completion_tokens, total_tokens, + api_duration, _cache_pct, + ) + cost_result = estimate_usage_cost( self.model, canonical_usage,