feat: phase 2 — instrument API calls, tools, sessions, and credential pool

Add structured INFO-level logging to the key code paths so agent.log
captures actionable debugging data:

API calls (run_agent.py):
  - Model, provider, input/output tokens, total tokens, latency
  - Cache hit rate (cache_read_tokens / prompt_tokens percentage)
  - Logged after each successful API call with usage data

Tool execution (run_agent.py):
  - Tool name, duration, result size for successful calls
  - Tool name, duration, error preview for failures
  - Both sequential and concurrent execution paths instrumented

Session lifecycle (run_agent.py):
  - Conversation turn start: session ID, model, provider, platform,
    history size, message preview
  - Context compression: before (message count, token estimate) and
    after (compressed count, post-compression tokens)

Credential pool (agent/credential_pool.py):
  - Pool exhaustion: which credential was marked exhausted and why
  - Rotation: which credential was selected next
  - Empty pool: when all credentials are exhausted

Example agent.log output after this change:
  INFO run_agent: conversation turn: session=20260405_223500_abc model=claude-opus provider=openrouter platform=cli history=12 msg='Fix the logging...'
  INFO run_agent: tool terminal completed (2.34s, 1847 chars)
  INFO run_agent: tool read_file completed (0.01s, 3204 chars)
  INFO run_agent: API call #3: model=claude-opus provider=openrouter in=45231 out=892 total=46123 latency=4.2s cache=38102/45231 (84%)
This commit is contained in:
Teknium
2026-04-05 23:45:07 -07:00
parent c507ee2180
commit e52f144671
2 changed files with 50 additions and 2 deletions
+11 -1
View File
@@ -660,6 +660,7 @@ class CredentialPool:
available = self._available_entries(clear_expired=True, refresh=True)
if not available:
self._current_id = None
logger.info("credential pool: no available entries (all exhausted or empty)")
return None
if self._strategy == STRATEGY_RANDOM:
@@ -702,9 +703,18 @@ class CredentialPool:
entry = self.current() or self._select_unlocked()
if entry is None:
return None
_label = entry.label or entry.id[:8]
logger.info(
"credential pool: marking %s exhausted (status=%s), rotating",
_label, status_code,
)
self._mark_exhausted(entry, status_code, error_context)
self._current_id = None
return self._select_unlocked()
next_entry = self._select_unlocked()
if next_entry:
_next_label = next_entry.label or next_entry.id[:8]
logger.info("credential pool: rotated to %s", _next_label)
return next_entry
def try_refresh_current(self) -> Optional[PooledCredential]:
with self._lock:
+39 -1
View File
@@ -5809,6 +5809,12 @@ class AIAgent:
Returns:
(compressed_messages, new_system_prompt) tuple
"""
_pre_msg_count = len(messages)
logger.info(
"context compression started: session=%s messages=%d tokens=~%s model=%s",
self.session_id or "none", _pre_msg_count,
f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
)
# Pre-compression memory flush: let the model save memories before they're lost
self.flush_memories(messages, min_turns=0)
@@ -5885,6 +5891,11 @@ class AIAgent:
except Exception:
pass
logger.info(
"context compression done: session=%s messages=%d->%d tokens=~%s",
self.session_id or "none", _pre_msg_count, len(compressed),
f"{_compressed_est:,}",
)
return compressed, new_system_prompt
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
@@ -6085,6 +6096,10 @@ class AIAgent:
logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
duration = time.time() - start
is_error, _ = _detect_tool_failure(function_name, result)
if is_error:
logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
else:
logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
results[index] = (function_name, function_args, result, duration, is_error)
# Start spinner for CLI mode (skip when TUI handles tool progress)
@@ -6430,6 +6445,8 @@ class AIAgent:
_is_error_result, _ = _detect_tool_failure(function_name, function_result)
if _is_error_result:
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
else:
logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, len(function_result))
if self.tool_progress_callback:
try:
@@ -6807,7 +6824,17 @@ class AIAgent:
# They are initialized in __init__ and must persist across run_conversation
# calls so that nudge logic accumulates correctly in CLI mode.
self.iteration_budget = IterationBudget(self.max_iterations)
# Log conversation turn start for debugging/observability
_msg_preview = (user_message[:80] + "...") if len(user_message) > 80 else user_message
_msg_preview = _msg_preview.replace("\n", " ")
logger.info(
"conversation turn: session=%s model=%s provider=%s platform=%s history=%d msg=%r",
self.session_id or "none", self.model, self.provider or "unknown",
self.platform or "unknown", len(conversation_history or []),
_msg_preview,
)
# Initialize conversation (copy to avoid mutating the caller's list)
messages = list(conversation_history) if conversation_history else []
@@ -7581,6 +7608,17 @@ class AIAgent:
self.session_cache_write_tokens += canonical_usage.cache_write_tokens
self.session_reasoning_tokens += canonical_usage.reasoning_tokens
# Log API call details for debugging/observability
_cache_pct = ""
if canonical_usage.cache_read_tokens and prompt_tokens:
_cache_pct = f" cache={canonical_usage.cache_read_tokens}/{prompt_tokens} ({100*canonical_usage.cache_read_tokens/prompt_tokens:.0f}%)"
logger.info(
"API call #%d: model=%s provider=%s in=%d out=%d total=%d latency=%.1fs%s",
self.session_api_calls, self.model, self.provider or "unknown",
prompt_tokens, completion_tokens, total_tokens,
api_duration, _cache_pct,
)
cost_result = estimate_usage_cost(
self.model,
canonical_usage,