feat: phase 2 — instrument API calls, tools, sessions, and credential pool
Add structured INFO-level logging to the key code paths so agent.log
captures actionable debugging data:
API calls (run_agent.py):
- Model, provider, input/output tokens, total tokens, latency
- Cache hit rate (cache_read_tokens / prompt_tokens percentage)
- Logged after each successful API call with usage data
Tool execution (run_agent.py):
- Tool name, duration, result size for successful calls
- Tool name, duration, error preview for failures
- Both sequential and concurrent execution paths instrumented
Session lifecycle (run_agent.py):
- Conversation turn start: session ID, model, provider, platform,
history size, message preview
- Context compression: before (message count, token estimate) and
after (compressed count, post-compression tokens)
Credential pool (agent/credential_pool.py):
- Pool exhaustion: which credential was marked exhausted and why
- Rotation: which credential was selected next
- Empty pool: when all credentials are exhausted
Example agent.log output after this change:
INFO run_agent: conversation turn: session=20260405_223500_abc model=claude-opus provider=openrouter platform=cli history=12 msg='Fix the logging...'
INFO run_agent: tool terminal completed (2.34s, 1847 chars)
INFO run_agent: tool read_file completed (0.01s, 3204 chars)
INFO run_agent: API call #3: model=claude-opus provider=openrouter in=45231 out=892 total=46123 latency=4.2s cache=38102/45231 (84%)
This commit is contained in:
@@ -660,6 +660,7 @@ class CredentialPool:
|
||||
available = self._available_entries(clear_expired=True, refresh=True)
|
||||
if not available:
|
||||
self._current_id = None
|
||||
logger.info("credential pool: no available entries (all exhausted or empty)")
|
||||
return None
|
||||
|
||||
if self._strategy == STRATEGY_RANDOM:
|
||||
@@ -702,9 +703,18 @@ class CredentialPool:
|
||||
entry = self.current() or self._select_unlocked()
|
||||
if entry is None:
|
||||
return None
|
||||
_label = entry.label or entry.id[:8]
|
||||
logger.info(
|
||||
"credential pool: marking %s exhausted (status=%s), rotating",
|
||||
_label, status_code,
|
||||
)
|
||||
self._mark_exhausted(entry, status_code, error_context)
|
||||
self._current_id = None
|
||||
return self._select_unlocked()
|
||||
next_entry = self._select_unlocked()
|
||||
if next_entry:
|
||||
_next_label = next_entry.label or next_entry.id[:8]
|
||||
logger.info("credential pool: rotated to %s", _next_label)
|
||||
return next_entry
|
||||
|
||||
def try_refresh_current(self) -> Optional[PooledCredential]:
|
||||
with self._lock:
|
||||
|
||||
+39
-1
@@ -5809,6 +5809,12 @@ class AIAgent:
|
||||
Returns:
|
||||
(compressed_messages, new_system_prompt) tuple
|
||||
"""
|
||||
_pre_msg_count = len(messages)
|
||||
logger.info(
|
||||
"context compression started: session=%s messages=%d tokens=~%s model=%s",
|
||||
self.session_id or "none", _pre_msg_count,
|
||||
f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
|
||||
)
|
||||
# Pre-compression memory flush: let the model save memories before they're lost
|
||||
self.flush_memories(messages, min_turns=0)
|
||||
|
||||
@@ -5885,6 +5891,11 @@ class AIAgent:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
logger.info(
|
||||
"context compression done: session=%s messages=%d->%d tokens=~%s",
|
||||
self.session_id or "none", _pre_msg_count, len(compressed),
|
||||
f"{_compressed_est:,}",
|
||||
)
|
||||
return compressed, new_system_prompt
|
||||
|
||||
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||
@@ -6085,6 +6096,10 @@ class AIAgent:
|
||||
logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
|
||||
duration = time.time() - start
|
||||
is_error, _ = _detect_tool_failure(function_name, result)
|
||||
if is_error:
|
||||
logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
|
||||
else:
|
||||
logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
|
||||
results[index] = (function_name, function_args, result, duration, is_error)
|
||||
|
||||
# Start spinner for CLI mode (skip when TUI handles tool progress)
|
||||
@@ -6430,6 +6445,8 @@ class AIAgent:
|
||||
_is_error_result, _ = _detect_tool_failure(function_name, function_result)
|
||||
if _is_error_result:
|
||||
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
||||
else:
|
||||
logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, len(function_result))
|
||||
|
||||
if self.tool_progress_callback:
|
||||
try:
|
||||
@@ -6807,7 +6824,17 @@ class AIAgent:
|
||||
# They are initialized in __init__ and must persist across run_conversation
|
||||
# calls so that nudge logic accumulates correctly in CLI mode.
|
||||
self.iteration_budget = IterationBudget(self.max_iterations)
|
||||
|
||||
|
||||
# Log conversation turn start for debugging/observability
|
||||
_msg_preview = (user_message[:80] + "...") if len(user_message) > 80 else user_message
|
||||
_msg_preview = _msg_preview.replace("\n", " ")
|
||||
logger.info(
|
||||
"conversation turn: session=%s model=%s provider=%s platform=%s history=%d msg=%r",
|
||||
self.session_id or "none", self.model, self.provider or "unknown",
|
||||
self.platform or "unknown", len(conversation_history or []),
|
||||
_msg_preview,
|
||||
)
|
||||
|
||||
# Initialize conversation (copy to avoid mutating the caller's list)
|
||||
messages = list(conversation_history) if conversation_history else []
|
||||
|
||||
@@ -7581,6 +7608,17 @@ class AIAgent:
|
||||
self.session_cache_write_tokens += canonical_usage.cache_write_tokens
|
||||
self.session_reasoning_tokens += canonical_usage.reasoning_tokens
|
||||
|
||||
# Log API call details for debugging/observability
|
||||
_cache_pct = ""
|
||||
if canonical_usage.cache_read_tokens and prompt_tokens:
|
||||
_cache_pct = f" cache={canonical_usage.cache_read_tokens}/{prompt_tokens} ({100*canonical_usage.cache_read_tokens/prompt_tokens:.0f}%)"
|
||||
logger.info(
|
||||
"API call #%d: model=%s provider=%s in=%d out=%d total=%d latency=%.1fs%s",
|
||||
self.session_api_calls, self.model, self.provider or "unknown",
|
||||
prompt_tokens, completion_tokens, total_tokens,
|
||||
api_duration, _cache_pct,
|
||||
)
|
||||
|
||||
cost_result = estimate_usage_cost(
|
||||
self.model,
|
||||
canonical_usage,
|
||||
|
||||
Reference in New Issue
Block a user