fix: handle multimodal content in context compression summarization

The _generate_summary() method assumed message content is always a string (msg.get('content') or ''). When content is a multimodal list (e.g. [{type: 'text', text: '...'}, {type: 'image_url', ...}]), this produced mangled output: len() returned the list length instead of character count, and slicing produced list items instead of substrings. Add _content_to_text() helper that safely converts any content format to plain text: - str → returned as-is - None → empty string - list (multimodal) → text parts joined, images replaced with [image] - dict/other → JSON serialization with str() fallback This ensures multimodal conversations compress correctly instead of producing garbled summaries. Inspired by PR #776 by @kshitijk4poor.
refactor(slack): replace print statements with structured logging
2026-03-11 05:42:31 -07:00 · 2026-03-11 05:34:43 -07:00
8 changed files with 188 additions and 605 deletions
@@ -5,6 +5,7 @@ Uses Gemini Flash (cheap/fast) to summarize middle turns while
 protecting head and tail context.
 """

+import json
 import logging
 import os
 from typing import Any, Dict, List, Optional
@@ -82,6 +83,41 @@ class ContextCompressor:
            "compression_count": self.compression_count,
        }

+    @staticmethod
+    def _content_to_text(content: Any) -> str:
+        """Convert message content to plain text for summarization.
+
+        Handles:
+        - str → returned as-is
+        - None → empty string
+        - list (multimodal) → text parts joined, images replaced with [image]
+        - other → JSON serialization or str() fallback
+        """
+        if isinstance(content, str):
+            return content
+        if content is None:
+            return ""
+        if isinstance(content, list):
+            parts = []
+            for item in content:
+                if isinstance(item, dict):
+                    item_type = item.get("type")
+                    if item_type == "text":
+                        parts.append(item.get("text", ""))
+                    elif item_type == "image_url":
+                        parts.append("[image]")
+                    elif item_type:
+                        parts.append(f"[{item_type}]")
+                    else:
+                        parts.append(str(item))
+                else:
+                    parts.append(str(item))
+            return "\n".join(part for part in parts if part)
+        try:
+            return json.dumps(content, ensure_ascii=False, sort_keys=True)
+        except TypeError:
+            return str(content)
+
    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> Optional[str]:
        """Generate a concise summary of conversation turns.

@@ -93,7 +129,7 @@ class ContextCompressor:
        parts = []
        for msg in turns_to_summarize:
            role = msg.get("role", "unknown")
-            content = msg.get("content") or ""
+            content = self._content_to_text(msg.get("content"))
            if len(content) > 2000:
                content = content[:1000] + "\n...[truncated]...\n" + content[-500:]
            tool_calls = msg.get("tool_calls", [])
@@ -1253,7 +1253,6 @@ class HermesCLI:
        # Background task tracking: {task_id: threading.Thread}
        self._background_tasks: Dict[str, threading.Thread] = {}
        self._background_task_counter = 0
-        self._stream_buf = ""

    def _invalidate(self, min_interval: float = 0.25) -> None:
        """Throttled UI repaint — prevents terminal blinking on slow/SSH connections."""
@@ -1496,7 +1495,6 @@ class HermesCLI:
                platform="cli",
                session_db=self._session_db,
                clarify_callback=self._clarify_callback,
-                stream_delta_callback=self._stream_delta,
                honcho_session_key=self.session_id,
                fallback_model=self._fallback_model,
                thinking_callback=self._on_thinking,
@@ -3341,28 +3339,6 @@ class HermesCLI:
            "Use your best judgement to make the choice and proceed."
        )

-    _stream_started = False
-
-    def _stream_delta(self, text: str):
-        """Buffer streaming tokens; emit complete lines via _cprint."""
-        if not text:
-            return
-        if not self._stream_started:
-            text = text.lstrip("\n")
-            if not text:
-                return
-            self._stream_started = True
-        self._stream_buf += text
-        while "\n" in self._stream_buf:
-            line, self._stream_buf = self._stream_buf.split("\n", 1)
-            _cprint(line)
-
-    def _flush_stream(self):
-        """Emit any remaining partial line from the stream buffer."""
-        if self._stream_buf:
-            _cprint(self._stream_buf)
-        self._stream_buf = ""
-
    def _sudo_password_callback(self) -> str:
        """
        Prompt for sudo password through the prompt_toolkit UI.
@@ -3491,8 +3467,6 @@ class HermesCLI:

        # Add user message to history
        self.conversation_history.append({"role": "user", "content": message})
-        self._stream_buf = ""
-        self._stream_started = False
        
        _cprint(f"{_GOLD}{'─' * 40}{_RST}")
        print(flush=True)
@@ -3540,7 +3514,6 @@ class HermesCLI:
                    agent_thread.join(0.1)
            
            agent_thread.join()  # Ensure agent thread completes
-            self._flush_stream()

            # Drain any remaining agent output still in the StdoutProxy
            # buffer so tool/status lines render ABOVE our response box.
@@ -3569,7 +3542,7 @@ class HermesCLI:
                if response and pending_message:
                    response = response + "\n\n---\n_[Interrupted - processing new message]_"
            
-            if response and not (self.agent and self.agent.stream_delta_callback):
+            if response:
                # Use a Rich Panel for the response box — adapts to terminal
                # width at render time instead of hard-coding border length.
                try:
@@ -413,36 +413,6 @@ class BasePlatformAdapter(ABC):
        """
        return SendResult(success=False, error="Not supported")

-    @property
-    def supports_streaming(self) -> bool:
-        """Whether this platform supports response streaming via message edits."""
-        return False
-
-    @property
-    def supports_draft_streaming(self) -> bool:
-        """Whether this platform supports native draft streaming (Bot API 9.3+)."""
-        return False
-
-    async def send_draft(self, chat_id: str, draft_id: int, text: str, metadata: dict = None) -> bool:
-        """Push a draft text update. Override in subclasses."""
-        return False
-
-    async def finalize_draft(self, chat_id: str, content: str, metadata: dict = None) -> "SendResult":
-        """Finalize a draft stream with the completed message."""
-        return SendResult(success=False, error="Not supported")
-
-    async def delete_message(self, chat_id: str, message_id: str) -> SendResult:
-        """Delete a previously sent message."""
-        return SendResult(success=False, error="Not supported")
-
-    async def send_raw(self, chat_id: str, content: str, metadata: dict = None) -> "SendResult":
-        """Send without formatting (default: delegates to send)."""
-        return await self.send(chat_id=chat_id, content=content, metadata=metadata)
-
-    async def edit_message_raw(self, chat_id: str, message_id: str, content: str) -> "SendResult":
-        """Edit without formatting (default: delegates to edit_message)."""
-        return await self.edit_message(chat_id=chat_id, message_id=message_id, content=content)
-
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """
        Send a typing indicator.
@@ -727,20 +697,11 @@ class BasePlatformAdapter(ABC):
        
        try:
            # Call the handler (this can take a while with tool calls)
-            handler_result = await self._message_handler(event)
-
-            # Normalise: handler may return str or dict(content, already_sent)
-            already_sent = False
-            if isinstance(handler_result, dict):
-                response = handler_result.get("content") or ""
-                already_sent = handler_result.get("already_sent", False)
-            else:
-                response = handler_result
+            response = await self._message_handler(event)
            
            # Send response if any
            if not response:
-                if not already_sent:
-                    logger.warning("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
+                logger.warning("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
            if response:
                # Extract MEDIA:<path> tags (from TTS tool) before other processing
                media_files, response = self.extract_media(response)
@@ -751,7 +712,7 @@ class BasePlatformAdapter(ABC):
                    logger.info("[%s] extract_images found %d image(s) in response (%d chars)", self.name, len(images), len(response))
                
                # Send the text portion first (if any remains after extractions)
-                if text_content and not already_sent:
+                if text_content:
                    logger.info("[%s] Sending response (%d chars) to %s", self.name, len(text_content), event.source.chat_id)
                    result = await self.send(
                        chat_id=event.source.chat_id,
@@ -9,6 +9,7 @@ Uses slack-bolt (Python) with Socket Mode for:
 """

 import asyncio
+import logging
 import os
 import re
 from typing import Dict, List, Optional, Any
@@ -41,6 +42,9 @@ from gateway.platforms.base import (
 )


+logger = logging.getLogger(__name__)
+
+
 def check_slack_requirements() -> bool:
    """Check if Slack dependencies are available."""
    return SLACK_AVAILABLE
@@ -73,17 +77,19 @@ class SlackAdapter(BasePlatformAdapter):
    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
        if not SLACK_AVAILABLE:
-            print("[Slack] slack-bolt not installed. Run: pip install slack-bolt")
+            logger.error(
+                "[Slack] slack-bolt not installed. Run: pip install slack-bolt",
+            )
            return False

        bot_token = self.config.token
        app_token = os.getenv("SLACK_APP_TOKEN")

        if not bot_token:
-            print("[Slack] SLACK_BOT_TOKEN not set")
+            logger.error("[Slack] SLACK_BOT_TOKEN not set")
            return False
        if not app_token:
-            print("[Slack] SLACK_APP_TOKEN not set")
+            logger.error("[Slack] SLACK_APP_TOKEN not set")
            return False

        try:
@@ -117,19 +123,22 @@ class SlackAdapter(BasePlatformAdapter):
            asyncio.create_task(self._handler.start_async())

            self._running = True
-            print(f"[Slack] Connected as @{bot_name} (Socket Mode)")
+            logger.info("[Slack] Connected as @%s (Socket Mode)", bot_name)
            return True

-        except Exception as e:
-            print(f"[Slack] Connection failed: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[Slack] Connection failed: %s", e, exc_info=True)
            return False

    async def disconnect(self) -> None:
        """Disconnect from Slack."""
        if self._handler:
-            await self._handler.close_async()
+            try:
+                await self._handler.close_async()
+            except Exception as e:  # pragma: no cover - defensive logging
+                logger.warning("[Slack] Error while closing Socket Mode handler: %s", e, exc_info=True)
        self._running = False
-        print("[Slack] Disconnected")
+        logger.info("[Slack] Disconnected")

    async def send(
        self,
@@ -162,8 +171,8 @@ class SlackAdapter(BasePlatformAdapter):
                raw_response=result,
            )

-        except Exception as e:
-            print(f"[Slack] Send error: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[Slack] Send error: %s", e, exc_info=True)
            return SendResult(success=False, error=str(e))

    async def edit_message(
@@ -182,7 +191,14 @@ class SlackAdapter(BasePlatformAdapter):
                text=content,
            )
            return SendResult(success=True, message_id=message_id)
-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[Slack] Failed to edit message %s in channel %s: %s",
+                message_id,
+                chat_id,
+                e,
+                exc_info=True,
+            )
            return SendResult(success=False, error=str(e))

    async def send_typing(self, chat_id: str, metadata=None) -> None:
@@ -214,8 +230,14 @@ class SlackAdapter(BasePlatformAdapter):
            )
            return SendResult(success=True, raw_response=result)

-        except Exception as e:
-            print(f"[{self.name}] Failed to send local image: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send local Slack image %s: %s",
+                self.name,
+                image_path,
+                e,
+                exc_info=True,
+            )
            return await super().send_image_file(chat_id, image_path, caption, reply_to)

    async def send_image(
@@ -247,7 +269,13 @@ class SlackAdapter(BasePlatformAdapter):

            return SendResult(success=True, raw_response=result)

-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.warning(
+                "[Slack] Failed to upload image from URL %s, falling back to text: %s",
+                image_url,
+                e,
+                exc_info=True,
+            )
            # Fall back to sending the URL as text
            text = f"{caption}\n{image_url}" if caption else image_url
            return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
@@ -273,7 +301,13 @@ class SlackAdapter(BasePlatformAdapter):
            )
            return SendResult(success=True, raw_response=result)

-        except Exception as e:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[Slack] Failed to send audio file %s: %s",
+                audio_path,
+                e,
+                exc_info=True,
+            )
            return SendResult(success=False, error=str(e))

    async def send_video(
@@ -300,8 +334,14 @@ class SlackAdapter(BasePlatformAdapter):
            )
            return SendResult(success=True, raw_response=result)

-        except Exception as e:
-            print(f"[{self.name}] Failed to send video: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send video %s: %s",
+                self.name,
+                video_path,
+                e,
+                exc_info=True,
+            )
            return await super().send_video(chat_id, video_path, caption, reply_to)

    async def send_document(
@@ -331,8 +371,14 @@ class SlackAdapter(BasePlatformAdapter):
            )
            return SendResult(success=True, raw_response=result)

-        except Exception as e:
-            print(f"[{self.name}] Failed to send document: {e}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[%s] Failed to send document %s: %s",
+                self.name,
+                file_path,
+                e,
+                exc_info=True,
+            )
            return await super().send_document(chat_id, file_path, caption, file_name, reply_to)

    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
@@ -348,7 +394,13 @@ class SlackAdapter(BasePlatformAdapter):
                "name": channel.get("name", chat_id),
                "type": "dm" if is_dm else "group",
            }
-        except Exception:
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error(
+                "[Slack] Failed to fetch chat info for %s: %s",
+                chat_id,
+                e,
+                exc_info=True,
+            )
            return {"name": chat_id, "type": "unknown"}

    # ----- Internal handlers -----
@@ -403,8 +455,8 @@ class SlackAdapter(BasePlatformAdapter):
                    media_urls.append(cached)
                    media_types.append(mimetype)
                    msg_type = MessageType.PHOTO
-                except Exception as e:
-                    print(f"[Slack] Failed to cache image: {e}", flush=True)
+                except Exception as e:  # pragma: no cover - defensive logging
+                    logger.warning("[Slack] Failed to cache image from %s: %s", url, e, exc_info=True)
            elif mimetype.startswith("audio/") and url:
                try:
                    ext = "." + mimetype.split("/")[-1].split(";")[0]
@@ -414,8 +466,8 @@ class SlackAdapter(BasePlatformAdapter):
                    media_urls.append(cached)
                    media_types.append(mimetype)
                    msg_type = MessageType.VOICE
-                except Exception as e:
-                    print(f"[Slack] Failed to cache audio: {e}", flush=True)
+                except Exception as e:  # pragma: no cover - defensive logging
+                    logger.warning("[Slack] Failed to cache audio from %s: %s", url, e, exc_info=True)
            elif url:
                # Try to handle as a document attachment
                try:
@@ -437,7 +489,7 @@ class SlackAdapter(BasePlatformAdapter):
                    file_size = f.get("size", 0)
                    MAX_DOC_BYTES = 20 * 1024 * 1024
                    if not file_size or file_size > MAX_DOC_BYTES:
-                        print(f"[Slack] Document too large or unknown size: {file_size}", flush=True)
+                        logger.warning("[Slack] Document too large or unknown size: %s", file_size)
                        continue

                    # Download and cache
@@ -449,7 +501,7 @@ class SlackAdapter(BasePlatformAdapter):
                    media_urls.append(cached_path)
                    media_types.append(doc_mime)
                    msg_type = MessageType.DOCUMENT
-                    print(f"[Slack] Cached user document: {cached_path}", flush=True)
+                    logger.debug("[Slack] Cached user document: %s", cached_path)

                    # Inject text content for .txt/.md files (capped at 100 KB)
                    MAX_TEXT_INJECT_BYTES = 100 * 1024
@@ -466,8 +518,8 @@ class SlackAdapter(BasePlatformAdapter):
                        except UnicodeDecodeError:
                            pass  # Binary content, skip injection

-                except Exception as e:
-                    print(f"[Slack] Failed to cache document: {e}", flush=True)
+                except Exception as e:  # pragma: no cover - defensive logging
+                    logger.warning("[Slack] Failed to cache document from %s: %s", url, e, exc_info=True)

        # Build source
        source = self.build_source(
@@ -299,99 +299,6 @@ class TelegramAdapter(BasePlatformAdapter):
            )
            return SendResult(success=False, error=str(e))

-    async def send_raw(
-        self, chat_id: str, content: str, metadata: dict = None,
-    ) -> SendResult:
-        """Send a plain-text message without MarkdownV2 formatting."""
-        if not self._bot:
-            return SendResult(success=False, error="Not connected")
-        try:
-            thread_id = metadata.get("thread_id") if metadata else None
-            msg = await self._bot.send_message(
-                chat_id=int(chat_id), text=content, parse_mode=None,
-                message_thread_id=int(thread_id) if thread_id else None,
-            )
-            return SendResult(success=True, message_id=str(msg.message_id))
-        except Exception as e:
-            return SendResult(success=False, error=str(e))
-
-    async def edit_message_raw(
-        self, chat_id: str, message_id: str, content: str,
-    ) -> SendResult:
-        """Edit a message with plain text (no MarkdownV2 formatting)."""
-        if not self._bot:
-            return SendResult(success=False, error="Not connected")
-        try:
-            await self._bot.edit_message_text(
-                chat_id=int(chat_id), message_id=int(message_id),
-                text=content, parse_mode=None,
-            )
-            return SendResult(success=True, message_id=message_id)
-        except Exception as e:
-            return SendResult(success=False, error=str(e))
-
-    @property
-    def supports_streaming(self) -> bool:
-        return True
-
-    @property
-    def supports_draft_streaming(self) -> bool:
-        """Whether this adapter supports Telegram Bot API sendMessageDraft (9.3+)."""
-        return True
-
-    async def send_draft(
-        self, chat_id: str, draft_id: int, text: str, metadata: dict = None,
-    ) -> bool:
-        """Push a draft update via sendMessageDraft (Bot API 9.3+)."""
-        if not self._bot:
-            return False
-        try:
-            thread_id = metadata.get("thread_id") if metadata else None
-            return await self._bot.send_message_draft(
-                chat_id=int(chat_id), draft_id=draft_id, text=text,
-                parse_mode=None,
-                message_thread_id=int(thread_id) if thread_id else None,
-            )
-        except Exception as e:
-            logger.warning("[%s] send_message_draft failed: %s", self.name, e)
-            return False
-
-    async def finalize_draft(
-        self, chat_id: str, content: str, metadata: dict = None,
-    ) -> SendResult:
-        """Finalize a draft stream by sending the completed message with formatting."""
-        if not self._bot:
-            return SendResult(success=False, error="Not connected")
-        try:
-            thread_id = metadata.get("thread_id") if metadata else None
-            formatted = self.format_message(content)
-            try:
-                msg = await self._bot.send_message(
-                    chat_id=int(chat_id), text=formatted,
-                    parse_mode=ParseMode.MARKDOWN_V2,
-                    message_thread_id=int(thread_id) if thread_id else None,
-                )
-            except Exception:
-                msg = await self._bot.send_message(
-                    chat_id=int(chat_id), text=content, parse_mode=None,
-                    message_thread_id=int(thread_id) if thread_id else None,
-                )
-            return SendResult(success=True, message_id=str(msg.message_id))
-        except Exception as e:
-            return SendResult(success=False, error=str(e))
-
-    async def delete_message(self, chat_id: str, message_id: str) -> SendResult:
-        """Delete a Telegram message."""
-        if not self._bot:
-            return SendResult(success=False, error="Not connected")
-        try:
-            await self._bot.delete_message(
-                chat_id=int(chat_id), message_id=int(message_id),
-            )
-            return SendResult(success=True, message_id=message_id)
-        except Exception as e:
-            return SendResult(success=False, error=str(e))
-
    async def send_voice(
        self,
        chat_id: str,
@@ -175,7 +175,6 @@ class AIAgent:
        thinking_callback: callable = None,
        clarify_callback: callable = None,
        step_callback: callable = None,
-        stream_delta_callback: callable = None,
        max_tokens: int = None,
        reasoning_config: Dict[str, Any] = None,
        prefill_messages: List[Dict[str, Any]] = None,
@@ -263,7 +262,6 @@ class AIAgent:
        self.thinking_callback = thinking_callback
        self.clarify_callback = clarify_callback
        self.step_callback = step_callback
-        self.stream_delta_callback = stream_delta_callback
        self._last_reported_tool = None  # Track for "new tool" mode
        
        # Interrupt mechanism for breaking out of tool loops
@@ -2062,147 +2060,6 @@ class AIAgent:
            return terminal_response
        raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")

-    def _interruptible_streaming_api_call(self, api_kwargs: dict, on_first_delta=None):
-        """Streaming variant of _interruptible_api_call for chat_completions.
-
-        Fires self.stream_delta_callback(text) as content tokens arrive and
-        accumulates the full response into a SimpleNamespace matching the shape
-        downstream code expects.  Falls back to the non-streaming path when the
-        provider rejects the stream request.
-        """
-        from types import SimpleNamespace
-
-        result = {"response": None, "error": None}
-        first_delta_fired = [False]
-
-        def _stream():
-            try:
-                stream_kwargs = {**api_kwargs, "stream": True,
-                                 "stream_options": {"include_usage": True}}
-                stream_resp = self.client.chat.completions.create(**stream_kwargs)
-
-                content_parts = []
-                tool_calls_acc = {}
-                finish_reason = "stop"
-                usage = None
-                reasoning_content = None
-                model = None
-                has_tool_calls = False
-
-                try:
-                    for chunk in stream_resp:
-                        if not chunk.choices:
-                            if hasattr(chunk, "usage") and chunk.usage:
-                                usage = chunk.usage
-                            continue
-
-                        choice = chunk.choices[0]
-                        if choice.finish_reason:
-                            finish_reason = choice.finish_reason
-                        if model is None and hasattr(chunk, "model"):
-                            model = chunk.model
-
-                        delta = choice.delta
-                        if delta is None:
-                            continue
-
-                        if delta.content:
-                            content_parts.append(delta.content)
-                            if not first_delta_fired[0]:
-                                first_delta_fired[0] = True
-                                if on_first_delta:
-                                    on_first_delta()
-                            if self.stream_delta_callback and not has_tool_calls:
-                                try:
-                                    self.stream_delta_callback(delta.content)
-                                except Exception:
-                                    pass
-
-                        if delta.tool_calls:
-                            has_tool_calls = True
-                            for tc_delta in delta.tool_calls:
-                                idx = tc_delta.index
-                                if idx not in tool_calls_acc:
-                                    tool_calls_acc[idx] = {
-                                        "id": tc_delta.id or "",
-                                        "type": tc_delta.type or "function",
-                                        "function": {
-                                            "name": getattr(tc_delta.function, "name", None) or "",
-                                            "arguments": getattr(tc_delta.function, "arguments", None) or "",
-                                        },
-                                    }
-                                else:
-                                    entry = tool_calls_acc[idx]
-                                    if tc_delta.id:
-                                        entry["id"] = tc_delta.id
-                                    fn = tc_delta.function
-                                    if fn:
-                                        if fn.name:
-                                            entry["function"]["name"] = fn.name
-                                        if fn.arguments:
-                                            entry["function"]["arguments"] += fn.arguments
-
-                        rc = getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None)
-                        if rc:
-                            reasoning_content = (reasoning_content or "") + rc
-                finally:
-                    close_fn = getattr(stream_resp, "close", None)
-                    if callable(close_fn):
-                        try:
-                            close_fn()
-                        except Exception:
-                            pass
-
-                tool_calls_list = None
-                if tool_calls_acc:
-                    tool_calls_list = [
-                        SimpleNamespace(
-                            id=tc["id"], call_id=tc["id"], type=tc["type"],
-                            function=SimpleNamespace(name=tc["function"]["name"],
-                                                     arguments=tc["function"]["arguments"]),
-                        )
-                        for idx, tc in sorted(tool_calls_acc.items())
-                    ]
-
-                message = SimpleNamespace(
-                    content="".join(content_parts) or None,
-                    tool_calls=tool_calls_list,
-                    reasoning=reasoning_content,
-                    reasoning_content=reasoning_content,
-                    reasoning_details=None,
-                )
-                result["response"] = SimpleNamespace(
-                    choices=[SimpleNamespace(message=message, finish_reason=finish_reason)],
-                    usage=usage,
-                    model=model,
-                )
-            except Exception as e:
-                result["error"] = e
-
-        t = threading.Thread(target=_stream, daemon=True)
-        t.start()
-        while t.is_alive():
-            t.join(timeout=0.3)
-            if self._interrupt_requested:
-                try:
-                    self.client.close()
-                except Exception:
-                    pass
-                try:
-                    self.client = OpenAI(**self._client_kwargs)
-                except Exception:
-                    pass
-                raise InterruptedError("Agent interrupted during streaming API call")
-
-        if result["error"] is not None:
-            err = result["error"]
-            err_str = str(err).lower()
-            if any(kw in err_str for kw in ("stream", "not support", "unsupported")):
-                logger.debug("Streaming failed (%s), falling back to non-streaming.", err)
-                return self._interruptible_api_call(api_kwargs)
-            raise err
-        return result["response"]
-
    def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
        if self.api_mode != "codex_responses" or self.provider != "openai-codex":
            return False
@@ -3617,17 +3474,7 @@ class AIAgent:
                    if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}:
                        self._dump_api_request_debug(api_kwargs, reason="preflight")

-                    if self.stream_delta_callback and self.api_mode != "codex_responses":
-                        def _stop_spinner():
-                            nonlocal thinking_spinner
-                            if thinking_spinner:
-                                thinking_spinner.stop("")
-                                thinking_spinner = None
-
-                        response = self._interruptible_streaming_api_call(
-                            api_kwargs, on_first_delta=_stop_spinner)
-                    else:
-                        response = self._interruptible_api_call(api_kwargs)
+                    response = self._interruptible_api_call(api_kwargs)
                    
                    api_duration = time.time() - api_start_time
                    
@@ -4383,8 +4230,8 @@ class AIAgent:
                    turn_content = assistant_message.content or ""
                    if turn_content and self._has_content_after_think_block(turn_content):
                        self._last_content_with_tools = turn_content
-                        # Show intermediate commentary — skip when streaming (already in buffer)
-                        if self.quiet_mode and not self.stream_delta_callback:
+                        # Show intermediate commentary so the user can follow along
+                        if self.quiet_mode:
                            clean = self._strip_think_blocks(turn_content).strip()
                            if clean:
                                print(f"  ┊ 💬 {clean}")
@@ -115,6 +115,70 @@ class TestCompress:
        assert result[-2]["content"] == msgs[-2]["content"]


+class TestContentToText:
+    """Test _content_to_text handles all content types without crashing."""
+
+    def test_string_passthrough(self, compressor):
+        assert compressor._content_to_text("hello") == "hello"
+
+    def test_none_returns_empty(self, compressor):
+        assert compressor._content_to_text(None) == ""
+
+    def test_multimodal_text_parts(self, compressor):
+        content = [
+            {"type": "text", "text": "describe this image"},
+            {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}},
+        ]
+        result = compressor._content_to_text(content)
+        assert "describe this image" in result
+        assert "[image]" in result
+
+    def test_multimodal_mixed_types(self, compressor):
+        content = [
+            {"type": "text", "text": "first part"},
+            {"type": "audio", "audio": {"data": "..."}},
+            {"type": "text", "text": "second part"},
+        ]
+        result = compressor._content_to_text(content)
+        assert "first part" in result
+        assert "[audio]" in result
+        assert "second part" in result
+
+    def test_dict_content_json_serialized(self, compressor):
+        content = {"key": "value"}
+        result = compressor._content_to_text(content)
+        assert "key" in result
+        assert "value" in result
+
+    def test_multimodal_in_generate_summary(self):
+        """Multimodal user messages should not crash _generate_summary."""
+        mock_client = MagicMock()
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: image was discussed"
+        mock_client.chat.completions.create.return_value = mock_response
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
+             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
+            c = ContextCompressor(model="test", quiet_mode=True)
+
+        messages = [
+            {"role": "user", "content": [
+                {"type": "text", "text": "What is in this image?"},
+                {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}},
+            ]},
+            {"role": "assistant", "content": "I see a cat."},
+            {"role": "user", "content": "thanks"},
+        ]
+
+        summary = c._generate_summary(messages)
+        assert isinstance(summary, str)
+        # The prompt sent to the model should contain the text, not raw list
+        prompt = mock_client.chat.completions.create.call_args.kwargs["messages"][0]["content"]
+        assert "What is in this image?" in prompt
+        assert "[image]" in prompt
+
+
 class TestGenerateSummaryNoneContent:
    """Regression: content=None (from tool-call-only assistant messages) must not crash."""

@@ -1,257 +0,0 @@
-"""Tests for streaming token output — accumulator shape, callback order, fallback."""
-
-import queue
-import threading
-from types import SimpleNamespace
-from unittest.mock import MagicMock, patch, call
-
-import pytest
-
-from run_agent import AIAgent
-
-
-# ---------------------------------------------------------------------------
-# Fixtures
-# ---------------------------------------------------------------------------
-
-def _make_tool_defs(*names):
-    return [
-        {"type": "function", "function": {"name": n, "description": f"{n}", "parameters": {"type": "object", "properties": {}}}}
-        for n in names
-    ]
-
-
-@pytest.fixture()
-def agent():
-    with (
-        patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
-        patch("run_agent.check_toolset_requirements", return_value={}),
-        patch("run_agent.OpenAI"),
-    ):
-        cb = MagicMock()
-        a = AIAgent(
-            api_key="test-key-1234567890",
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-            stream_delta_callback=cb,
-        )
-        a.client = MagicMock()
-        a._stream_cb = cb
-        return a
-
-
-# ---------------------------------------------------------------------------
-# Helpers — fake streaming chunks
-# ---------------------------------------------------------------------------
-
-def _chunk(content=None, tool_call_delta=None, finish_reason=None, usage=None, model=None):
-    delta = SimpleNamespace(content=content, tool_calls=tool_call_delta,
-                            reasoning_content=None, reasoning=None)
-    choice = SimpleNamespace(delta=delta, finish_reason=finish_reason)
-    c = SimpleNamespace(choices=[choice])
-    if usage is not None:
-        c.usage = SimpleNamespace(**usage)
-    if model:
-        c.model = model
-    return c
-
-
-def _usage_chunk(**kw):
-    c = SimpleNamespace(choices=[], usage=SimpleNamespace(**kw))
-    return c
-
-
-def _tc_delta(index, id=None, name=None, arguments=None, type=None):
-    fn = SimpleNamespace(name=name, arguments=arguments)
-    return SimpleNamespace(index=index, id=id, type=type, function=fn)
-
-
-# ---------------------------------------------------------------------------
-# Tests: accumulator shape
-# ---------------------------------------------------------------------------
-
-
-class TestStreamingAccumulator:
-    def test_text_only_response(self, agent):
-        """Streaming text-only response produces correct synthetic shape."""
-        chunks = [
-            _chunk(content="Hello", model="test/m"),
-            _chunk(content=" world"),
-            _chunk(finish_reason="stop"),
-            _usage_chunk(prompt_tokens=10, completion_tokens=5, total_tokens=15),
-        ]
-        agent.client.chat.completions.create.return_value = iter(chunks)
-
-        resp = agent._interruptible_streaming_api_call({"model": "test"})
-
-        assert resp.choices[0].message.content == "Hello world"
-        assert resp.choices[0].message.tool_calls is None
-        assert resp.choices[0].finish_reason == "stop"
-        assert resp.usage.prompt_tokens == 10
-        assert resp.model == "test/m"
-
-    def test_tool_call_response(self, agent):
-        """Streaming tool-call response accumulates function name + arguments."""
-        chunks = [
-            _chunk(tool_call_delta=[_tc_delta(0, id="call_1", name="web_search", arguments='{"q', type="function")]),
-            _chunk(tool_call_delta=[_tc_delta(0, arguments='uery": "hi"}')]),
-            _chunk(finish_reason="tool_calls"),
-        ]
-        agent.client.chat.completions.create.return_value = iter(chunks)
-
-        resp = agent._interruptible_streaming_api_call({"model": "test"})
-
-        tc = resp.choices[0].message.tool_calls
-        assert tc is not None
-        assert len(tc) == 1
-        assert tc[0].id == "call_1"
-        assert tc[0].function.name == "web_search"
-        assert tc[0].function.arguments == '{"query": "hi"}'
-        assert resp.choices[0].finish_reason == "tool_calls"
-
-    def test_mixed_content_and_tool_calls(self, agent):
-        """Content + tool calls in same stream are both accumulated."""
-        chunks = [
-            _chunk(content="Let me check."),
-            _chunk(tool_call_delta=[_tc_delta(0, id="c1", name="web_search", arguments="{}", type="function")]),
-            _chunk(finish_reason="tool_calls"),
-        ]
-        agent.client.chat.completions.create.return_value = iter(chunks)
-
-        resp = agent._interruptible_streaming_api_call({"model": "test"})
-
-        assert resp.choices[0].message.content == "Let me check."
-        assert len(resp.choices[0].message.tool_calls) == 1
-
-
-class TestStreamingCallbacks:
-    def test_deltas_fire_in_order(self, agent):
-        """stream_delta_callback receives content deltas in order."""
-        received = []
-        agent.stream_delta_callback = lambda t: received.append(t)
-        chunks = [_chunk(content="a"), _chunk(content="b"), _chunk(content="c"), _chunk(finish_reason="stop")]
-        agent.client.chat.completions.create.return_value = iter(chunks)
-
-        agent._interruptible_streaming_api_call({"model": "test"})
-
-        assert received == ["a", "b", "c"]
-
-    def test_on_first_delta_fires_once(self, agent):
-        first = MagicMock()
-        chunks = [_chunk(content="x"), _chunk(content="y"), _chunk(finish_reason="stop")]
-        agent.client.chat.completions.create.return_value = iter(chunks)
-
-        agent._interruptible_streaming_api_call({"model": "test"}, on_first_delta=first)
-
-        first.assert_called_once()
-
-    def test_tool_only_does_not_fire_callback(self, agent):
-        """Tool-call-only stream does not invoke stream_delta_callback."""
-        received = []
-        agent.stream_delta_callback = lambda t: received.append(t)
-        chunks = [
-            _chunk(tool_call_delta=[_tc_delta(0, id="c1", name="t", arguments="{}", type="function")]),
-            _chunk(finish_reason="tool_calls"),
-        ]
-        agent.client.chat.completions.create.return_value = iter(chunks)
-
-        agent._interruptible_streaming_api_call({"model": "test"})
-
-        assert received == []
-
-
-class TestStreamingFallback:
-    def test_stream_error_falls_back(self, agent):
-        """When streaming fails with 'not support', falls back to non-streaming."""
-        agent.client.chat.completions.create.side_effect = [
-            Exception("streaming not supported by this provider"),
-            SimpleNamespace(
-                choices=[SimpleNamespace(
-                    message=SimpleNamespace(content="ok", tool_calls=None, reasoning=None, reasoning_content=None, reasoning_details=None),
-                    finish_reason="stop",
-                )],
-                usage=None,
-                model="test/m",
-            ),
-        ]
-
-        resp = agent._interruptible_streaming_api_call({"model": "test"})
-
-        assert resp.choices[0].message.content == "ok"
-        assert agent.client.chat.completions.create.call_count == 2
-
-    def test_non_stream_error_raises(self, agent):
-        """Non-stream-related errors propagate normally."""
-        agent.client.chat.completions.create.side_effect = ValueError("bad request")
-
-        with pytest.raises(ValueError, match="bad request"):
-            agent._interruptible_streaming_api_call({"model": "test"})
-
-
-# ---------------------------------------------------------------------------
-# Tests: base.py already_sent contract
-# ---------------------------------------------------------------------------
-
-class TestAlreadySentContract:
-    def _make_adapter(self, send_side_effect=None):
-        from gateway.platforms.base import BasePlatformAdapter, SendResult
-        from gateway.config import Platform, PlatformConfig
-
-        class FakeAdapter(BasePlatformAdapter):
-            async def connect(self): return True
-            async def disconnect(self): pass
-            async def get_chat_info(self, chat_id): return {"name": "test"}
-            async def send(self, chat_id, content, reply_to=None, metadata=None):
-                if send_side_effect is not None:
-                    send_side_effect(content)
-                return SendResult(success=True, message_id="1")
-
-        cfg = PlatformConfig(enabled=True)
-        adapter = FakeAdapter(cfg, Platform.TELEGRAM)
-        adapter._running = True
-        return adapter
-
-    @pytest.mark.asyncio
-    async def test_already_sent_skips_send(self):
-        """Handler returning already_sent=True prevents base from calling send()."""
-        from gateway.platforms.base import MessageEvent
-        from gateway.config import Platform
-        from gateway.session import SessionSource
-
-        sent = []
-        adapter = self._make_adapter(send_side_effect=lambda c: sent.append(c))
-
-        async def handler(event):
-            return {"content": "hello", "already_sent": True}
-        adapter.set_message_handler(handler)
-
-        event = MessageEvent(
-            text="hi",
-            source=SessionSource(platform=Platform.TELEGRAM, chat_id="1", user_id="u1"),
-        )
-        await adapter._process_message_background(event, "s1")
-
-        assert sent == [], "send() should not be called when already_sent=True"
-
-    @pytest.mark.asyncio
-    async def test_string_response_sends_normally(self):
-        """Handler returning a plain string triggers send() as before."""
-        from gateway.platforms.base import MessageEvent
-        from gateway.config import Platform
-        from gateway.session import SessionSource
-
-        sent = []
-        adapter = self._make_adapter(send_side_effect=lambda c: sent.append(c))
-
-        async def handler(event):
-            return "hello"
-        adapter.set_message_handler(handler)
-
-        event = MessageEvent(
-            text="hi",
-            source=SessionSource(platform=Platform.TELEGRAM, chat_id="1", user_id="u1"),
-        )
-        await adapter._process_message_background(event, "s1")
-
-        assert "hello" in sent