feat(aux): use Portal /api/nous/recommended-models for auxiliary models

Wire the auxiliary client (compaction, vision, session search, web extract) to the Nous Portal's curated recommended-models endpoint when running on Nous Portal, with a TTL-cached fetch that mirrors how we pull /models for pricing. hermes_cli/models.py - fetch_nous_recommended_models(portal_base_url, force_refresh=False) 10-minute TTL cache, keyed per portal URL (staging vs prod don't collide). Public endpoint, no auth required. Returns {} on any failure so callers always get a dict. - get_nous_recommended_aux_model(vision, free_tier=None, ...) Tier-aware pick from the payload: - Paid tier → paidRecommended{Vision,Compaction}Model, falling back to freeRecommended* when the paid field is null (common during staged rollouts of new paid models). - Free tier → freeRecommended* only, never leaks paid models. When free_tier is None, auto-detects via the existing check_nous_free_tier() helper (already cached 3 min against /api/oauth/account). Detection errors default to paid so we never silently downgrade a paying user. agent/auxiliary_client.py — _try_nous() - Replaces the hardcoded xiaomi/mimo free-tier branch with a single call to get_nous_recommended_aux_model(vision=vision). - Falls back to _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable or returns a null recommendation. - The Portal is now the source of truth for aux model selection; the xiaomi allowlist we used to carry is effectively dead. Tests (15 new) - tests/hermes_cli/test_models.py::TestNousRecommendedModels Fetch caching, per-portal keying, network failure, force_refresh; paid-prefers-paid, paid-falls-to-free, free-never-leaks-paid, auto-detect, detection-error → paid default, null/blank modelName handling. - tests/agent/test_auxiliary_client.py::TestNousAuxiliaryRefresh _try_nous honors Portal recommendation for text + vision, falls back to google/gemini-3-flash-preview on None or exception. Behavior won't visibly change today — both tier recommendations currently point at google/gemini-3-flash-preview — but the moment the Portal ships a better paid recommendation, subscribers pick it up within 10 minutes without a Hermes release.
remove Nous Portal free-model allowlist
2026-04-21 22:53:45 -04:00 · 2026-04-21 22:13:05 -04:00
7 changed files with 425 additions and 150 deletions
@@ -182,8 +182,6 @@ auxiliary_is_nous: bool = False
 # Default auxiliary models per provider
 _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
 _NOUS_MODEL = "google/gemini-3-flash-preview"
-_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
-_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"
@@ -927,22 +925,35 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
    global auxiliary_is_nous
    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
-    if nous.get("source") == "pool":
-        model = "gemini-3-flash"
-    else:
-        model = _NOUS_MODEL
-    # Free-tier users can't use paid auxiliary models — use the free
-    # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
-    # Paid accounts keep their tier-appropriate models: gemini-3-flash-preview
-    # for both text and vision tasks.
+
+    # Ask the Portal which model it currently recommends for this task type.
+    # The /api/nous/recommended-models endpoint is the authoritative source:
+    # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
+    # auto-detects the caller's tier via check_nous_free_tier().  Fall back to
+    # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
+    # or returns a null recommendation for this task type.
+    model = _NOUS_MODEL
    try:
-        from hermes_cli.models import check_nous_free_tier
-        if check_nous_free_tier():
-            model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
-            logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
-                         model, "vision" if vision else "text")
-    except Exception:
-        pass
+        from hermes_cli.models import get_nous_recommended_aux_model
+        recommended = get_nous_recommended_aux_model(vision=vision)
+        if recommended:
+            model = recommended
+            logger.debug(
+                "Auxiliary/%s: using Portal-recommended model %s",
+                "vision" if vision else "text", model,
+            )
+        else:
+            logger.debug(
+                "Auxiliary/%s: no Portal recommendation, falling back to %s",
+                "vision" if vision else "text", model,
+            )
+    except Exception as exc:
+        logger.debug(
+            "Auxiliary/%s: recommended-models lookup failed (%s); "
+            "falling back to %s",
+            "vision" if vision else "text", exc, model,
+        )
+
    if runtime is not None:
        api_key, base_url = runtime
    else:
@@ -3375,7 +3375,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                )

            from hermes_cli.models import (
-                _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
+                _PROVIDER_MODELS, get_pricing_for_provider,
                check_nous_free_tier, partition_nous_models_by_tier,
            )
            model_ids = _PROVIDER_MODELS.get("nous", [])
@@ -3384,7 +3384,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
            unavailable_models: list = []
            if model_ids:
                pricing = get_pricing_for_provider("nous")
-                model_ids = filter_nous_free_models(model_ids, pricing)
                free_tier = check_nous_free_tier()
                if free_tier:
                    model_ids, unavailable_models = partition_nous_models_by_tier(
@@ -2165,7 +2165,6 @@ def _model_flow_nous(config, current_model="", args=None):
    from hermes_cli.models import (
        _PROVIDER_MODELS,
        get_pricing_for_provider,
-        filter_nous_free_models,
        check_nous_free_tier,
        partition_nous_models_by_tier,
    )
@@ -2208,10 +2207,8 @@ def _model_flow_nous(config, current_model="", args=None):
    # Check if user is on free tier
    free_tier = check_nous_free_tier()

-    # For both tiers: apply the allowlist filter first (removes non-allowlisted
-    # free models and allowlist models that aren't actually free).
-    # Then for free users: partition remaining models into selectable/unavailable.
-    model_ids = filter_nous_free_models(model_ids, pricing)
+    # For free users: partition models into selectable/unavailable based on
+    # whether they are free per the Portal-reported pricing.
    unavailable_models: list[str] = []
    if free_tier:
        model_ids, unavailable_models = partition_nous_models_by_tier(
@@ -362,17 +362,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
 _PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]

 # ---------------------------------------------------------------------------
-# Nous Portal free-model filtering
+# Nous Portal free-model helper
 # ---------------------------------------------------------------------------
-# Models that are ALLOWED to appear when priced as free on Nous Portal.
-# Any other free model is hidden — prevents promotional/temporary free models
-# from cluttering the selection when users are paying subscribers.
-# Models in this list are ALSO filtered out if they are NOT free (i.e. they
-# should only appear in the menu when they are genuinely free).
-_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
-    "xiaomi/mimo-v2-pro",
-    "xiaomi/mimo-v2-omni",
-})
+# The Nous Portal models endpoint is the source of truth for which models
+# are currently offered (free or paid). We trust whatever it returns and
+# surface it to users as-is — no local allowlist filtering.


 def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
@@ -386,35 +380,6 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
        return False


-def filter_nous_free_models(
-    model_ids: list[str],
-    pricing: dict[str, dict[str, str]],
-) -> list[str]:
-    """Filter the Nous Portal model list according to free-model policy.
-
-    Rules:
-      • Paid models that are NOT in the allowlist → keep (normal case).
-      • Free models that are NOT in the allowlist → drop.
-      • Allowlist models that ARE free → keep.
-      • Allowlist models that are NOT free → drop.
-    """
-    if not pricing:
-        return model_ids  # no pricing data — can't filter, show everything
-
-    result: list[str] = []
-    for mid in model_ids:
-        free = _is_model_free(mid, pricing)
-        if mid in _NOUS_ALLOWED_FREE_MODELS:
-            # Allowlist model: only show when it's actually free
-            if free:
-                result.append(mid)
-        else:
-            # Regular model: keep only when it's NOT free
-            if not free:
-                result.append(mid)
-    return result
-
-
 # ---------------------------------------------------------------------------
 # Nous Portal account tier detection
 # ---------------------------------------------------------------------------
@@ -478,8 +443,7 @@ def partition_nous_models_by_tier(
 ) -> tuple[list[str], list[str]]:
    """Split Nous models into (selectable, unavailable) based on user tier.

-    For paid-tier users: all models are selectable, none unavailable
-    (free-model filtering is handled separately by ``filter_nous_free_models``).
+    For paid-tier users: all models are selectable, none unavailable.

    For free-tier users: only free models are selectable; paid models
    are returned as unavailable (shown grayed out in the menu).
@@ -549,6 +513,157 @@ def check_nous_free_tier() -> bool:
        return False  # default to paid on error — don't block users


+# ---------------------------------------------------------------------------
+# Nous Portal recommended models
+#
+# The Portal publishes a curated list of suggested models (separated into
+# paid and free tiers) plus dedicated recommendations for compaction (text
+# summarisation / auxiliary) and vision tasks. We fetch it once per process
+# with a TTL cache so callers can ask "what's the best aux model right now?"
+# without hitting the network on every lookup.
+#
+# Shape of the response (fields we care about):
+#   {
+#     "paidRecommendedModels":     [ {modelName, ...}, ... ],
+#     "freeRecommendedModels":     [ {modelName, ...}, ... ],
+#     "paidRecommendedCompactionModel":  {modelName, ...} | null,
+#     "paidRecommendedVisionModel":      {modelName, ...} | null,
+#     "freeRecommendedCompactionModel":  {modelName, ...} | null,
+#     "freeRecommendedVisionModel":      {modelName, ...} | null,
+#   }
+# ---------------------------------------------------------------------------
+
+NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models"
+_NOUS_RECOMMENDED_CACHE_TTL: int = 600  # seconds (10 minutes)
+# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide.
+_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}
+
+
+def fetch_nous_recommended_models(
+    portal_base_url: str = "",
+    timeout: float = 5.0,
+    *,
+    force_refresh: bool = False,
+) -> dict[str, Any]:
+    """Fetch the Nous Portal's curated recommended-models payload.
+
+    Hits ``<portal>/api/nous/recommended-models``. The endpoint is public —
+    no auth is required. Results are cached per portal URL for
+    ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to
+    bypass the cache.
+
+    Returns the parsed JSON dict on success, or ``{}`` on any failure
+    (network, parse, non-2xx). Callers must treat missing/null fields as
+    "no recommendation" and fall back to their own default.
+    """
+    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
+    now = time.monotonic()
+    cached = _nous_recommended_cache.get(base)
+    if not force_refresh and cached is not None:
+        payload, cached_at = cached
+        if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL:
+            return payload
+
+    url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}"
+    try:
+        req = urllib.request.Request(
+            url,
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            data = json.loads(resp.read().decode())
+        if not isinstance(data, dict):
+            data = {}
+    except Exception:
+        data = {}
+
+    _nous_recommended_cache[base] = (data, now)
+    return data
+
+
+def _resolve_nous_portal_url() -> str:
+    """Best-effort lookup of the Portal base URL the user is authed against."""
+    try:
+        from hermes_cli.auth import (
+            DEFAULT_NOUS_PORTAL_URL,
+            get_provider_auth_state,
+        )
+        state = get_provider_auth_state("nous") or {}
+        portal = str(state.get("portal_base_url") or "").strip()
+        if portal:
+            return portal.rstrip("/")
+        return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/")
+    except Exception:
+        return "https://portal.nousresearch.com"
+
+
+def _extract_model_name(entry: Any) -> Optional[str]:
+    """Pull the ``modelName`` field from a recommended-model entry, else None."""
+    if not isinstance(entry, dict):
+        return None
+    model_name = entry.get("modelName")
+    if isinstance(model_name, str) and model_name.strip():
+        return model_name.strip()
+    return None
+
+
+def get_nous_recommended_aux_model(
+    *,
+    vision: bool = False,
+    free_tier: Optional[bool] = None,
+    portal_base_url: str = "",
+    force_refresh: bool = False,
+) -> Optional[str]:
+    """Return the Portal's recommended model name for an auxiliary task.
+
+    Picks the best field from the Portal's recommended-models payload:
+
+    * ``vision=True``  → ``paidRecommendedVisionModel``  (paid tier) or
+                         ``freeRecommendedVisionModel``  (free tier)
+    * ``vision=False`` → ``paidRecommendedCompactionModel`` or
+                         ``freeRecommendedCompactionModel``
+
+    When ``free_tier`` is ``None`` (default) the user's tier is auto-detected
+    via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the
+    detection — useful for tests or when the caller already knows the tier.
+
+    For paid-tier users we prefer the paid recommendation but gracefully fall
+    back to the free recommendation if the Portal returned ``null`` for the
+    paid field (common during the staged rollout of new paid models).
+
+    Returns ``None`` when every candidate is missing, null, or the fetch
+    fails — callers should fall back to their own default (currently
+    ``google/gemini-3-flash-preview``).
+    """
+    base = portal_base_url or _resolve_nous_portal_url()
+    payload = fetch_nous_recommended_models(base, force_refresh=force_refresh)
+    if not payload:
+        return None
+
+    if free_tier is None:
+        try:
+            free_tier = check_nous_free_tier()
+        except Exception:
+            # On any detection error, assume paid — paid users see both fields
+            # anyway so this is a safe default that maximises model quality.
+            free_tier = False
+
+    if vision:
+        paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel"
+    else:
+        paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel"
+
+    # Preference order:
+    #   free tier  → free only
+    #   paid tier  → paid, then free (if paid field is null)
+    candidates = [free_key] if free_tier else [paid_key, free_key]
+    for key in candidates:
+        name = _extract_model_name(payload.get(key))
+        if name:
+            return name
+    return None
+
+
 # ---------------------------------------------------------------------------
 # Canonical provider list — single source of truth for provider identity.
 # Every code path that lists, displays, or iterates providers derives from
@@ -483,6 +483,7 @@ class TestNousAuxiliaryRefresh:
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
+            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous
@@ -491,10 +492,60 @@ class TestNousAuxiliaryRefresh:
            client, model = _try_nous()

        assert client is not None
+        # No Portal recommendation → falls back to the hardcoded default.
        assert model == "google/gemini-3-flash-preview"
        assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
        assert mock_openai.call_args.kwargs["base_url"] == fresh_base

+    def test_try_nous_uses_portal_recommendation_for_text(self):
+        """When the Portal recommends a compaction model, _try_nous honors it."""
+        fresh_base = "https://inference-api.nousresearch.com/v1"
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
+            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
+            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="minimax/minimax-m2.7") as mock_rec,
+            patch("agent.auxiliary_client.OpenAI") as mock_openai,
+        ):
+            from agent.auxiliary_client import _try_nous
+
+            mock_openai.return_value = MagicMock()
+            client, model = _try_nous(vision=False)
+
+        assert client is not None
+        assert model == "minimax/minimax-m2.7"
+        assert mock_rec.call_args.kwargs["vision"] is False
+
+    def test_try_nous_uses_portal_recommendation_for_vision(self):
+        """Vision tasks should ask for the vision-specific recommendation."""
+        fresh_base = "https://inference-api.nousresearch.com/v1"
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
+            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
+            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="google/gemini-3-flash-preview") as mock_rec,
+            patch("agent.auxiliary_client.OpenAI"),
+        ):
+            from agent.auxiliary_client import _try_nous
+            client, model = _try_nous(vision=True)
+
+        assert client is not None
+        assert model == "google/gemini-3-flash-preview"
+        assert mock_rec.call_args.kwargs["vision"] is True
+
+    def test_try_nous_falls_back_when_recommendation_lookup_raises(self):
+        """If the Portal lookup throws, we must still return a usable model."""
+        fresh_base = "https://inference-api.nousresearch.com/v1"
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
+            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
+            patch("hermes_cli.models.get_nous_recommended_aux_model", side_effect=RuntimeError("portal down")),
+            patch("agent.auxiliary_client.OpenAI"),
+        ):
+            from agent.auxiliary_client import _try_nous
+            client, model = _try_nous()
+
+        assert client is not None
+        assert model == "google/gemini-3-flash-preview"
+
    def test_call_llm_retries_nous_after_401(self):
        class _Auth401(Exception):
            status_code = 401
@@ -376,7 +376,6 @@ class TestLoginNousSkipKeepsCurrent:
            lambda *a, **kw: prompt_returns,
        )
        monkeypatch.setattr(models_mod, "get_pricing_for_provider", lambda p: {})
-        monkeypatch.setattr(models_mod, "filter_nous_free_models", lambda ids, p: ids)
        monkeypatch.setattr(models_mod, "check_nous_free_tier", lambda: None)
        monkeypatch.setattr(
            models_mod, "partition_nous_models_by_tier",
@@ -4,7 +4,6 @@ from unittest.mock import patch, MagicMock

 from hermes_cli.models import (
    OPENROUTER_MODELS, fetch_openrouter_models, model_ids, detect_provider_for_model,
-    filter_nous_free_models, _NOUS_ALLOWED_FREE_MODELS,
    is_nous_free_tier, partition_nous_models_by_tier,
    check_nous_free_tier, _FREE_TIER_CACHE_TTL,
 )
@@ -293,89 +292,6 @@ class TestDetectProviderForModel:
        assert result[0] not in ("nous",)  # nous has claude models but shouldn't be suggested


-class TestFilterNousFreeModels:
-    """Tests for filter_nous_free_models — Nous Portal free-model policy."""
-
-    _PAID = {"prompt": "0.000003", "completion": "0.000015"}
-    _FREE = {"prompt": "0", "completion": "0"}
-
-    def test_paid_models_kept(self):
-        """Regular paid models pass through unchanged."""
-        models = ["anthropic/claude-opus-4.6", "openai/gpt-5.4"]
-        pricing = {m: self._PAID for m in models}
-        assert filter_nous_free_models(models, pricing) == models
-
-    def test_free_non_allowlist_models_removed(self):
-        """Free models NOT in the allowlist are filtered out."""
-        models = ["anthropic/claude-opus-4.6", "arcee-ai/trinity-large-preview:free"]
-        pricing = {
-            "anthropic/claude-opus-4.6": self._PAID,
-            "arcee-ai/trinity-large-preview:free": self._FREE,
-        }
-        result = filter_nous_free_models(models, pricing)
-        assert result == ["anthropic/claude-opus-4.6"]
-
-    def test_allowlist_model_kept_when_free(self):
-        """Allowlist models are kept when they report as free."""
-        models = ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro"]
-        pricing = {
-            "anthropic/claude-opus-4.6": self._PAID,
-            "xiaomi/mimo-v2-pro": self._FREE,
-        }
-        result = filter_nous_free_models(models, pricing)
-        assert result == ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro"]
-
-    def test_allowlist_model_removed_when_paid(self):
-        """Allowlist models are removed when they are NOT free."""
-        models = ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro"]
-        pricing = {
-            "anthropic/claude-opus-4.6": self._PAID,
-            "xiaomi/mimo-v2-pro": self._PAID,
-        }
-        result = filter_nous_free_models(models, pricing)
-        assert result == ["anthropic/claude-opus-4.6"]
-
-    def test_no_pricing_returns_all(self):
-        """When pricing data is unavailable, all models pass through."""
-        models = ["anthropic/claude-opus-4.6", "nvidia/nemotron-3-super-120b-a12b:free"]
-        assert filter_nous_free_models(models, {}) == models
-
-    def test_model_with_no_pricing_entry_treated_as_paid(self):
-        """A model missing from the pricing dict is kept (assumed paid)."""
-        models = ["anthropic/claude-opus-4.6", "openai/gpt-5.4"]
-        pricing = {"anthropic/claude-opus-4.6": self._PAID}  # gpt-5.4 not in pricing
-        result = filter_nous_free_models(models, pricing)
-        assert result == models
-
-    def test_mixed_scenario(self):
-        """End-to-end: mix of paid, free-allowed, free-disallowed, allowlist-not-free."""
-        models = [
-            "anthropic/claude-opus-4.6",       # paid, not allowlist → keep
-            "nvidia/nemotron-3-super-120b-a12b:free",  # free, not allowlist → drop
-            "xiaomi/mimo-v2-pro",              # free, allowlist → keep
-            "xiaomi/mimo-v2-omni",             # paid, allowlist → drop
-            "openai/gpt-5.4",                  # paid, not allowlist → keep
-        ]
-        pricing = {
-            "anthropic/claude-opus-4.6": self._PAID,
-            "nvidia/nemotron-3-super-120b-a12b:free": self._FREE,
-            "xiaomi/mimo-v2-pro": self._FREE,
-            "xiaomi/mimo-v2-omni": self._PAID,
-            "openai/gpt-5.4": self._PAID,
-        }
-        result = filter_nous_free_models(models, pricing)
-        assert result == [
-            "anthropic/claude-opus-4.6",
-            "xiaomi/mimo-v2-pro",
-            "openai/gpt-5.4",
-        ]
-
-    def test_allowlist_contains_expected_models(self):
-        """Sanity: the allowlist has the models we expect."""
-        assert "xiaomi/mimo-v2-pro" in _NOUS_ALLOWED_FREE_MODELS
-        assert "xiaomi/mimo-v2-omni" in _NOUS_ALLOWED_FREE_MODELS
-
-
 class TestIsNousFreeTier:
    """Tests for is_nous_free_tier — account tier detection."""

@@ -501,3 +417,190 @@ class TestCheckNousFreeTierCache:
    def test_cache_ttl_is_short(self):
        """TTL should be short enough to catch upgrades quickly (<=5 min)."""
        assert _FREE_TIER_CACHE_TTL <= 300
+
+
+class TestNousRecommendedModels:
+    """Tests for fetch_nous_recommended_models + get_nous_recommended_aux_model."""
+
+    _SAMPLE_PAYLOAD = {
+        "paidRecommendedModels": [],
+        "freeRecommendedModels": [],
+        "paidRecommendedCompactionModel": None,
+        "paidRecommendedVisionModel": None,
+        "freeRecommendedCompactionModel": {
+            "modelName": "google/gemini-3-flash-preview",
+            "displayName": "Google: Gemini 3 Flash Preview",
+        },
+        "freeRecommendedVisionModel": {
+            "modelName": "google/gemini-3-flash-preview",
+            "displayName": "Google: Gemini 3 Flash Preview",
+        },
+    }
+
+    def setup_method(self):
+        _models_mod._nous_recommended_cache.clear()
+
+    def teardown_method(self):
+        _models_mod._nous_recommended_cache.clear()
+
+    def _mock_urlopen(self, payload):
+        """Return a context-manager mock mimicking urllib.request.urlopen()."""
+        import json as _json
+        response = MagicMock()
+        response.read.return_value = _json.dumps(payload).encode()
+        cm = MagicMock()
+        cm.__enter__.return_value = response
+        cm.__exit__.return_value = False
+        return cm
+
+    def test_fetch_caches_per_portal_url(self):
+        from hermes_cli.models import fetch_nous_recommended_models
+        mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
+        with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen:
+            a = fetch_nous_recommended_models("https://portal.example.com")
+            b = fetch_nous_recommended_models("https://portal.example.com")
+        assert a == self._SAMPLE_PAYLOAD
+        assert b == self._SAMPLE_PAYLOAD
+        assert mock_urlopen.call_count == 1  # second call served from cache
+
+    def test_fetch_cache_is_keyed_per_portal(self):
+        from hermes_cli.models import fetch_nous_recommended_models
+        mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
+        with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen:
+            fetch_nous_recommended_models("https://portal.example.com")
+            fetch_nous_recommended_models("https://portal.staging-nousresearch.com")
+        assert mock_urlopen.call_count == 2  # different portals → separate fetches
+
+    def test_fetch_returns_empty_on_network_failure(self):
+        from hermes_cli.models import fetch_nous_recommended_models
+        with patch("urllib.request.urlopen", side_effect=OSError("boom")):
+            result = fetch_nous_recommended_models("https://portal.example.com")
+        assert result == {}
+
+    def test_fetch_force_refresh_bypasses_cache(self):
+        from hermes_cli.models import fetch_nous_recommended_models
+        mock_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
+        with patch("urllib.request.urlopen", return_value=mock_cm) as mock_urlopen:
+            fetch_nous_recommended_models("https://portal.example.com")
+            fetch_nous_recommended_models("https://portal.example.com", force_refresh=True)
+        assert mock_urlopen.call_count == 2
+
+    def test_get_aux_model_returns_vision_recommendation(self):
+        from hermes_cli.models import get_nous_recommended_aux_model
+        with patch(
+            "hermes_cli.models.fetch_nous_recommended_models",
+            return_value=self._SAMPLE_PAYLOAD,
+        ):
+            # Free tier → free vision recommendation.
+            model = get_nous_recommended_aux_model(vision=True, free_tier=True)
+        assert model == "google/gemini-3-flash-preview"
+
+    def test_get_aux_model_returns_compaction_recommendation(self):
+        from hermes_cli.models import get_nous_recommended_aux_model
+        payload = dict(self._SAMPLE_PAYLOAD)
+        payload["freeRecommendedCompactionModel"] = {"modelName": "minimax/minimax-m2.7"}
+        with patch(
+            "hermes_cli.models.fetch_nous_recommended_models",
+            return_value=payload,
+        ):
+            model = get_nous_recommended_aux_model(vision=False, free_tier=True)
+        assert model == "minimax/minimax-m2.7"
+
+    def test_get_aux_model_returns_none_when_field_null(self):
+        from hermes_cli.models import get_nous_recommended_aux_model
+        payload = dict(self._SAMPLE_PAYLOAD)
+        payload["freeRecommendedCompactionModel"] = None
+        with patch(
+            "hermes_cli.models.fetch_nous_recommended_models",
+            return_value=payload,
+        ):
+            model = get_nous_recommended_aux_model(vision=False, free_tier=True)
+        assert model is None
+
+    def test_get_aux_model_returns_none_on_empty_payload(self):
+        from hermes_cli.models import get_nous_recommended_aux_model
+        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value={}):
+            assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None
+            assert get_nous_recommended_aux_model(vision=True, free_tier=False) is None
+
+    def test_get_aux_model_returns_none_when_modelname_blank(self):
+        from hermes_cli.models import get_nous_recommended_aux_model
+        payload = {"freeRecommendedCompactionModel": {"modelName": "  "}}
+        with patch(
+            "hermes_cli.models.fetch_nous_recommended_models",
+            return_value=payload,
+        ):
+            assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None
+
+    def test_paid_tier_prefers_paid_recommendation(self):
+        """Paid-tier users should get the paid model when it's populated."""
+        from hermes_cli.models import get_nous_recommended_aux_model
+        payload = {
+            "paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"},
+            "freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"},
+            "paidRecommendedVisionModel": {"modelName": "openai/gpt-5.4"},
+            "freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"},
+        }
+        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
+            text = get_nous_recommended_aux_model(vision=False, free_tier=False)
+            vision = get_nous_recommended_aux_model(vision=True, free_tier=False)
+        assert text == "anthropic/claude-opus-4.7"
+        assert vision == "openai/gpt-5.4"
+
+    def test_paid_tier_falls_back_to_free_when_paid_is_null(self):
+        """If the Portal returns null for the paid field, fall back to free."""
+        from hermes_cli.models import get_nous_recommended_aux_model
+        payload = {
+            "paidRecommendedCompactionModel": None,
+            "freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"},
+            "paidRecommendedVisionModel": None,
+            "freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"},
+        }
+        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
+            text = get_nous_recommended_aux_model(vision=False, free_tier=False)
+            vision = get_nous_recommended_aux_model(vision=True, free_tier=False)
+        assert text == "google/gemini-3-flash-preview"
+        assert vision == "google/gemini-3-flash-preview"
+
+    def test_free_tier_never_uses_paid_recommendation(self):
+        """Free-tier users must not get paid-only recommendations."""
+        from hermes_cli.models import get_nous_recommended_aux_model
+        payload = {
+            "paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"},
+            "freeRecommendedCompactionModel": None,  # no free recommendation
+        }
+        with patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload):
+            model = get_nous_recommended_aux_model(vision=False, free_tier=True)
+        # Free tier must return None — never leak the paid model.
+        assert model is None
+
+    def test_auto_detects_tier_when_not_supplied(self):
+        """Default behaviour: call check_nous_free_tier() to pick the tier."""
+        from hermes_cli.models import get_nous_recommended_aux_model
+        payload = {
+            "paidRecommendedCompactionModel": {"modelName": "paid-model"},
+            "freeRecommendedCompactionModel": {"modelName": "free-model"},
+        }
+        with (
+            patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload),
+            patch("hermes_cli.models.check_nous_free_tier", return_value=True),
+        ):
+            assert get_nous_recommended_aux_model(vision=False) == "free-model"
+        with (
+            patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload),
+            patch("hermes_cli.models.check_nous_free_tier", return_value=False),
+        ):
+            assert get_nous_recommended_aux_model(vision=False) == "paid-model"
+
+    def test_tier_detection_error_defaults_to_paid(self):
+        """If tier detection raises, assume paid so we don't downgrade silently."""
+        from hermes_cli.models import get_nous_recommended_aux_model
+        payload = {
+            "paidRecommendedCompactionModel": {"modelName": "paid-model"},
+            "freeRecommendedCompactionModel": {"modelName": "free-model"},
+        }
+        with (
+            patch("hermes_cli.models.fetch_nous_recommended_models", return_value=payload),
+            patch("hermes_cli.models.check_nous_free_tier", side_effect=RuntimeError("boom")),
+        ):
+            assert get_nous_recommended_aux_model(vision=False) == "paid-model"