Compare commits

...

2 Commits

Author SHA1 Message Date
emozilla 338b98161a feat(aux): use Portal /api/nous/recommended-models for auxiliary models
Wire the auxiliary client (compaction, vision, session search, web extract)
to the Nous Portal's curated recommended-models endpoint when running on
Nous Portal, with a TTL-cached fetch that mirrors how we pull /models for
pricing.

hermes_cli/models.py
  - fetch_nous_recommended_models(portal_base_url, force_refresh=False)
    10-minute TTL cache, keyed per portal URL (staging vs prod don't
    collide).  Public endpoint, no auth required.  Returns {} on any
    failure so callers always get a dict.
  - get_nous_recommended_aux_model(vision, free_tier=None, ...)
    Tier-aware pick from the payload:
      - Paid tier → paidRecommended{Vision,Compaction}Model, falling back
        to freeRecommended* when the paid field is null (common during
        staged rollouts of new paid models).
      - Free tier → freeRecommended* only, never leaks paid models.
    When free_tier is None, auto-detects via the existing
    check_nous_free_tier() helper (already cached 3 min against
    /api/oauth/account).  Detection errors default to paid so we never
    silently downgrade a paying user.

agent/auxiliary_client.py — _try_nous()
  - Replaces the hardcoded xiaomi/mimo free-tier branch with a single call
    to get_nous_recommended_aux_model(vision=vision).
  - Falls back to _NOUS_MODEL (google/gemini-3-flash-preview) when the
    Portal is unreachable or returns a null recommendation.
  - The Portal is now the source of truth for aux model selection; the
    xiaomi allowlist we used to carry is effectively dead.

Tests (15 new)
  - tests/hermes_cli/test_models.py::TestNousRecommendedModels
    Fetch caching, per-portal keying, network failure, force_refresh;
    paid-prefers-paid, paid-falls-to-free, free-never-leaks-paid,
    auto-detect, detection-error → paid default, null/blank modelName
    handling.
  - tests/agent/test_auxiliary_client.py::TestNousAuxiliaryRefresh
    _try_nous honors Portal recommendation for text + vision, falls
    back to google/gemini-3-flash-preview on None or exception.

Behavior won't visibly change today — both tier recommendations currently
point at google/gemini-3-flash-preview — but the moment the Portal ships
a better paid recommendation, subscribers pick it up within 10 minutes
without a Hermes release.
2026-04-21 22:53:45 -04:00
emozilla 0e88760852 remove Nous Portal free-model allowlist
Drop _NOUS_ALLOWED_FREE_MODELS + filter_nous_free_models and its two call
sites. Whatever Nous Portal prices as free now shows up in the picker as-is
— no local allowlist gatekeeping. Free-tier partitioning (paid vs free in
the menu) still runs via partition_nous_models_by_tier.
2026-04-21 22:13:05 -04:00
7 changed files with 425 additions and 150 deletions
+28 -17
View File
@@ -182,8 +182,6 @@ auxiliary_is_nous: bool = False
# Default auxiliary models per provider
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
_NOUS_MODEL = "google/gemini-3-flash-preview"
_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
_AUTH_JSON_PATH = get_hermes_home() / "auth.json"
@@ -927,22 +925,35 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
global auxiliary_is_nous
auxiliary_is_nous = True
logger.debug("Auxiliary client: Nous Portal")
if nous.get("source") == "pool":
model = "gemini-3-flash"
else:
model = _NOUS_MODEL
# Free-tier users can't use paid auxiliary models — use the free
# models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
# Paid accounts keep their tier-appropriate models: gemini-3-flash-preview
# for both text and vision tasks.
# Ask the Portal which model it currently recommends for this task type.
# The /api/nous/recommended-models endpoint is the authoritative source:
# it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
# auto-detects the caller's tier via check_nous_free_tier(). Fall back to
# _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
# or returns a null recommendation for this task type.
model = _NOUS_MODEL
try:
from hermes_cli.models import check_nous_free_tier
if check_nous_free_tier():
model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
model, "vision" if vision else "text")
except Exception:
pass
from hermes_cli.models import get_nous_recommended_aux_model
recommended = get_nous_recommended_aux_model(vision=vision)
if recommended:
model = recommended
logger.debug(
"Auxiliary/%s: using Portal-recommended model %s",
"vision" if vision else "text", model,
)
else:
logger.debug(
"Auxiliary/%s: no Portal recommendation, falling back to %s",
"vision" if vision else "text", model,
)
except Exception as exc:
logger.debug(
"Auxiliary/%s: recommended-models lookup failed (%s); "
"falling back to %s",
"vision" if vision else "text", exc, model,
)
if runtime is not None:
api_key, base_url = runtime
else:
+1 -2
View File
@@ -3375,7 +3375,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
)
from hermes_cli.models import (
_PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
_PROVIDER_MODELS, get_pricing_for_provider,
check_nous_free_tier, partition_nous_models_by_tier,
)
model_ids = _PROVIDER_MODELS.get("nous", [])
@@ -3384,7 +3384,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
unavailable_models: list = []
if model_ids:
pricing = get_pricing_for_provider("nous")
model_ids = filter_nous_free_models(model_ids, pricing)
free_tier = check_nous_free_tier()
if free_tier:
model_ids, unavailable_models = partition_nous_models_by_tier(
+2 -5
View File
@@ -2165,7 +2165,6 @@ def _model_flow_nous(config, current_model="", args=None):
from hermes_cli.models import (
_PROVIDER_MODELS,
get_pricing_for_provider,
filter_nous_free_models,
check_nous_free_tier,
partition_nous_models_by_tier,
)
@@ -2208,10 +2207,8 @@ def _model_flow_nous(config, current_model="", args=None):
# Check if user is on free tier
free_tier = check_nous_free_tier()
# For both tiers: apply the allowlist filter first (removes non-allowlisted
# free models and allowlist models that aren't actually free).
# Then for free users: partition remaining models into selectable/unavailable.
model_ids = filter_nous_free_models(model_ids, pricing)
# For free users: partition models into selectable/unavailable based on
# whether they are free per the Portal-reported pricing.
unavailable_models: list[str] = []
if free_tier:
model_ids, unavailable_models = partition_nous_models_by_tier(
+156 -41
View File
@@ -362,17 +362,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
# ---------------------------------------------------------------------------
# Nous Portal free-model filtering
# Nous Portal free-model helper
# ---------------------------------------------------------------------------
# Models that are ALLOWED to appear when priced as free on Nous Portal.
# Any other free model is hidden — prevents promotional/temporary free models
# from cluttering the selection when users are paying subscribers.
# Models in this list are ALSO filtered out if they are NOT free (i.e. they
# should only appear in the menu when they are genuinely free).
_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
"xiaomi/mimo-v2-pro",
"xiaomi/mimo-v2-omni",
})
# The Nous Portal models endpoint is the source of truth for which models
# are currently offered (free or paid). We trust whatever it returns and
# surface it to users as-is — no local allowlist filtering.
def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
@@ -386,35 +380,6 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
return False
def filter_nous_free_models(
    model_ids: list[str],
    pricing: dict[str, dict[str, str]],
) -> list[str]:
    """Apply the Nous Portal free-model policy to a list of model ids.

    A model is kept exactly when its allowlist membership matches its
    free/paid status:

    * not allowlisted and paid  -> kept (the normal case)
    * not allowlisted and free  -> dropped
    * allowlisted and free      -> kept
    * allowlisted and paid      -> dropped

    When ``pricing`` is empty there is nothing to decide on, so the input
    list is handed back unchanged (the very same object).
    """
    if not pricing:
        # No pricing data — can't filter, show everything.
        return model_ids

    def _passes_policy(candidate: str) -> bool:
        is_free = _is_model_free(candidate, pricing)
        if candidate in _NOUS_ALLOWED_FREE_MODELS:
            # Allowlisted models are only shown while they are actually free.
            return is_free
        # Everything else is only shown while it is NOT free.
        return not is_free

    return [candidate for candidate in model_ids if _passes_policy(candidate)]
# ---------------------------------------------------------------------------
# Nous Portal account tier detection
# ---------------------------------------------------------------------------
@@ -478,8 +443,7 @@ def partition_nous_models_by_tier(
) -> tuple[list[str], list[str]]:
"""Split Nous models into (selectable, unavailable) based on user tier.
For paid-tier users: all models are selectable, none unavailable
(free-model filtering is handled separately by ``filter_nous_free_models``).
For paid-tier users: all models are selectable, none unavailable.
For free-tier users: only free models are selectable; paid models
are returned as unavailable (shown grayed out in the menu).
@@ -549,6 +513,157 @@ def check_nous_free_tier() -> bool:
return False # default to paid on error — don't block users
# ---------------------------------------------------------------------------
# Nous Portal recommended models
#
# The Portal publishes a curated list of suggested models (separated into
# paid and free tiers) plus dedicated recommendations for compaction (text
# summarisation / auxiliary) and vision tasks.  We fetch it on demand and keep
# the result in an in-process TTL cache (keyed per portal URL) so callers can
# ask "what's the best aux model right now?" without hitting the network on
# every lookup.
#
# Shape of the response (fields we care about):
# {
# "paidRecommendedModels": [ {modelName, ...}, ... ],
# "freeRecommendedModels": [ {modelName, ...}, ... ],
# "paidRecommendedCompactionModel": {modelName, ...} | null,
# "paidRecommendedVisionModel": {modelName, ...} | null,
# "freeRecommendedCompactionModel": {modelName, ...} | null,
# "freeRecommendedVisionModel": {modelName, ...} | null,
# }
# ---------------------------------------------------------------------------
NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models"
_NOUS_RECOMMENDED_CACHE_TTL: int = 600  # seconds (10 minutes)
# Failed/empty lookups are remembered only briefly: long enough that a Portal
# outage doesn't make every caller block on the network timeout, short enough
# that a single transient error doesn't disable recommendations for the full
# success TTL.
_NOUS_RECOMMENDED_FAILURE_TTL: int = 60  # seconds
# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide.
_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}


def fetch_nous_recommended_models(
    portal_base_url: str = "",
    timeout: float = 5.0,
    *,
    force_refresh: bool = False,
) -> dict[str, Any]:
    """Fetch the Nous Portal's curated recommended-models payload.

    Hits ``<portal>/api/nous/recommended-models`` without sending any
    credentials.  Results are cached per portal URL (trailing slashes are
    ignored when building the cache key):

    * a non-empty payload is reused for ``_NOUS_RECOMMENDED_CACHE_TTL``
      seconds;
    * an empty result (any failure, or a response that is not a JSON
      object) is reused for only ``_NOUS_RECOMMENDED_FAILURE_TTL`` seconds,
      so a transient error is retried soon.

    Args:
        portal_base_url: Portal base URL; empty means
            ``https://portal.nousresearch.com``.
        timeout: Timeout in seconds passed to ``urllib.request.urlopen``.
        force_refresh: When true, skip the cache lookup and always fetch.

    Returns:
        The parsed JSON dict on success, or ``{}`` on any failure (network,
        parse, non-dict body).  Callers must treat missing/null fields as
        "no recommendation" and fall back to their own default.  The
        returned dict is the cached object itself — do not mutate it.
    """
    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")

    cached = _nous_recommended_cache.get(base)
    if not force_refresh and cached is not None:
        payload, cached_at = cached
        # An empty payload marks a failed lookup and expires much sooner.
        ttl = _NOUS_RECOMMENDED_CACHE_TTL if payload else _NOUS_RECOMMENDED_FAILURE_TTL
        if time.monotonic() - cached_at < ttl:
            return payload

    url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}"
    try:
        req = urllib.request.Request(
            url,
            headers={"Accept": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read().decode())
        if not isinstance(data, dict):
            data = {}
    except Exception:
        # Deliberately best-effort: any failure degrades to "no recommendation".
        data = {}

    # Stamp the entry after the fetch so a slow request doesn't eat into the TTL.
    _nous_recommended_cache[base] = (data, time.monotonic())
    return data
def _resolve_nous_portal_url() -> str:
    """Return the Portal base URL to query, without a trailing slash.

    Uses the ``portal_base_url`` stored in the ``nous`` provider auth state
    when it is set to a non-blank value, otherwise ``DEFAULT_NOUS_PORTAL_URL``
    from ``hermes_cli.auth``.  If anything in that lookup raises (including
    the import itself), the hardcoded production URL is returned instead.
    """
    try:
        from hermes_cli.auth import (
            DEFAULT_NOUS_PORTAL_URL,
            get_provider_auth_state,
        )

        auth_state = get_provider_auth_state("nous") or {}
        configured = str(auth_state.get("portal_base_url") or "").strip()
        chosen = configured if configured else str(DEFAULT_NOUS_PORTAL_URL)
        return chosen.rstrip("/")
    except Exception:
        return "https://portal.nousresearch.com"
def _extract_model_name(entry: Any) -> Optional[str]:
    """Return the stripped ``modelName`` of a recommended-model entry.

    Yields ``None`` when ``entry`` is not a dict, when ``modelName`` is
    absent or not a string, or when it is blank after stripping whitespace.
    """
    raw_name = entry.get("modelName") if isinstance(entry, dict) else None
    if not isinstance(raw_name, str):
        return None
    trimmed = raw_name.strip()
    return trimmed or None
def get_nous_recommended_aux_model(
    *,
    vision: bool = False,
    free_tier: Optional[bool] = None,
    portal_base_url: str = "",
    force_refresh: bool = False,
) -> Optional[str]:
    """Return the Portal's recommended model name for an auxiliary task.

    Which payload fields are consulted depends on the task and the tier:

    * ``vision=True``  -> ``paidRecommendedVisionModel`` (paid tier) or
      ``freeRecommendedVisionModel`` (free tier)
    * ``vision=False`` -> ``paidRecommendedCompactionModel`` (paid tier) or
      ``freeRecommendedCompactionModel`` (free tier)

    Free-tier callers only ever read the free field.  Paid-tier callers
    read the paid field first and fall back to the free field when the
    paid one yields no usable model name.

    Args:
        vision: Ask for the vision recommendation instead of the
            compaction (text) one.
        free_tier: Explicit tier.  ``None`` (default) auto-detects it via
            :func:`check_nous_free_tier`; if that call raises, the caller
            is treated as paid.
        portal_base_url: Portal to query; empty means "resolve it with
            ``_resolve_nous_portal_url()``".
        force_refresh: Forwarded to :func:`fetch_nous_recommended_models`.

    Returns:
        The recommended model name, or ``None`` when the payload is empty
        or no consulted field holds a usable name — callers should then
        fall back to their own default.
    """
    portal = portal_base_url or _resolve_nous_portal_url()
    recommendations = fetch_nous_recommended_models(portal, force_refresh=force_refresh)
    if not recommendations:
        return None

    if free_tier is None:
        try:
            free_tier = check_nous_free_tier()
        except Exception:
            # Detection failed: treat the caller as paid.  The paid lookup
            # order still consults the free field as a fallback.
            free_tier = False

    fields_by_task = {
        True: ("paidRecommendedVisionModel", "freeRecommendedVisionModel"),
        False: ("paidRecommendedCompactionModel", "freeRecommendedCompactionModel"),
    }
    paid_field, free_field = fields_by_task[bool(vision)]

    # free tier -> free only; paid tier -> paid first, then free.
    lookup_order = (free_field,) if free_tier else (paid_field, free_field)
    resolved = (
        _extract_model_name(recommendations.get(field)) for field in lookup_order
    )
    return next((name for name in resolved if name), None)
# ---------------------------------------------------------------------------
# Canonical provider list — single source of truth for provider identity.
# Every code path that lists, displays, or iterates providers derives from
+51
View File
@@ -483,6 +483,7 @@ class TestNousAuxiliaryRefresh:
with (
patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
patch("agent.auxiliary_client.OpenAI") as mock_openai,
):
from agent.auxiliary_client import _try_nous
@@ -491,10 +492,60 @@ class TestNousAuxiliaryRefresh:
client, model = _try_nous()
assert client is not None
# No Portal recommendation → falls back to the hardcoded default.
assert model == "google/gemini-3-flash-preview"
assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
assert mock_openai.call_args.kwargs["base_url"] == fresh_base
def test_try_nous_uses_portal_recommendation_for_text(self):
    """A Portal-recommended compaction model is what _try_nous returns for text."""
    inference_url = "https://inference-api.nousresearch.com/v1"
    auth_patch = patch(
        "agent.auxiliary_client._read_nous_auth",
        return_value={"access_token": "***"},
    )
    runtime_patch = patch(
        "agent.auxiliary_client._resolve_nous_runtime_api",
        return_value=("fresh-agent-key", inference_url),
    )
    portal_patch = patch(
        "hermes_cli.models.get_nous_recommended_aux_model",
        return_value="minimax/minimax-m2.7",
    )
    openai_patch = patch("agent.auxiliary_client.OpenAI")

    with auth_patch, runtime_patch, portal_patch as mock_rec, openai_patch as mock_openai:
        from agent.auxiliary_client import _try_nous

        mock_openai.return_value = MagicMock()
        client, model = _try_nous(vision=False)

    assert client is not None
    assert model == "minimax/minimax-m2.7"
    # The text path must ask for the non-vision recommendation.
    assert mock_rec.call_args.kwargs["vision"] is False
def test_try_nous_uses_portal_recommendation_for_vision(self):
    """Vision requests must be forwarded as vision=True to the Portal lookup."""
    inference_url = "https://inference-api.nousresearch.com/v1"
    auth_patch = patch(
        "agent.auxiliary_client._read_nous_auth",
        return_value={"access_token": "***"},
    )
    runtime_patch = patch(
        "agent.auxiliary_client._resolve_nous_runtime_api",
        return_value=("fresh-agent-key", inference_url),
    )
    portal_patch = patch(
        "hermes_cli.models.get_nous_recommended_aux_model",
        return_value="google/gemini-3-flash-preview",
    )
    openai_patch = patch("agent.auxiliary_client.OpenAI")

    with auth_patch, runtime_patch, portal_patch as mock_rec, openai_patch:
        from agent.auxiliary_client import _try_nous

        client, model = _try_nous(vision=True)

    assert client is not None
    assert model == "google/gemini-3-flash-preview"
    assert mock_rec.call_args.kwargs["vision"] is True
def test_try_nous_falls_back_when_recommendation_lookup_raises(self):
    """An exception from the Portal lookup still yields the default model."""
    inference_url = "https://inference-api.nousresearch.com/v1"
    auth_patch = patch(
        "agent.auxiliary_client._read_nous_auth",
        return_value={"access_token": "***"},
    )
    runtime_patch = patch(
        "agent.auxiliary_client._resolve_nous_runtime_api",
        return_value=("fresh-agent-key", inference_url),
    )
    failing_portal_patch = patch(
        "hermes_cli.models.get_nous_recommended_aux_model",
        side_effect=RuntimeError("portal down"),
    )
    openai_patch = patch("agent.auxiliary_client.OpenAI")

    with auth_patch, runtime_patch, failing_portal_patch, openai_patch:
        from agent.auxiliary_client import _try_nous

        client, model = _try_nous()

    assert client is not None
    assert model == "google/gemini-3-flash-preview"
def test_call_llm_retries_nous_after_401(self):
class _Auth401(Exception):
status_code = 401
@@ -376,7 +376,6 @@ class TestLoginNousSkipKeepsCurrent:
lambda *a, **kw: prompt_returns,
)
monkeypatch.setattr(models_mod, "get_pricing_for_provider", lambda p: {})
monkeypatch.setattr(models_mod, "filter_nous_free_models", lambda ids, p: ids)
monkeypatch.setattr(models_mod, "check_nous_free_tier", lambda: None)
monkeypatch.setattr(
models_mod, "partition_nous_models_by_tier",
+187 -84
View File
@@ -4,7 +4,6 @@ from unittest.mock import patch, MagicMock
from hermes_cli.models import (
OPENROUTER_MODELS, fetch_openrouter_models, model_ids, detect_provider_for_model,
filter_nous_free_models, _NOUS_ALLOWED_FREE_MODELS,
is_nous_free_tier, partition_nous_models_by_tier,
check_nous_free_tier, _FREE_TIER_CACHE_TTL,
)
@@ -293,89 +292,6 @@ class TestDetectProviderForModel:
assert result[0] not in ("nous",) # nous has claude models but shouldn't be suggested
class TestFilterNousFreeModels:
    """Exercises filter_nous_free_models, the Nous Portal free-model policy."""

    # Representative pricing entries: any non-zero price vs. all-zero prices.
    _PAID = {"prompt": "0.000003", "completion": "0.000015"}
    _FREE = {"prompt": "0", "completion": "0"}

    def test_paid_models_kept(self):
        """Ordinary paid models come back untouched."""
        candidates = ["anthropic/claude-opus-4.6", "openai/gpt-5.4"]
        price_table = dict.fromkeys(candidates, self._PAID)
        assert filter_nous_free_models(candidates, price_table) == candidates

    def test_free_non_allowlist_models_removed(self):
        """A free model outside the allowlist is dropped."""
        price_table = {
            "anthropic/claude-opus-4.6": self._PAID,
            "arcee-ai/trinity-large-preview:free": self._FREE,
        }
        kept = filter_nous_free_models(
            ["anthropic/claude-opus-4.6", "arcee-ai/trinity-large-preview:free"],
            price_table,
        )
        assert kept == ["anthropic/claude-opus-4.6"]

    def test_allowlist_model_kept_when_free(self):
        """An allowlisted model survives while it is priced as free."""
        candidates = ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro"]
        price_table = {
            "anthropic/claude-opus-4.6": self._PAID,
            "xiaomi/mimo-v2-pro": self._FREE,
        }
        kept = filter_nous_free_models(candidates, price_table)
        assert kept == ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro"]

    def test_allowlist_model_removed_when_paid(self):
        """An allowlisted model is dropped once it is no longer free."""
        candidates = ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro"]
        price_table = dict.fromkeys(candidates, self._PAID)
        kept = filter_nous_free_models(candidates, price_table)
        assert kept == ["anthropic/claude-opus-4.6"]

    def test_no_pricing_returns_all(self):
        """With no pricing data at all, nothing is filtered."""
        candidates = ["anthropic/claude-opus-4.6", "nvidia/nemotron-3-super-120b-a12b:free"]
        assert filter_nous_free_models(candidates, {}) == candidates

    def test_model_with_no_pricing_entry_treated_as_paid(self):
        """A model absent from the pricing dict is kept (assumed paid)."""
        candidates = ["anthropic/claude-opus-4.6", "openai/gpt-5.4"]
        # gpt-5.4 is intentionally missing from the price table.
        price_table = {"anthropic/claude-opus-4.6": self._PAID}
        assert filter_nous_free_models(candidates, price_table) == candidates

    def test_mixed_scenario(self):
        """End-to-end: mix of paid, free-allowed, free-disallowed, allowlist-not-free."""
        price_table = {
            "anthropic/claude-opus-4.6": self._PAID,                 # paid, not allowlist → keep
            "nvidia/nemotron-3-super-120b-a12b:free": self._FREE,    # free, not allowlist → drop
            "xiaomi/mimo-v2-pro": self._FREE,                        # free, allowlist → keep
            "xiaomi/mimo-v2-omni": self._PAID,                       # paid, allowlist → drop
            "openai/gpt-5.4": self._PAID,                            # paid, not allowlist → keep
        }
        candidates = [
            "anthropic/claude-opus-4.6",
            "nvidia/nemotron-3-super-120b-a12b:free",
            "xiaomi/mimo-v2-pro",
            "xiaomi/mimo-v2-omni",
            "openai/gpt-5.4",
        ]
        expected = [
            "anthropic/claude-opus-4.6",
            "xiaomi/mimo-v2-pro",
            "openai/gpt-5.4",
        ]
        assert filter_nous_free_models(candidates, price_table) == expected

    def test_allowlist_contains_expected_models(self):
        """Sanity: the allowlist has the models we expect."""
        for expected_member in ("xiaomi/mimo-v2-pro", "xiaomi/mimo-v2-omni"):
            assert expected_member in _NOUS_ALLOWED_FREE_MODELS
class TestIsNousFreeTier:
"""Tests for is_nous_free_tier — account tier detection."""
@@ -501,3 +417,190 @@ class TestCheckNousFreeTierCache:
def test_cache_ttl_is_short(self):
"""TTL should be short enough to catch upgrades quickly (<=5 min)."""
assert _FREE_TIER_CACHE_TTL <= 300
class TestNousRecommendedModels:
    """Covers fetch_nous_recommended_models and get_nous_recommended_aux_model."""

    # Payload with only the free-tier recommendations populated.
    _SAMPLE_PAYLOAD = {
        "paidRecommendedModels": [],
        "freeRecommendedModels": [],
        "paidRecommendedCompactionModel": None,
        "paidRecommendedVisionModel": None,
        "freeRecommendedCompactionModel": {
            "modelName": "google/gemini-3-flash-preview",
            "displayName": "Google: Gemini 3 Flash Preview",
        },
        "freeRecommendedVisionModel": {
            "modelName": "google/gemini-3-flash-preview",
            "displayName": "Google: Gemini 3 Flash Preview",
        },
    }

    def setup_method(self):
        # Every test starts from an empty recommended-models cache.
        _models_mod._nous_recommended_cache.clear()

    def teardown_method(self):
        # Leave no cached payload behind for other tests.
        _models_mod._nous_recommended_cache.clear()

    def _mock_urlopen(self, payload):
        """Build a context-manager mock standing in for urllib.request.urlopen()."""
        import json as _json

        fake_response = MagicMock()
        fake_response.read.return_value = _json.dumps(payload).encode()
        context = MagicMock()
        context.__enter__.return_value = fake_response
        context.__exit__.return_value = False
        return context

    def _serve(self, payload):
        """Patch the fetch helper so it hands back *payload* without any I/O."""
        return patch(
            "hermes_cli.models.fetch_nous_recommended_models",
            return_value=payload,
        )

    def test_fetch_caches_per_portal_url(self):
        from hermes_cli.models import fetch_nous_recommended_models

        fake_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
        with patch("urllib.request.urlopen", return_value=fake_cm) as mock_urlopen:
            first = fetch_nous_recommended_models("https://portal.example.com")
            second = fetch_nous_recommended_models("https://portal.example.com")
        assert first == self._SAMPLE_PAYLOAD
        assert second == self._SAMPLE_PAYLOAD
        # The second call must have been served from the cache.
        assert mock_urlopen.call_count == 1

    def test_fetch_cache_is_keyed_per_portal(self):
        from hermes_cli.models import fetch_nous_recommended_models

        fake_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
        with patch("urllib.request.urlopen", return_value=fake_cm) as mock_urlopen:
            fetch_nous_recommended_models("https://portal.example.com")
            fetch_nous_recommended_models("https://portal.staging-nousresearch.com")
        # Different portals → separate fetches.
        assert mock_urlopen.call_count == 2

    def test_fetch_returns_empty_on_network_failure(self):
        from hermes_cli.models import fetch_nous_recommended_models

        with patch("urllib.request.urlopen", side_effect=OSError("boom")):
            outcome = fetch_nous_recommended_models("https://portal.example.com")
        assert outcome == {}

    def test_fetch_force_refresh_bypasses_cache(self):
        from hermes_cli.models import fetch_nous_recommended_models

        fake_cm = self._mock_urlopen(self._SAMPLE_PAYLOAD)
        with patch("urllib.request.urlopen", return_value=fake_cm) as mock_urlopen:
            fetch_nous_recommended_models("https://portal.example.com")
            fetch_nous_recommended_models("https://portal.example.com", force_refresh=True)
        assert mock_urlopen.call_count == 2

    def test_get_aux_model_returns_vision_recommendation(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        with self._serve(self._SAMPLE_PAYLOAD):
            # Free tier → free vision recommendation.
            chosen = get_nous_recommended_aux_model(vision=True, free_tier=True)
        assert chosen == "google/gemini-3-flash-preview"

    def test_get_aux_model_returns_compaction_recommendation(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            **self._SAMPLE_PAYLOAD,
            "freeRecommendedCompactionModel": {"modelName": "minimax/minimax-m2.7"},
        }
        with self._serve(payload):
            chosen = get_nous_recommended_aux_model(vision=False, free_tier=True)
        assert chosen == "minimax/minimax-m2.7"

    def test_get_aux_model_returns_none_when_field_null(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {**self._SAMPLE_PAYLOAD, "freeRecommendedCompactionModel": None}
        with self._serve(payload):
            chosen = get_nous_recommended_aux_model(vision=False, free_tier=True)
        assert chosen is None

    def test_get_aux_model_returns_none_on_empty_payload(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        with self._serve({}):
            assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None
            assert get_nous_recommended_aux_model(vision=True, free_tier=False) is None

    def test_get_aux_model_returns_none_when_modelname_blank(self):
        from hermes_cli.models import get_nous_recommended_aux_model

        blank_name_payload = {"freeRecommendedCompactionModel": {"modelName": " "}}
        with self._serve(blank_name_payload):
            assert get_nous_recommended_aux_model(vision=False, free_tier=True) is None

    def test_paid_tier_prefers_paid_recommendation(self):
        """Paid-tier users should get the paid model when it's populated."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"},
            "freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"},
            "paidRecommendedVisionModel": {"modelName": "openai/gpt-5.4"},
            "freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"},
        }
        with self._serve(payload):
            text_choice = get_nous_recommended_aux_model(vision=False, free_tier=False)
            vision_choice = get_nous_recommended_aux_model(vision=True, free_tier=False)
        assert text_choice == "anthropic/claude-opus-4.7"
        assert vision_choice == "openai/gpt-5.4"

    def test_paid_tier_falls_back_to_free_when_paid_is_null(self):
        """If the Portal returns null for the paid field, fall back to free."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": None,
            "freeRecommendedCompactionModel": {"modelName": "google/gemini-3-flash-preview"},
            "paidRecommendedVisionModel": None,
            "freeRecommendedVisionModel": {"modelName": "google/gemini-3-flash-preview"},
        }
        with self._serve(payload):
            text_choice = get_nous_recommended_aux_model(vision=False, free_tier=False)
            vision_choice = get_nous_recommended_aux_model(vision=True, free_tier=False)
        assert text_choice == "google/gemini-3-flash-preview"
        assert vision_choice == "google/gemini-3-flash-preview"

    def test_free_tier_never_uses_paid_recommendation(self):
        """Free-tier users must not get paid-only recommendations."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "anthropic/claude-opus-4.7"},
            "freeRecommendedCompactionModel": None,  # no free recommendation
        }
        with self._serve(payload):
            chosen = get_nous_recommended_aux_model(vision=False, free_tier=True)
        # Free tier must return None — never leak the paid model.
        assert chosen is None

    def test_auto_detects_tier_when_not_supplied(self):
        """Default behaviour: call check_nous_free_tier() to pick the tier."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "paid-model"},
            "freeRecommendedCompactionModel": {"modelName": "free-model"},
        }
        # (detected free tier?, model that must be chosen)
        scenarios = ((True, "free-model"), (False, "paid-model"))
        for detected_free, expected_model in scenarios:
            with (
                self._serve(payload),
                patch("hermes_cli.models.check_nous_free_tier", return_value=detected_free),
            ):
                assert get_nous_recommended_aux_model(vision=False) == expected_model

    def test_tier_detection_error_defaults_to_paid(self):
        """If tier detection raises, assume paid so we don't downgrade silently."""
        from hermes_cli.models import get_nous_recommended_aux_model

        payload = {
            "paidRecommendedCompactionModel": {"modelName": "paid-model"},
            "freeRecommendedCompactionModel": {"modelName": "free-model"},
        }
        broken_detection = patch(
            "hermes_cli.models.check_nous_free_tier",
            side_effect=RuntimeError("boom"),
        )
        with self._serve(payload), broken_detection:
            assert get_nous_recommended_aux_model(vision=False) == "paid-model"