# javis_bot/tests/test_engine_hot_window_caches.py

"""End-to-end coverage for the hot-window scratch caches in run_reply_engine.

Three caches share one primitive (DialogueMemory.hot_cache_*):

1. Warm profile block — query-agnostic, keyed on a constant.
2. Memory enrichment extractor — keyed on the redacted query (+topic hint).
3. Tool router output — keyed on redacted query + strategy + catalogue.

All three should fire on the second matching turn within the hot window so
follow-up queries don't pay for SQLite reads or LLM hops they already did.

Also covers the C1 fix: when the planner explicitly emits a `searchMemory`
step, the recall gate must NOT short-circuit memory enrichment even when
hot-window coverage is high.
"""

from unittest.mock import Mock, patch

import pytest

from src.jarvis.memory.conversation import DialogueMemory
from src.jarvis.reply.engine import run_reply_engine


def _mock_cfg():
    cfg = Mock()
    cfg.ollama_base_url = "http://localhost:11434"
    cfg.ollama_chat_model = "test-large"
    cfg.voice_debug = False
    cfg.llm_tools_timeout_sec = 8.0
    cfg.llm_embed_timeout_sec = 10.0
    cfg.llm_chat_timeout_sec = 45.0
    cfg.llm_digest_timeout_sec = 8.0
    cfg.memory_enrichment_max_results = 5
    cfg.memory_enrichment_source = "diary"
    cfg.memory_digest_enabled = False
    cfg.tool_result_digest_enabled = False
    cfg.location_ip_address = None
    cfg.location_auto_detect = False
    cfg.location_enabled = False
    cfg.agentic_max_turns = 8
    cfg.tool_search_max_calls = 3
    cfg.tool_selection_strategy = "all"
    cfg.tool_carryover_max_turns = 2
    cfg.tool_carryover_per_entry_chars = 1200
    cfg.mcps = {}
    cfg.llm_thinking_enabled = False
    cfg.tts_engine = "none"
    cfg.ollama_embed_model = "test-embed"
    cfg.db_path = ":memory:"
    return cfg


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile", return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.select_tools", return_value=[])
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_tool_router_cached_across_turns(
    mock_chat, mock_extract, mock_extractor, mock_plan, mock_select,
    _mock_graph, _mock_warm, _mock_fmt,
):
    """The tool router runs once for two identical turns on one DialogueMemory.

    The same query is sent twice through the same ``DialogueMemory``; the
    second turn is expected to be served from the hot-window cache, so the
    patched ``select_tools`` must be invoked exactly once.
    """
    # One scripted chat reply per engine turn; the text extractor mirrors them.
    replies = ["hello", "hello again"]
    mock_chat.side_effect = [{"message": {"content": reply}} for reply in replies]
    mock_extract.side_effect = list(replies)

    # db, cfg and dialogue memory are shared by both turns.
    turn_kwargs = {
        "db": Mock(),
        "cfg": _mock_cfg(),
        "tts": None,
        "text": "say hi",
        "dialogue_memory": DialogueMemory(),
    }
    for _ in replies:
        run_reply_engine(**turn_kwargs)

    assert mock_select.call_count == 1, (
        f"router should be cached on identical query; called {mock_select.call_count} times"
    )


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile", return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_router_fallback_to_all_tools_is_not_cached(
    mock_chat, mock_extract, mock_extractor, mock_plan,
    _mock_graph, _mock_warm, _mock_fmt,
):
    """A router result equal to the full tool catalogue must not be cached.

    Returning every tool is the router's parse-failure fail-open path. If the
    engine stored that result in the conversation-scoped cache, one
    small-model fluke would pin ``allowed_tools`` to "all N" for the rest of
    the session, overwhelming the planner and starving the chat model.

    Field trace (2026-05-03): the user said "navigate to youtube.com". The
    router LLM flaked and fell open to ~41 tools, the cache stored that set,
    every later navigate attempt replayed it, and the small chat model
    produced an empty reply ("Sorry, I had trouble processing that").
    Pre-#281 this didn't happen because the router re-rolled per turn.
    """
    from src.jarvis.tools.registry import BUILTIN_TOOLS

    # One scripted chat reply per engine turn; the text extractor mirrors them.
    replies = ["hello", "hello again"]
    mock_chat.side_effect = [{"message": {"content": reply}} for reply in replies]
    mock_extract.side_effect = list(replies)

    turn_kwargs = {
        "db": Mock(),
        "cfg": _mock_cfg(),
        "tts": None,
        "text": "navigate to youtube",
        "dialogue_memory": DialogueMemory(),
    }

    # Make the router hand back every built-in tool name on each call.
    every_tool = [*BUILTIN_TOOLS.keys()]
    router_patch = patch("src.jarvis.reply.engine.select_tools", return_value=every_tool)
    with router_patch as mock_select:
        for _ in replies:
            run_reply_engine(**turn_kwargs)

    assert mock_select.call_count == 2, (
        "fall-open-to-all-tools must not be cached; the router should re-run "
        f"on the second identical turn — was called {mock_select.call_count} times"
    )


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile", return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.select_tools", return_value=[])
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={"keywords": ["x"], "questions": []})
@patch("src.jarvis.memory.conversation.search_conversation_memory_by_keywords", return_value=[])
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_memory_extractor_cached_across_turns(
    mock_chat, mock_extract, _mock_search, mock_extractor,
    _mock_plan, _mock_select, _mock_graph, _mock_warm, _mock_fmt,
):
    """The memory extractor runs once for two identical follow-up turns.

    With an empty plan the engine takes the fail-open path that runs the
    extractor; the repeated query must then skip the extractor LLM call.

    The dialogue deliberately contains no tool messages: the recall gate
    would also fire on a tool-grounded follow-up, and this test wants the
    extractor path exercised on both turns.
    """
    # One scripted chat reply per engine turn; the text extractor mirrors them.
    replies = ["first", "second"]
    mock_chat.side_effect = [{"message": {"content": reply}} for reply in replies]
    mock_extract.side_effect = list(replies)

    turn_kwargs = {
        "db": Mock(),
        "cfg": _mock_cfg(),
        "tts": None,
        "text": "tell me about pushkin",
        "dialogue_memory": DialogueMemory(),
    }
    for _ in replies:
        run_reply_engine(**turn_kwargs)

    assert mock_extractor.call_count == 1, (
        f"extractor should be cached; called {mock_extractor.call_count} times"
    )


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="warm-block")
@patch("src.jarvis.memory.graph_ops.build_warm_profile", return_value={"user": "u", "directives": "d"})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.select_tools", return_value=[])
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_warm_profile_cached_across_turns(
    mock_chat, mock_extract, _mock_extractor, _mock_plan,
    _mock_select, _mock_graph, mock_build, _mock_fmt,
):
    """The warm profile is built once even when the two queries differ.

    The warm profile block is query-agnostic, so the second turn (with a
    different text) must reuse the cached block rather than traverse the
    graph store again.
    """
    # One scripted chat reply per engine turn; the text extractor mirrors them.
    replies = ["a", "b"]
    mock_chat.side_effect = [{"message": {"content": reply}} for reply in replies]
    mock_extract.side_effect = list(replies)

    shared_kwargs = {
        "db": Mock(),
        "cfg": _mock_cfg(),
        "tts": None,
        "dialogue_memory": DialogueMemory(),
    }
    # Different texts on purpose: the cache under test does not key on the query.
    for query in ("hi", "anything else"):
        run_reply_engine(text=query, **shared_kwargs)

    assert mock_build.call_count == 1, (
        f"warm profile should be built once and cached; got {mock_build.call_count} calls"
    )


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile", return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.select_tools", return_value=[])
@patch(
    "src.jarvis.reply.engine.plan_query",
    return_value=["searchMemory topic='justin bieber'", "reply"],
)
@patch(
    "src.jarvis.reply.engine.extract_search_params_for_memory",
    return_value={"keywords": ["bieber"], "questions": []},
)
@patch("src.jarvis.memory.conversation.search_conversation_memory_by_keywords", return_value=[])
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_planner_search_memory_overrides_recall_gate(
    mock_chat, mock_extract, _mock_search, mock_extractor,
    _mock_plan, _mock_select, _mock_graph, _mock_warm, _mock_fmt,
):
    """C1 fix: an explicit `searchMemory` plan step beats the recall gate.

    The hot window is seeded with a fresh tool result that overlaps the
    query. Because the patched planner emits `searchMemory`, memory
    enrichment must still run, so the extractor is called once.
    """
    reply = "Canadian singer."
    mock_chat.side_effect = [{"message": {"content": reply}}]
    mock_extract.side_effect = [reply]

    # Seed a prior exchange whose tool result would otherwise satisfy the
    # recall gate for the follow-up query below.
    memory = DialogueMemory()
    memory.add_message("user", "who is justin bieber")
    prior_tool_message = {
        "role": "tool",
        "tool_call_id": "c1",
        "content": "Justin Bieber is a Canadian singer with the song Baby.",
    }
    memory.record_tool_turn([prior_tool_message])
    memory.add_message("assistant", reply)

    run_reply_engine(
        db=Mock(),
        cfg=_mock_cfg(),
        tts=None,
        text="bieber more about justin",
        dialogue_memory=memory,
    )

    # The planner asked for memory explicitly, so the extractor has to run.
    assert mock_extractor.call_count == 1, (
        "extractor must run when planner emits searchMemory, "
        "regardless of recall-gate coverage"
    )


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch("src.jarvis.memory.graph_ops.build_warm_profile", return_value={"user": "", "directives": ""})
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.select_tools", return_value=[])
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_new_conversation_clears_cache_and_carryover(
    mock_chat, mock_extract, _mock_extractor, _mock_plan, mock_select,
    _mock_graph, _mock_warm, _mock_fmt,
):
    """A turn that starts a new conversation drops stale cache and tool carryover.

    Once the previous conversation has lapsed past the inactivity window,
    the engine has to wipe the conversation-scoped cache and any leftover
    tool carryover before handling the new turn, so state from an earlier
    session cannot leak into a fresh one.
    """
    reply = "fresh"
    mock_chat.side_effect = [{"message": {"content": reply}}]
    mock_extract.side_effect = [reply]

    memory = DialogueMemory()

    # Simulate leftovers from an earlier, now-lapsed session: two cache
    # entries plus one recorded tool turn.
    memory.hot_cache_put(memory.WARM_PROFILE_CACHE_KEY, "old-block")
    memory.hot_cache_put("router:old", ["webSearch"])
    stale_tool_message = {"role": "tool", "tool_call_id": "c1", "content": "ancient result"}
    memory.record_tool_turn([stale_tool_message])

    # Sanity-check the setup before the engine touches anything.
    assert memory._tool_turns
    assert memory.hot_cache_get(memory.WARM_PROFILE_CACHE_KEY) == "old-block"

    # No messages were added to the dialogue, so the engine treats this turn
    # as the start of a new conversation.
    run_reply_engine(
        db=Mock(),
        cfg=_mock_cfg(),
        tts=None,
        text="hello",
        dialogue_memory=memory,
    )

    # The stale router entry must have gone with the full hot-cache wipe.
    assert memory.hot_cache_get("router:old") is None
    # The old carryover must have been cleared on entry. The mocked chat
    # returns a final reply with no tool calls, so this turn adds none.
    assert memory._tool_turns == []