"""End-to-end coverage for the hot-window scratch caches in run_reply_engine.

Three caches share one primitive (DialogueMemory.hot_cache_*):

1. Warm profile block — query-agnostic, keyed on a constant.
2. Memory enrichment extractor — keyed on the redacted query (+topic hint).
3. Tool router output — keyed on redacted query + strategy + catalogue.

All three should fire on the second matching turn within the hot window so
follow-up queries don't pay for SQLite reads or LLM hops they already did.

Also covers the C1 fix: when the planner explicitly emits a `searchMemory`
step, the recall gate must NOT short-circuit memory enrichment even when
hot-window coverage is high.
"""

from unittest.mock import Mock, patch

import pytest

from src.jarvis.memory.conversation import DialogueMemory
from src.jarvis.reply.engine import run_reply_engine


def _mock_cfg():
    """Return a ``Mock`` config with the settings these tests pin explicitly.

    Any attribute not assigned below is auto-created by ``Mock`` on first
    access, so only the values listed here are deterministic.
    """
    cfg = Mock()
    cfg.ollama_base_url = "http://localhost:11434"
    cfg.ollama_chat_model = "test-large"
    cfg.voice_debug = False
    cfg.llm_tools_timeout_sec = 8.0
    cfg.llm_embed_timeout_sec = 10.0
    cfg.llm_chat_timeout_sec = 45.0
    cfg.llm_digest_timeout_sec = 8.0
    cfg.memory_enrichment_max_results = 5
    cfg.memory_enrichment_source = "diary"
    cfg.memory_digest_enabled = False
    cfg.tool_result_digest_enabled = False
    cfg.location_ip_address = None
    cfg.location_auto_detect = False
    cfg.location_enabled = False
    cfg.agentic_max_turns = 8
    cfg.tool_search_max_calls = 3
    cfg.tool_selection_strategy = "all"
    cfg.tool_carryover_max_turns = 2
    cfg.tool_carryover_per_entry_chars = 1200
    cfg.mcps = {}
    cfg.llm_thinking_enabled = False
    cfg.tts_engine = "none"
    cfg.ollama_embed_model = "test-embed"
    cfg.db_path = ":memory:"
    return cfg


def _script_replies(mock_chat, mock_extract, *replies):
    """Queue one canned chat response, and its extracted text, per reply."""
    mock_chat.side_effect = [{"message": {"content": reply}} for reply in replies]
    mock_extract.side_effect = list(replies)


def _run_turns(dm, *texts):
    """Call ``run_reply_engine`` once per text, sharing one db, cfg and *dm*."""
    db = Mock()
    cfg = _mock_cfg()
    for text in texts:
        run_reply_engine(db=db, cfg=cfg, tts=None, text=text, dialogue_memory=dm)


# NOTE: stacked ``@patch`` decorators inject their mocks bottom-up, so the
# decorator closest to ``def`` supplies the first positional parameter.
# Mocks a test never inspects are prefixed with ``_``.


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch(
    "src.jarvis.memory.graph_ops.build_warm_profile",
    return_value={"user": "", "directives": ""},
)
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.select_tools", return_value=[])
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_tool_router_cached_across_turns(
    mock_chat,
    mock_extract,
    _mock_extractor,
    _mock_plan,
    mock_select,
    _mock_graph,
    _mock_warm,
    _mock_fmt,
):
    """Two identical queries within the same DialogueMemory should call the
    tool router exactly once — the second turn must hit the hot-window cache.
    """
    _script_replies(mock_chat, mock_extract, "hello", "hello again")
    dm = DialogueMemory()

    _run_turns(dm, "say hi", "say hi")

    assert mock_select.call_count == 1, (
        f"router should be cached on identical query; called {mock_select.call_count} times"
    )


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch(
    "src.jarvis.memory.graph_ops.build_warm_profile",
    return_value={"user": "", "directives": ""},
)
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_router_fallback_to_all_tools_is_not_cached(
    mock_chat,
    mock_extract,
    _mock_extractor,
    _mock_plan,
    _mock_graph,
    _mock_warm,
    _mock_fmt,
):
    """When the router falls open to the full tool catalogue (its
    parse-failure fail-open path), the engine must NOT persist that result in
    the conversation-scoped cache. Otherwise a single small-model fluke pins
    ``allowed_tools`` to "all N" for the rest of the session, overwhelms the
    planner, and starves the chat model.

    Field trace (2026-05-03): user said "navigate to youtube.com". The router
    LLM flaked, fell open to ~41 tools, the cache stored that, every
    subsequent navigate attempt replayed the cached 41-tool set, and the small
    chat model produced an empty reply ("Sorry, I had trouble processing
    that"). Pre-#281 this didn't happen because the router re-rolled per turn.
    """
    from src.jarvis.tools.registry import BUILTIN_TOOLS

    full_catalogue = list(BUILTIN_TOOLS.keys())
    _script_replies(mock_chat, mock_extract, "hello", "hello again")
    dm = DialogueMemory()

    # The router is patched here rather than via a decorator because its
    # return value (the full catalogue) is only known after the local import.
    with patch(
        "src.jarvis.reply.engine.select_tools",
        return_value=full_catalogue,
    ) as mock_select:
        _run_turns(dm, "navigate to youtube", "navigate to youtube")

    assert mock_select.call_count == 2, (
        "fall-open-to-all-tools must not be cached; the router should re-run "
        f"on the second identical turn — was called {mock_select.call_count} times"
    )


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch(
    "src.jarvis.memory.graph_ops.build_warm_profile",
    return_value={"user": "", "directives": ""},
)
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.select_tools", return_value=[])
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch(
    "src.jarvis.reply.engine.extract_search_params_for_memory",
    return_value={"keywords": ["x"], "questions": []},
)
@patch(
    "src.jarvis.memory.conversation.search_conversation_memory_by_keywords",
    return_value=[],
)
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_memory_extractor_cached_across_turns(
    mock_chat,
    mock_extract,
    _mock_search,
    mock_extractor,
    _mock_plan,
    _mock_select,
    _mock_graph,
    _mock_warm,
    _mock_fmt,
):
    """Empty plan → fail-open path runs the extractor. The second identical
    follow-up must skip the extractor LLM call.

    The recall gate would also fire on a tool-grounded follow-up, so we keep
    the dialogue free of tool messages here to exercise the extractor path on
    both turns.
    """
    _script_replies(mock_chat, mock_extract, "first", "second")
    dm = DialogueMemory()

    _run_turns(dm, "tell me about pushkin", "tell me about pushkin")

    assert mock_extractor.call_count == 1, (
        f"extractor should be cached; called {mock_extractor.call_count} times"
    )


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="warm-block")
@patch(
    "src.jarvis.memory.graph_ops.build_warm_profile",
    return_value={"user": "u", "directives": "d"},
)
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.select_tools", return_value=[])
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_warm_profile_cached_across_turns(
    mock_chat,
    mock_extract,
    _mock_extractor,
    _mock_plan,
    _mock_select,
    _mock_graph,
    mock_build,
    _mock_fmt,
):
    """Warm profile is query-agnostic; second turn must reuse the cached
    block instead of re-traversing the graph store.
    """
    _script_replies(mock_chat, mock_extract, "a", "b")
    dm = DialogueMemory()

    # Different queries on purpose: the warm-profile cache is not keyed on
    # the query text, so the second turn must still be a hit.
    _run_turns(dm, "hi", "anything else")

    assert mock_build.call_count == 1, (
        f"warm profile should be built once and cached; got {mock_build.call_count} calls"
    )


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch(
    "src.jarvis.memory.graph_ops.build_warm_profile",
    return_value={"user": "", "directives": ""},
)
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.select_tools", return_value=[])
@patch(
    "src.jarvis.reply.engine.plan_query",
    return_value=["searchMemory topic='justin bieber'", "reply"],
)
@patch(
    "src.jarvis.reply.engine.extract_search_params_for_memory",
    return_value={"keywords": ["bieber"], "questions": []},
)
@patch(
    "src.jarvis.memory.conversation.search_conversation_memory_by_keywords",
    return_value=[],
)
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_planner_search_memory_overrides_recall_gate(
    mock_chat,
    mock_extract,
    _mock_search,
    mock_extractor,
    _mock_plan,
    _mock_select,
    _mock_graph,
    _mock_warm,
    _mock_fmt,
):
    """C1 fix: when the planner emits `searchMemory`, the recall gate must
    NOT short-circuit memory enrichment even though the hot window contains a
    fresh tool result that overlaps the query.
    """
    _script_replies(mock_chat, mock_extract, "Canadian singer.")
    dm = DialogueMemory()

    # Plant a fresh tool result that would otherwise satisfy the recall gate.
    dm.add_message("user", "who is justin bieber")
    dm.record_tool_turn([
        {
            "role": "tool",
            "tool_call_id": "c1",
            "content": "Justin Bieber is a Canadian singer with the song Baby.",
        },
    ])
    dm.add_message("assistant", "Canadian singer.")

    _run_turns(dm, "bieber more about justin")

    # Planner explicitly demanded memory → extractor must run.
    assert mock_extractor.call_count == 1, (
        "extractor must run when planner emits searchMemory, "
        "regardless of recall-gate coverage"
    )


@pytest.mark.unit
@patch("src.jarvis.memory.graph_ops.format_warm_profile_block", return_value="")
@patch(
    "src.jarvis.memory.graph_ops.build_warm_profile",
    return_value={"user": "", "directives": ""},
)
@patch("src.jarvis.memory.graph.GraphMemoryStore")
@patch("src.jarvis.reply.engine.select_tools", return_value=[])
@patch("src.jarvis.reply.engine.plan_query", return_value=[])
@patch("src.jarvis.reply.engine.extract_search_params_for_memory", return_value={})
@patch("src.jarvis.reply.engine.extract_text_from_response")
@patch("src.jarvis.reply.engine.chat_with_messages")
def test_new_conversation_clears_cache_and_carryover(
    mock_chat,
    mock_extract,
    _mock_extractor,
    _mock_plan,
    _mock_select,
    _mock_graph,
    _mock_warm,
    _mock_fmt,
):
    """When the previous conversation has lapsed past the inactivity window,
    the engine must wipe the conversation-scoped cache and any leftover tool
    carryover before running the new turn. Otherwise stale state from a
    previous session would leak into a fresh one.
    """
    _script_replies(mock_chat, mock_extract, "fresh")
    dm = DialogueMemory()

    # Plant cache + carryover from a prior (now-lapsed) session.
    dm.hot_cache_put(dm.WARM_PROFILE_CACHE_KEY, "old-block")
    dm.hot_cache_put("router:old", ["webSearch"])
    dm.record_tool_turn([
        {"role": "tool", "tool_call_id": "c1", "content": "ancient result"},
    ])
    # Sanity-check the planted state so the post-run assertions are meaningful.
    assert dm._tool_turns
    assert dm.hot_cache_get(dm.WARM_PROFILE_CACHE_KEY) == "old-block"

    # No recent messages → engine treats this turn as a new conversation.
    _run_turns(dm, "hello")

    # Stale router entry must be gone (full hot-cache wipe), and the old
    # tool carryover must not be visible to the new conversation.
    assert dm.hot_cache_get("router:old") is None
    # The tool carryover from before must have been cleared on entry; any
    # tool turns recorded later in this turn would only come from THIS
    # reply (mock chat returns a final reply with no tool calls).
    assert dm._tool_turns == []