javis_bot/tests/test_graph_ops.py

"""Tests for graph_ops.py — LLM-dependent graph memory operations.

All LLM calls are mocked to test the logic independently.
"""

import json
import re
import sys
import types
from unittest.mock import patch, MagicMock

import pytest

# Mock 'requests' before importing graph_ops (which imports llm which needs requests)
if "requests" not in sys.modules:
    sys.modules["requests"] = types.ModuleType("requests")
    sys.modules["requests"].post = MagicMock()
    sys.modules["requests"].exceptions = types.ModuleType("requests.exceptions")
    sys.modules["requests"].exceptions.Timeout = type("Timeout", (Exception,), {})

from src.jarvis.memory.graph import GraphMemoryStore, SPLIT_THRESHOLD
from src.jarvis.memory.graph import BRANCH_USER, BRANCH_DIRECTIVES, BRANCH_WORLD
from src.jarvis.memory.graph_ops import (
    extract_graph_memories,
    _llm_pick_best_child,
    find_best_node,
    auto_split_node,
    update_graph_from_dialogue,
    build_warm_profile,
    format_warm_profile_block,
    merge_node_data,
    consolidate_all_populated_nodes,
    MergeResult,
)


# ── Fixtures ───────────────────────────────────────────────────────────


@pytest.fixture
def store(tmp_path):
    """Fresh GraphMemoryStore with temporary database."""
    s = GraphMemoryStore(str(tmp_path / "test_ops.db"))
    yield s
    s.close()


@pytest.fixture
def populated_store(store):
    """Store with a few topic nodes for traversal tests."""
    store.create_node(
        name="Music",
        description="Musical preferences and listening habits",
        data="Enjoys jazz and lo-fi hip hop",
        parent_id="root",
    )
    store.create_node(
        name="Work",
        description="Professional details and projects",
        data="Senior engineer at Acme Corp. Uses Python daily.",
        parent_id="root",
    )
    store.create_node(
        name="Health",
        description="Health, fitness, and dietary information",
        data="Runs 3 times a week. Prefers dark roast coffee.",
        parent_id="root",
    )
    return store


# ── extract_graph_memories ─────────────────────────────────────────────


@pytest.mark.unit
class TestExtractGraphMemories:
    """Tests for memory extraction from conversation summaries.

    The extractor now emits ``(branch_id, fact_text)`` tuples, where
    branch_id is one of ``user`` / ``directives`` / ``world``. Callers
    route each fact into the corresponding top-level branch of the
    knowledge graph.
    """

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_extracts_facts(self, mock_llm):
        mock_llm.return_value = (
            '[{"branch": "USER", "fact": "Prefers dark roast coffee"},'
            ' {"branch": "WORLD", "fact": "Acme Corp is based in London"}]'
        )
        facts = extract_graph_memories("summary text", "http://localhost", "model")
        assert len(facts) == 2
        assert facts[0] == ("user", "Prefers dark roast coffee")
        assert facts[1] == ("world", "Acme Corp is based in London")

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_classifies_directive_branch(self, mock_llm):
        """A user-issued behavioural rule must land in the DIRECTIVES
        branch so it survives verbatim into the warm system-prompt
        blob, rather than being summarised alongside descriptive user
        facts."""
        mock_llm.return_value = (
            '[{"branch": "DIRECTIVES", "fact": "Always answer in British English"}]'
        )
        facts = extract_graph_memories("summary", "http://localhost", "model")
        assert facts == [("directives", "Always answer in British English")]

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_returns_empty_when_nothing_worth_storing(self, mock_llm):

        mock_llm.return_value = "[]"
        facts = extract_graph_memories("just small talk", "http://localhost", "model")
        assert facts == []

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_handles_llm_returning_none(self, mock_llm):

        mock_llm.return_value = None
        facts = extract_graph_memories("summary", "http://localhost", "model")
        assert facts == []

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_handles_malformed_json(self, mock_llm):

        mock_llm.return_value = "Here are some facts: not valid json"
        facts = extract_graph_memories("summary", "http://localhost", "model")
        assert facts == []

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_handles_json_embedded_in_text(self, mock_llm):

        mock_llm.return_value = (
            'Sure! Here are the facts:\n'
            '[{"branch": "USER", "fact": "Likes hiking"},'
            ' {"branch": "USER", "fact": "Has a cat named Luna"}]\n'
            'Hope that helps!'
        )
        facts = extract_graph_memories("summary", "http://localhost", "model")
        assert len(facts) == 2

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_filters_empty_strings(self, mock_llm):

        mock_llm.return_value = (
            '[{"branch": "USER", "fact": "Valid fact"},'
            ' {"branch": "USER", "fact": ""},'
            ' {"branch": "USER", "fact": "   "},'
            ' {"branch": "USER", "fact": "Another fact"}]'
        )
        facts = extract_graph_memories("summary", "http://localhost", "model")
        assert len(facts) == 2

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_unknown_branch_defaults_to_user(self, mock_llm):
        """When the model emits a branch label we don't recognise, the
        fact still gets stored — under USER — rather than silently
        dropping a potentially useful piece of information. The
        assistant is a personal agent; user-scoped context is the
        safer default for unclassified items."""
        mock_llm.return_value = (
            '[{"branch": "MISC", "fact": "Some useful fact"}]'
        )
        facts = extract_graph_memories("summary", "http://localhost", "model")
        assert facts == [("user", "Some useful fact")]


# ── _llm_pick_best_child ──────────────────────────────────────────────


@pytest.mark.unit
class TestLLMPickBestChild:
    """Tests for the LLM child-picking logic."""

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_picks_numbered_child(self, mock_llm, populated_store):

        children = populated_store.get_children("root")
        mock_llm.return_value = "2"

        result = _llm_pick_best_child("fact", children, "http://localhost", "model")
        assert result == children[1].id

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_returns_none_for_NONE(self, mock_llm, populated_store):

        children = populated_store.get_children("root")
        mock_llm.return_value = "NONE"

        result = _llm_pick_best_child("unrelated fact", children, "http://localhost", "model")
        assert result is None

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_returns_none_for_empty_children(self, mock_llm):

        result = _llm_pick_best_child("fact", [], "http://localhost", "model")
        assert result is None
        mock_llm.assert_not_called()

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_returns_none_for_llm_failure(self, mock_llm, populated_store):

        children = populated_store.get_children("root")
        mock_llm.return_value = None

        result = _llm_pick_best_child("fact", children, "http://localhost", "model")
        assert result is None

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_handles_number_in_text(self, mock_llm, populated_store):

        children = populated_store.get_children("root")
        mock_llm.return_value = "I think option 1 is the best fit."

        result = _llm_pick_best_child("fact", children, "http://localhost", "model")
        assert result == children[0].id

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_handles_out_of_range_number(self, mock_llm, populated_store):

        children = populated_store.get_children("root")
        mock_llm.return_value = "99"

        result = _llm_pick_best_child("fact", children, "http://localhost", "model")
        assert result is None

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_uses_picker_model_when_provided(self, mock_llm, populated_store):
        # Behaviour: picker_model overrides the chat model for this classification-
        # shaped call, so placement runs on the small model without paging in the
        # big chat model. When absent, the chat model is used (backwards-compatible).
        children = populated_store.get_children("root")
        mock_llm.return_value = "1"

        _llm_pick_best_child(
            "fact", children, "http://localhost", "big-chat", picker_model="small-judge"
        )
        assert mock_llm.call_args.kwargs["chat_model"] == "small-judge"

        _llm_pick_best_child("fact", children, "http://localhost", "big-chat")
        assert mock_llm.call_args.kwargs["chat_model"] == "big-chat"


# ── find_best_node ─────────────────────────────────────────────────────


@pytest.mark.unit
class TestFindBestNode:
    """Tests for the three-entry-point traversal."""

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_matches_recent_node_first(self, mock_pick, populated_store):

        children = populated_store.get_children("root")
        music_node = [c for c in children if c.name == "Music"][0]
        # Touch Music so it appears in recent nodes
        populated_store.touch_node(music_node.id)

        # First call (recent nodes): return the music node
        mock_pick.return_value = music_node.id

        result = find_best_node(populated_store, "Likes jazz", "http://localhost", "model")
        assert result == music_node.id
        # Should only call once (matched on recent nodes)
        assert mock_pick.call_count == 1

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_falls_through_to_top_nodes(self, mock_pick, populated_store):

        children = populated_store.get_children("root")
        work_node = [c for c in children if c.name == "Work"][0]
        # Touch Work many times so it appears in top nodes
        for _ in range(5):
            populated_store.touch_node(work_node.id)

        # First call (recent): None. Second call (top): match work.
        mock_pick.side_effect = [None, work_node.id]

        result = find_best_node(populated_store, "Uses TypeScript", "http://localhost", "model")
        assert result == work_node.id

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_falls_through_to_root_traversal(self, mock_pick, populated_store):

        children = populated_store.get_children("root")
        health_node = [c for c in children if c.name == "Health"][0]

        # Recent: None, Top: skipped (all recent_ids overlap), Root children: pick Health
        mock_pick.side_effect = [None, health_node.id]

        result = find_best_node(populated_store, "Allergic to peanuts", "http://localhost", "model")
        assert result == health_node.id

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_writes_to_root_when_nothing_matches(self, mock_pick, populated_store):

        # Everything returns None — no match anywhere
        mock_pick.return_value = None

        result = find_best_node(populated_store, "Completely unrelated fact", "http://localhost", "model")
        assert result == "root"

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_empty_graph_writes_to_root(self, mock_pick, store):
        """With seeded branches under root but nothing else, an
        unclassified fact with no branch pin will try to pick among
        the seeded branches. If the picker declines all of them
        (returns None), traversal halts at root."""
        # Picker declines at every level so traversal breaks at root.
        mock_pick.return_value = None
        result = find_best_node(store, "First ever fact", "http://localhost", "model")
        assert result == "root"

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    def test_branch_pin_skips_shortcut_entry_points(self, mock_pick, store):
        """When a branch is pinned, the recent / top shortcut entry
        points are skipped entirely — the fact descends only through
        the pinned branch's subtree. With an empty branch, that means
        the branch root itself is the write target, and the picker is
        never consulted."""
        mock_pick.return_value = None
        result = find_best_node(
            store, "Likes jazz music", "http://localhost", "model",
            branch_root_id="user",
        )
        assert result == "user"
        # The picker was never called because the User branch has no
        # children yet; descent terminated immediately at the branch root.
        mock_pick.assert_not_called()


# ── auto_split_node ────────────────────────────────────────────────────


@pytest.mark.unit
class TestAutoSplitNode:
    """Tests for the auto-split logic."""

    def _make_large_node(self, store, token_count=2000):
        """Create a node with data exceeding the split threshold."""
        # ~4 chars per token, so token_count * 4 chars
        data = "\n".join([f"Fact number {i}: some information here for padding" for i in range(token_count // 10)])
        node = store.create_node(
            name="Large Topic",
            description="A topic with lots of data",
            data=data,
            parent_id="root",
        )
        return node

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_successful_split(self, mock_llm, store):

        node = self._make_large_node(store)
        assert node.data_token_count > SPLIT_THRESHOLD

        mock_llm.return_value = json.dumps({
            "categories": [
                {"name": "Category A", "description": "First category", "facts": ["Fact 1", "Fact 2"]},
                {"name": "Category B", "description": "Second category", "facts": ["Fact 3", "Fact 4"]},
            ],
            "summary": "A topic covering categories A and B"
        })

        result = auto_split_node(store, node.id, "http://localhost", "model")
        assert result is True

        # Verify children were created
        children = store.get_children(node.id)
        assert len(children) == 2
        names = {c.name for c in children}
        assert "Category A" in names
        assert "Category B" in names

        # Verify parent data was cleared and description updated
        updated_parent = store.get_node(node.id)
        assert updated_parent.data == ""
        assert "categories A and B" in updated_parent.description

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_split_aborts_with_fewer_than_2_categories(self, mock_llm, store):

        node = self._make_large_node(store)

        mock_llm.return_value = json.dumps({
            "categories": [
                {"name": "Only One", "description": "Just one", "facts": ["All the facts"]},
            ],
            "summary": "Everything"
        })

        result = auto_split_node(store, node.id, "http://localhost", "model")
        assert result is False

        # Data should still be on the parent
        parent = store.get_node(node.id)
        assert parent.data != ""

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_split_aborts_on_llm_failure(self, mock_llm, store):

        node = self._make_large_node(store)
        mock_llm.return_value = None

        result = auto_split_node(store, node.id, "http://localhost", "model")
        assert result is False

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_split_aborts_on_malformed_json(self, mock_llm, store):

        node = self._make_large_node(store)
        mock_llm.return_value = "This is not JSON at all"

        result = auto_split_node(store, node.id, "http://localhost", "model")
        assert result is False

    def test_split_skips_below_threshold(self, store):

        node = store.create_node(name="Small", description="Tiny", data="Short data", parent_id="root")
        result = auto_split_node(store, node.id, "http://localhost", "model")
        assert result is False

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_split_aborts_on_category_missing_facts(self, mock_llm, store):

        node = self._make_large_node(store)
        mock_llm.return_value = json.dumps({
            "categories": [
                {"name": "Cat A", "description": "First", "facts": ["Fact 1"]},
                {"name": "Cat B", "description": "Second", "facts": []},
            ],
            "summary": "Summary"
        })

        result = auto_split_node(store, node.id, "http://localhost", "model")
        assert result is False


# ── append_to_node ─────────────────────────────────────────────────────


@pytest.mark.unit
class TestAppendToNode:
    """Tests for the append_to_node method on GraphMemoryStore."""

    def test_append_to_empty_node(self, store):
        node = store.create_node(name="Test", description="Test", data="", parent_id="root")
        exceeded = store.append_to_node(node.id, "First fact")
        updated = store.get_node(node.id)
        assert updated.data == "First fact"
        assert exceeded is False

    def test_append_to_existing_data(self, store):
        node = store.create_node(name="Test", description="Test", data="Existing", parent_id="root")
        store.append_to_node(node.id, "New fact")
        updated = store.get_node(node.id)
        assert "Existing" in updated.data
        assert "New fact" in updated.data
        assert "\n" in updated.data  # Separated by newline

    def test_returns_true_when_threshold_exceeded(self, store):
        # Create node with data just below threshold
        big_data = "x" * (SPLIT_THRESHOLD * 4 - 10)  # ~SPLIT_THRESHOLD tokens
        node = store.create_node(name="Big", description="Big", data=big_data, parent_id="root")
        exceeded = store.append_to_node(node.id, "More data that pushes it over")
        assert exceeded is True

    def test_returns_false_for_nonexistent_node(self, store):
        exceeded = store.append_to_node("nonexistent", "data")
        assert exceeded is False


@pytest.mark.unit
class TestNodeContainsFact:
    """Tests for GraphMemoryStore.node_contains_fact (dedupe primitive)."""

    def test_returns_false_for_empty_node(self, store):
        node = store.create_node(name="T", description="T", data="", parent_id="root")
        assert store.node_contains_fact(node.id, "anything") is False

    def test_returns_false_for_nonexistent_node(self, store):
        assert store.node_contains_fact("nope", "anything") is False

    def test_returns_false_for_empty_fact(self, store):
        node = store.create_node(name="T", description="T", data="hello", parent_id="root")
        assert store.node_contains_fact(node.id, "   ") is False

    def test_exact_line_match(self, store):
        node = store.create_node(
            name="T", description="T", data="Line A\nLine B", parent_id="root"
        )
        assert store.node_contains_fact(node.id, "Line A") is True
        assert store.node_contains_fact(node.id, "Line B") is True
        assert store.node_contains_fact(node.id, "Line C") is False

    def test_case_and_whitespace_insensitive(self, store):
        node = store.create_node(
            name="T", description="T", data="Justin Bieber is Canadian.", parent_id="root"
        )
        assert store.node_contains_fact(node.id, "justin bieber is canadian.") is True
        assert store.node_contains_fact(node.id, "  Justin   Bieber  is Canadian.  ") is True

    def test_turkish_dotted_i_folds(self, store):
        """Locale-naive .lower() returns the wrong key for Turkish İ; the
        store must use casefold + NFKC so İstanbul / i̇stanbul collide."""
        node = store.create_node(
            name="T", description="T", data="İstanbul is large.", parent_id="root"
        )
        assert store.node_contains_fact(node.id, "i̇stanbul is large.") is True

    def test_german_sharp_s_folds_to_ss(self, store):
        node = store.create_node(
            name="T", description="T", data="Straße", parent_id="root"
        )
        assert store.node_contains_fact(node.id, "strasse") is True

    def test_substring_is_not_a_match(self, store):
        """Dedupe is line-equality, not substring — avoid false positives."""
        node = store.create_node(
            name="T", description="T", data="Justin Bieber is Canadian.", parent_id="root"
        )
        assert store.node_contains_fact(node.id, "Justin Bieber") is False


# ── update_graph_from_dialogue (end-to-end) ────────────────────────────


@pytest.mark.unit
class TestUpdateGraphFromDialogue:
    """End-to-end tests for the orchestrator function."""

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_full_flow_extracts_and_stores(self, mock_llm, store):
        """End-to-end: extraction emits branch-tagged facts, the
        orchestrator pins traversal to each fact's branch, and the
        fact lands inside that branch's subtree. Because the fixed
        branches are seeded at store creation and the branch subtree
        is empty on a fresh store, each fact writes to the branch
        root node directly."""
        # First call: extraction. With empty branches, no LLM calls are
        # needed for traversal — find_best_node goes straight to the
        # branch root because it has no children.
        mock_llm.return_value = (
            '[{"branch": "USER", "fact": "Likes jazz music"},'
            ' {"branch": "WORLD", "fact": "Acme Corp is based in London"}]'
        )

        result = update_graph_from_dialogue(
            store=store,
            summary="User likes jazz; Acme Corp is in London",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert len(result.stored) == 2
        assert result.skipped == 0
        for fact, node_name in result.stored:
            assert isinstance(fact, str) and fact
            assert isinstance(node_name, str) and node_name

        user_node = store.get_node("user")
        world_node = store.get_node("world")
        assert user_node is not None and "jazz" in user_node.data
        assert world_node is not None and "Acme" in world_node.data
        # The un-classified facts should NOT have landed on the root
        # itself — the branch pinning keeps them inside their subtree.
        root = store.get_node("root")
        assert "jazz" not in root.data
        assert "Acme" not in root.data

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_no_facts_extracted(self, mock_llm, store):

        mock_llm.return_value = "[]"

        result = update_graph_from_dialogue(
            store=store,
            summary="User said hello and asked about the weather",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.stored == []
        assert result.skipped == 0

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_extraction_failure_returns_zero(self, mock_llm, store):

        mock_llm.return_value = None

        result = update_graph_from_dialogue(
            store=store,
            summary="summary",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.stored == []
        assert result.skipped == 0

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_skips_duplicate_facts_on_second_flush(self, mock_llm, store):
        """Re-extracting the same fact from a growing daily summary must
        not duplicate it in the graph.

        Mirrors production: two diary flushes in quick succession both
        extract the same fact from the cumulative summary. The second
        flush should be a no-op for the graph, not a duplicate append.
        """
        # First flush: branch root has no children, so extraction is the
        # only LLM call needed.
        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "Justin Bieber is a Canadian singer."}]'
        )
        result1 = update_graph_from_dialogue(
            store=store,
            summary="User asked about Justin Bieber.",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )
        assert len(result1.stored) == 1
        assert result1.skipped == 0

        # Second flush: same fact re-extracted, should be deduped.
        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "Justin Bieber is a Canadian singer."}]'
        )
        result2 = update_graph_from_dialogue(
            store=store,
            summary="User asked about Justin Bieber.",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )
        assert result2.stored == [], "duplicate fact should not be reported as learned"
        assert result2.skipped == 1, "duplicate must be counted so the CLI can still log it"

        world = store.get_node("world")
        assert world.data.count("Justin Bieber") == 1

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_dedupe_handles_non_latin_case_folding(self, mock_llm, store):
        """Locale-safe folding: Turkish İ/i̇ and German ß/ss collapse to the
        same dedupe key. Python's ``str.lower`` would miss these cases —
        the store uses ``casefold`` + NFKC instead."""
        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "İstanbul is the largest city in Turkey."}]'
        )
        update_graph_from_dialogue(
            store=store,
            summary="s",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "i̇stanbul is the largest city in turkey."}]'
        )
        result = update_graph_from_dialogue(
            store=store,
            summary="s",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )
        assert result.stored == [], "Turkish İ/i̇ variants should dedupe"
        assert result.skipped == 1

        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "Straße names are ordered alphabetically."}]'
        )
        update_graph_from_dialogue(
            store=store,
            summary="s",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "strasse names are ordered alphabetically."}]'
        )
        result = update_graph_from_dialogue(
            store=store,
            summary="s",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )
        assert result.stored == [], "German ß should casefold to ss for dedupe"
        assert result.skipped == 1

    @patch("src.jarvis.memory.graph_ops._llm_pick_best_child")
    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_dedupe_on_child_after_split(self, mock_llm, mock_pick, store):
        """Dedupe must trigger on whichever node traversal lands on, not
        only on the branch root. Pre-populate a child of ``world`` with a
        fact, force the picker to descend into it, then re-extract the
        same fact and assert no duplicate append."""
        child = store.create_node(
            name="Music",
            description="Musicians, bands, songs.",
            data="Justin Bieber is a Canadian singer.",
            parent_id="world",
        )

        # Force the picker to descend into the Music child on every call.
        mock_pick.return_value = child.id

        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "Justin Bieber is a Canadian singer."}]'
        )
        result = update_graph_from_dialogue(
            store=store,
            summary="User asked about Justin Bieber.",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.stored == [], "duplicate on a child node should still dedupe"
        assert result.skipped == 1
        refreshed = store.get_node(child.id)
        assert refreshed.data.count("Justin Bieber is a Canadian singer.") == 1


# ── Merge (rewrite-on-write consolidation) ────────────────────────────


@pytest.mark.unit
class TestMergeNodeData:
    """merge_node_data rewrites a node's data via an LLM consolidation pass."""

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_rewrites_node_with_consolidated_facts(self, mock_llm, store):
        node = store.create_node(
            name="Test",
            description="d",
            data="User likes coffee.\nUser is from Hackney.\nUser drives a Tesla.",
            parent_id="user",
        )
        new_fact = "User dislikes coffee and prefers cycling over driving."
        mock_llm.return_value = (
            '{"facts": ["' + new_fact + '", "User is from Hackney."]}'
        )

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=[new_fact],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is True
        assert result.incorporated_indices == [0]
        refreshed = store.get_node(node.id)
        assert "User dislikes coffee" in refreshed.data
        assert "User likes coffee." not in refreshed.data
        assert "User is from Hackney." in refreshed.data

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_empty_node_skips_llm(self, mock_llm, store):
        node = store.create_node(name="T", description="d", data="", parent_id="user")

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=["any"],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is False
        mock_llm.assert_not_called()

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_llm_failure_leaves_node_untouched(self, mock_llm, store):
        node = store.create_node(
            name="T", description="d", data="Existing fact.", parent_id="user",
        )
        mock_llm.return_value = None

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=["any"],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is False
        assert store.get_node(node.id).data == "Existing fact."

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_unparseable_response_leaves_node_untouched(self, mock_llm, store):
        node = store.create_node(
            name="T", description="d", data="Existing fact.", parent_id="user",
        )
        mock_llm.return_value = "no json here"

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=["any"],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is False
        assert store.get_node(node.id).data == "Existing fact."

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_empty_rewrite_treated_as_failure(self, mock_llm, store):
        """A non-empty existing payload should never collapse to nothing.
        Treat empty-list rewrites as suspect and refuse to wipe the node."""
        node = store.create_node(
            name="T", description="d", data="A.\nB.", parent_id="user",
        )
        mock_llm.return_value = '{"facts": []}'

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=["C"],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is False
        assert store.get_node(node.id).data == "A.\nB."

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_non_string_facts_filtered(self, mock_llm, store):
        node = store.create_node(
            name="T", description="d", data="A.", parent_id="user",
        )
        mock_llm.return_value = (
            '{"facts": ["Kept fact.", 42, null, "  ", "Another kept."]}'
        )

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=["x"],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is True
        assert store.get_node(node.id).data == "Kept fact.\nAnother kept."

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_hallucination_guard_rejects_oversized_rewrite(self, mock_llm, store):
        """Consolidation rules can shrink or hold but should never grow
        the node beyond `existing + new + small slack`. Reject rewrites
        that explode in size — they mean the model invented content."""
        node = store.create_node(
            name="T", description="d", data="One existing fact.", parent_id="user",
        )
        # 1 existing + 1 new + slack(2) = cap of 4. Return 8 facts.
        bogus = '{"facts": [' + ", ".join(f'"Invented {i}."' for i in range(8)) + "]}"
        mock_llm.return_value = bogus

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=["A new fact."],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is False
        assert store.get_node(node.id).data == "One existing fact."

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_incorporated_indices_track_each_new_fact(self, mock_llm, store):
        """When a batch contains multiple new facts and the rewrite
        consolidates one of them out, the result should list only the
        indices that survived. Caller uses this to avoid reporting
        merged-out facts as 'newly stored'."""
        node = store.create_node(
            name="T", description="d", data="Old A.", parent_id="user",
        )
        # New facts at indices 0 and 1. Rewrite keeps only the first.
        mock_llm.return_value = '{"facts": ["Fresh One.", "Old A."]}'

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=["Fresh One.", "Fresh Two."],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is True
        assert result.incorporated_indices == [0]

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_empty_new_facts_runs_self_consolidation(self, mock_llm, store):
        """Calling with new_facts=[] should still hit the LLM and run a
        consolidation pass over the existing data alone — the migration
        path for nodes that accumulated contradictions before merge-on-
        write landed."""
        node = store.create_node(
            name="T",
            description="d",
            data="User has a need for X.\nUser does not have a need for X.",
            parent_id="user",
        )
        mock_llm.return_value = '{"facts": ["User does not have a need for X."]}'

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=[],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is True
        assert result.incorporated_indices == []
        assert store.get_node(node.id).data == "User does not have a need for X."

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_extracts_facts_object_from_markdown_fenced_response(self, mock_llm, store):
        """Tighter regex must still pull the object out when the model
        wraps it in a markdown code fence."""
        node = store.create_node(
            name="T", description="d", data="Old.", parent_id="user",
        )
        mock_llm.return_value = (
            '```json\n{"facts": ["New."]}\n```'
        )

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=["New."],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is True
        assert "New." in store.get_node(node.id).data

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_hallucination_guard_boundary_pins_to_slack_constant(self, mock_llm, store):
        """The guard's cap is `existing + new + _MERGE_GROWTH_SLACK`.
        Pin both sides of the boundary against the named constant so a
        future tweak to the slack can't silently drift the guard."""
        from src.jarvis.memory.graph_ops import (
            _MERGE_GROWTH_SLACK,
            _split_data_lines,
        )

        existing_data = "E1.\nE2."
        node = store.create_node(
            name="T", description="d", data=existing_data, parent_id="user",
        )
        # Derive `existing_count` via the same helper production uses
        # so the boundary math can't drift if the parsing rule changes.
        existing_count = len(_split_data_lines(existing_data))
        new_facts = ["N1."]
        cap = existing_count + len(new_facts) + _MERGE_GROWTH_SLACK

        # At the cap → accepted.
        at_cap = '{"facts": [' + ", ".join(f'"L{i}."' for i in range(cap)) + "]}"
        mock_llm.return_value = at_cap
        result = merge_node_data(
            store=store, node_id=node.id, new_facts=new_facts,
            ollama_base_url="http://localhost", ollama_chat_model="model",
        )
        assert result.success is True

        # One over the cap → rejected.
        node2 = store.create_node(
            name="T2", description="d", data="E1.\nE2.", parent_id="user",
        )
        over_cap = '{"facts": [' + ", ".join(f'"L{i}."' for i in range(cap + 1)) + "]}"
        mock_llm.return_value = over_cap
        result = merge_node_data(
            store=store, node_id=node2.id, new_facts=new_facts,
            ollama_base_url="http://localhost", ollama_chat_model="model",
        )
        assert result.success is False

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_incorporated_indices_tolerant_to_trailing_punctuation(self, mock_llm, store):
        """Picker models routinely drop the trailing full stop when
        rewriting facts ("X." → "X"). A strict normalise_fact match
        would then return `incorporated_indices=[]` even when the
        fact clearly landed, and the orchestrator would silently
        under-report every batched flush as '0 stored'. Pin the
        tolerant match against this exact rephrasing."""
        node = store.create_node(
            name="T", description="d", data="Old.", parent_id="user",
        )
        # Picker drops the trailing period from the new fact.
        mock_llm.return_value = '{"facts": ["The user has a dog"]}'

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=["The user has a dog."],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is True
        assert result.incorporated_indices == [0], (
            "Trailing-period rephrasing must still count as incorporation."
        )

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_prompt_body_matches_parsed_line_count(self, mock_llm, store):
        """The CURRENT facts block sent to the picker must contain
        exactly the lines `_split_data_lines` produced — blank lines
        and whitespace-only lines stripped from both signals
        consistently. Locks the round-6 consolidation that made the
        helper the sole parser."""
        node = store.create_node(
            name="T",
            description="d",
            # Mid-blob blank line + a whitespace-only line. The old
            # `node.data.strip()` path would have left these in the
            # prompt body while the parsed list dropped them.
            data="A.\n\n  \nB.",
            parent_id="user",
        )
        mock_llm.return_value = '{"facts": ["A.", "B."]}'

        merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=[],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        sent_user_content = mock_llm.call_args.kwargs["user_content"]
        assert "CURRENT facts on the node" in sent_user_content
        assert "A.\nB." in sent_user_content
        # The dropped blank/whitespace lines must not survive into the prompt.
        assert "A.\n\n" not in sent_user_content
        assert "  \n" not in sent_user_content

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_extracts_object_with_braces_inside_fact_strings(self, mock_llm, store):
        """A fact whose text contains literal `{` or `}` must still
        parse — `raw_decode` handles balanced nesting that a
        `[^{}]`-scoped regex would have refused to match."""
        node = store.create_node(
            name="T", description="d", data="Old.", parent_id="user",
        )
        mock_llm.return_value = (
            'preamble {"facts": ["User uses {placeholder} syntax in templates."]} trailing'
        )

        result = merge_node_data(
            store=store,
            node_id=node.id,
            new_facts=["User uses {placeholder} syntax in templates."],
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        assert result.success is True
        assert "{placeholder}" in store.get_node(node.id).data


@pytest.mark.unit
class TestMergeSystemPromptInvariants:
    """Pin the rule set the merge prompt must teach. Behaviour against a
    real picker model is covered by the merge_consolidation evals; this
    catches a future edit that silently drops a rule from the system
    prompt's text. Each rule is referenced at least once below."""

    def test_prompt_lists_supersession_rule(self):
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT
        assert "CONTRADICTION" in _MERGE_SYSTEM_PROMPT

    def test_prompt_lists_dedupe_rule(self):
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT
        assert "DUPLICATION" in _MERGE_SYSTEM_PROMPT

    def test_prompt_lists_consolidation_rule(self):
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT
        assert "CONSOLIDATION" in _MERGE_SYSTEM_PROMPT

    def test_prompt_lists_independence_rule(self):
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT
        assert "INDEPENDENCE" in _MERGE_SYSTEM_PROMPT

    def test_prompt_lists_pruning_rule(self):
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT
        assert "PRUNING" in _MERGE_SYSTEM_PROMPT

    def test_prompt_lists_meta_narrative_rule_with_assistant_examples(self):
        """The META-NARRATIVE rule must be present and must give the
        picker model concrete examples of the verb forms to drop. The
        bug it exists to fix was a 'The assistant is unable to ...'
        line surviving consolidate-all sweeps because no rule covered
        capability denials. If the rule label or its trigger phrasings
        get edited away, this test fails. Scoped to the rule's own
        section (META-NARRATIVE up to the next numbered rule) so the
        assertions can't be satisfied by unrelated text elsewhere in
        the prompt."""
        from src.jarvis.memory.graph_ops import _MERGE_SYSTEM_PROMPT
        assert "META-NARRATIVE" in _MERGE_SYSTEM_PROMPT
        rule_start = _MERGE_SYSTEM_PROMPT.index("META-NARRATIVE")
        # Bound the section by the next numbered rule (e.g. '\n7. ')
        # OR the response-format trailer ('\nRespond with ...') that
        # follows the rule list. The trailer fallback matters when
        # META-NARRATIVE is the LAST numbered rule — without it the
        # section would balloon to include the JSON schema text and
        # the in-section keyword checks could pass on a future prompt
        # that no longer mentions those keywords inside the rule
        # itself.
        end_pattern = re.search(
            r"\n\d+\. |\nRespond with\b",
            _MERGE_SYSTEM_PROMPT[rule_start:],
        )
        rule_end = rule_start + (
            end_pattern.start() if end_pattern else len(_MERGE_SYSTEM_PROMPT) - rule_start
        )
        section = _MERGE_SYSTEM_PROMPT[rule_start:rule_end]
        # The two shapes the bug report surfaced explicitly must be
        # named in this rule's section, not just somewhere else.
        assert "The assistant" in section
        assert "unable to" in section
        # Counter-protection: the rule must not over-prune real
        # directives, so an exception clause is required in-section.
        assert "directive" in section.lower()


@pytest.mark.unit
class TestConsolidateAllPopulatedNodes:
    """consolidate_all_populated_nodes runs a self-merge pass on every
    populated node. Migration path for the contradiction backlog."""

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_walks_only_populated_nodes(self, mock_llm, store):
        # Two populated nodes + one empty node + the seeded branch roots.
        store.create_node(
            name="A", description="d",
            data="Line 1.\nContradicts line 1.", parent_id="user",
        )
        store.create_node(
            name="B", description="d",
            data="Line X.\nDuplicate of line X.", parent_id="world",
        )
        store.create_node(name="Empty", description="d", data="", parent_id="user")

        # Two LLM calls expected (one per populated node).
        mock_llm.side_effect = [
            '{"facts": ["Line 1."]}',
            '{"facts": ["Line X."]}',
        ]

        results = list(consolidate_all_populated_nodes(
            store=store,
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        ))

        names = {n for n, _, _ in results}
        assert "A" in names and "B" in names
        assert "Empty" not in names
        assert mock_llm.call_count == 2
        # Each consolidated node shrank from 2 lines to 1.
        for _, before, after in results:
            assert before == 2
            assert after == 1

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_failure_per_node_does_not_abort_the_rest(self, mock_llm, store):
        store.create_node(name="A", description="d", data="X.", parent_id="user")
        store.create_node(name="B", description="d", data="Y.", parent_id="world")

        # First node's LLM returns junk → fail-open. Second succeeds.
        mock_llm.side_effect = ["garbage", '{"facts": ["Y."]}']

        results = list(consolidate_all_populated_nodes(
            store=store,
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        ))

        assert len(results) == 2
        # Both nodes still have their data — fail-open leaves untouched.
        names = {n for n, _, _ in results}
        assert names == {"A", "B"}

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_yields_per_node_for_streaming(self, mock_llm, store):
        """The op must be a generator that yields each result as the
        walk progresses — buffering the whole sweep before yielding
        defeats the streaming NDJSON endpoint that wraps it."""
        store.create_node(name="A", description="d", data="A.", parent_id="user")
        store.create_node(name="B", description="d", data="B.", parent_id="world")
        mock_llm.side_effect = ['{"facts": ["A."]}', '{"facts": ["B."]}']

        gen = consolidate_all_populated_nodes(
            store=store,
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        # First call only triggers one LLM hit (the first node), which
        # proves the second node hasn't been processed yet.
        first = next(gen)
        assert mock_llm.call_count == 1
        assert first[0] in {"A", "B"}

        # Draining the generator runs the rest.
        rest = list(gen)
        assert len(rest) == 1
        assert mock_llm.call_count == 2


@pytest.mark.unit
class TestUpdateGraphMerge:
    """update_graph_from_dialogue runs the merge pass on populated nodes."""

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_contradiction_replaces_old_fact_via_merge(self, mock_llm, store):
        """Regression: 'user does not need a daily check-in' should
        replace the prior 'user has a need for a daily check-in' line
        on the User branch root via the merge rewrite, not coexist."""
        store.update_node(
            "user",
            data="The user has a need for a simple daily check-in system.",
        )

        # Two LLM calls: extraction then merge.
        mock_llm.side_effect = [
            '[{"branch": "USER", "fact": "The user does not need a daily check-in system."}]',
            '{"facts": ["The user does not need a daily check-in system."]}',
        ]

        result = update_graph_from_dialogue(
            store=store,
            summary="User clarified they do not need a check-in.",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        stored = result.stored
        assert len(stored) == 1
        user_data = store.get_node("user").data
        assert "does not need" in user_data
        assert "has a need for" not in user_data

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_merge_failure_falls_back_to_append(self, mock_llm, store):
        """A flaky merge LLM must not block the write — the fact still
        lands via plain append so we never lose data on transient
        failures."""
        store.update_node("user", data="Existing line.")

        mock_llm.side_effect = [
            '[{"branch": "USER", "fact": "Brand new fact."}]',
            "garbage with no json",
        ]

        result = update_graph_from_dialogue(
            store=store,
            summary="s",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        stored = result.stored
        assert len(stored) == 1
        data = store.get_node("user").data
        assert "Existing line." in data
        assert "Brand new fact." in data

    @patch("src.jarvis.memory.graph_ops.call_llm_direct")
    def test_cold_start_skips_merge_llm_call(self, mock_llm, store):
        """When the chosen node has no data, the merge pass should
        short-circuit (no LLM call) and the fact lands via plain
        append — keeps cold-start writes cheap."""
        # Only the extraction call should hit the LLM.
        mock_llm.return_value = (
            '[{"branch": "WORLD", "fact": "Acme Corp is based in London."}]'
        )

        result = update_graph_from_dialogue(
            store=store,
            summary="s",
            ollama_base_url="http://localhost",
            ollama_chat_model="model",
        )

        stored = result.stored
        assert len(stored) == 1
        assert "Acme Corp" in store.get_node("world").data
        # Exactly one LLM call: extraction. Empty branch root means the
        # picker is skipped (no children) and the merge step short-
        # circuits before hitting the LLM.
        assert mock_llm.call_count == 1


# ── Warm profile helpers ──────────────────────────────────────────────


@pytest.mark.unit
class TestBuildWarmProfile:
    """build_warm_profile reads User + Directives branches."""

    def test_empty_graph_returns_empty_sections(self, store):
        profile = build_warm_profile(store)
        assert profile == {"user": "", "directives": ""}

    def test_collects_user_branch_only(self, store):
        store.create_node(
            name="Identity",
            description="Who the user is",
            data="User's name is Baris.",
            parent_id=BRANCH_USER,
        )
        profile = build_warm_profile(store)
        assert "Baris" in profile["user"]
        assert profile["directives"] == ""

    def test_collects_directives_branch_only(self, store):
        store.create_node(
            name="Tone",
            description="Reply style",
            data="Always reply briefly.",
            parent_id=BRANCH_DIRECTIVES,
        )
        profile = build_warm_profile(store)
        assert "briefly" in profile["directives"]
        assert profile["user"] == ""

    def test_ignores_world_branch(self, store):
        store.create_node(
            name="News",
            description="External fact",
            data="Paris is the capital of France.",
            parent_id=BRANCH_WORLD,
        )
        profile = build_warm_profile(store)
        assert profile["user"] == ""
        assert profile["directives"] == ""

    def test_respects_char_caps(self, store):
        long_fact = "x" * 5000
        store.create_node(
            name="Long", description="d", data=long_fact, parent_id=BRANCH_USER,
        )
        profile = build_warm_profile(store, user_max_chars=200)
        assert len(profile["user"]) <= 200
        assert profile["user"].endswith("…")

    def test_walks_branch_subtree(self, store):
        child = store.create_node(
            name="Sub", description="child of user",
            data="User lives in Brighton.", parent_id=BRANCH_USER,
        )
        store.create_node(
            name="Grandchild", description="deeper",
            data="User moved in 2023.", parent_id=child.id,
        )
        profile = build_warm_profile(store)
        assert "Brighton" in profile["user"]
        assert "2023" in profile["user"]


@pytest.mark.unit
class TestFormatWarmProfileBlock:
    """format_warm_profile_block uses denial-template mirroring."""

    def test_empty_profile_returns_empty_string(self):
        assert format_warm_profile_block({"user": "", "directives": ""}) == ""

    def test_user_only_omits_directives_heading(self):
        out = format_warm_profile_block({"user": "Name is Baris.", "directives": ""})
        assert "INFORMATION THE USER HAS SHARED" in out
        assert "STANDING INSTRUCTIONS" not in out
        assert "Baris" in out

    def test_directives_only_omits_user_heading(self):
        out = format_warm_profile_block({"user": "", "directives": "Reply briefly."})
        assert "STANDING INSTRUCTIONS" in out
        assert "INFORMATION THE USER HAS SHARED" not in out
        assert "briefly" in out

    def test_both_sections_rendered(self):
        out = format_warm_profile_block(
            {"user": "Name is Baris.", "directives": "Reply briefly."}
        )
        assert "INFORMATION THE USER HAS SHARED" in out
        assert "STANDING INSTRUCTIONS" in out
        # User section appears before directives
        assert out.index("INFORMATION THE USER") < out.index("STANDING INSTRUCTIONS")

    def test_whitespace_only_treated_as_empty(self):
        assert format_warm_profile_block({"user": "   \n", "directives": "\t"}) == ""