Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled
Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join plus voice receive/playback, and pluggable VNC screen broadcast (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer), because Discord does not allow bot accounts to send video; native screen broadcast only works via a Node selfbot library.
This commit is contained in:
192
tests/test_memory_viewer_diary_scrub_api.py
Normal file
192
tests/test_memory_viewer_diary_scrub_api.py
Normal file
@@ -0,0 +1,192 @@
|
||||
"""Tests for the diary scrub HTTP endpoint.
|
||||
|
||||
The endpoint streams NDJSON, and the contract under test is:
|
||||
1. it walks every diary row and writes back rewritten text;
|
||||
2. event payloads contain only counts, never raw summary text — the diary
|
||||
clean button must not become a data-exfiltration channel through the
|
||||
streaming progress UI.
|
||||
|
||||
The endpoint is now backed by an LLM rewrite (the chat model is asked to
|
||||
remove deflection narration from each row). Tests stub the LLM so they
|
||||
stay deterministic and offline.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
try:
|
||||
import flask # noqa: F401
|
||||
|
||||
_HAS_FLASK = True
|
||||
except ImportError:
|
||||
_HAS_FLASK = False
|
||||
|
||||
|
||||
@pytest.mark.unit
@pytest.mark.skipif(not _HAS_FLASK, reason="Flask not available")
class TestDiaryScrubEndpoint:
    """Exercise ``POST /api/diary/scrub-deflections`` against a seeded diary.

    Every test runs against a fresh temporary database holding three diary
    rows (two that contain an assistant-deflection sentence, one that does
    not) and a stubbed LLM rewrite, so the suite is deterministic and offline.
    """

    @pytest.fixture(autouse=True)
    def setup_app(self, tmp_path, monkeypatch):
        """Seed a temp database, stub the LLM call and build a test client.

        Sets ``self.client``, ``self.db_path`` and ``self.seed_db`` for the
        tests. All patches go through ``monkeypatch`` so they are undone
        when the test finishes.
        """
        # Import via the same module paths the endpoint itself uses
        # (no ``src.`` prefix). With both repo-root and ``src/`` on
        # ``sys.path`` (see ``tests/conftest.py``), ``src.jarvis.x`` and
        # ``jarvis.x`` resolve to distinct module instances and a
        # monkeypatch on one does not land on the other.
        from desktop_app import memory_viewer
        import jarvis.memory.conversation as cmod
        from jarvis.memory.db import Database

        db_path = str(tmp_path / "test.db")
        # Seed before the endpoint opens its own connection — the
        # endpoint's Database instance reads the same file.
        seed_db = Database(db_path)
        for date_utc, summary in [
            (
                "2026-04-10",
                "The user asked to open YouTube. The assistant explained it could not open applications.",
            ),
            (
                "2026-04-15",
                "The user prefers Celsius. The user lives in Hackney.",
            ),
            (
                "2026-04-27",
                "The user asked about a restaurant. The assistant did not have specific information.",
            ),
        ]:
            seed_db.upsert_conversation_summary(
                date_utc=date_utc, summary=summary, topics=None, source_app="jarvis",
            )

        # Make the endpoint use the seeded path.
        monkeypatch.setattr(memory_viewer, "_get_db_path", lambda: db_path)

        # Stub the LLM rewrite call. The fake model returns a text with the
        # known-bad sentences stripped out and everything else verbatim.
        # This keeps the endpoint test deterministic; the rewrite logic
        # itself is exercised in tests/test_diary_rewrite_sweep.py.
        def fake_rewrite(base_url, model, system_prompt, user_prompt, **kwargs):
            """Return ``user_prompt`` minus fences and "the assistant" sentences."""
            # The user prompt is the diary text wrapped in untrusted-input
            # fence markers — strip them to recover the original.
            text = user_prompt
            for marker in ("<<<BEGIN UNTRUSTED WEB EXTRACT>>>", "<<<END UNTRUSTED WEB EXTRACT>>>"):
                text = text.replace(marker, "")
            text = text.replace("Return the cleaned text only.", "").strip()
            # Drop any sentence containing "the assistant".
            sentences = [s.strip() for s in text.split(".") if s.strip()]
            kept = [s for s in sentences if "the assistant" not in s.lower()]
            # Re-joining with ". " plus a final "." reproduces a row with no
            # dropped sentences exactly, which the "byte-identical" test needs.
            return ". ".join(kept) + ("." if kept else "")

        monkeypatch.setattr(cmod, "call_llm_direct", fake_rewrite)

        # ``memory_viewer.app`` is a module-level object shared with every
        # other test module; ``setitem`` restores the previous TESTING value
        # at teardown instead of leaving the flag flipped for the session.
        monkeypatch.setitem(memory_viewer.app.config, "TESTING", True)
        self.client = memory_viewer.app.test_client()
        self.db_path = db_path
        self.seed_db = seed_db
        yield

    def _stream(self) -> list[dict]:
        """POST to the scrub endpoint and return its NDJSON events as dicts.

        Asserts the response status is 200; blank lines in the body are
        skipped.
        """
        resp = self.client.post("/api/diary/scrub-deflections")
        assert resp.status_code == 200
        events = []
        for line in resp.data.decode("utf-8").splitlines():
            if not line.strip():
                continue
            events.append(json.loads(line))
        return events

    def test_endpoint_streams_start_progress_complete(self):
        """The stream opens with ``start``, ends with ``complete`` and has
        one ``progress`` event per seeded row.
        """
        events = self._stream()
        types = [e["type"] for e in events]
        assert types[0] == "start"
        assert types[-1] == "complete"
        assert types.count("progress") == 3

    def test_endpoint_writes_back_cleaned_summaries(self):
        """Rewritten text is persisted; a row with nothing to drop is unchanged."""
        self._stream()
        rows = {r["date_utc"]: r["summary"] for r in self.seed_db.get_all_conversation_summaries()}
        assert "could not open" not in rows["2026-04-10"].lower()
        assert "did not have" not in rows["2026-04-27"].lower()
        # Untouched row is byte-identical.
        assert rows["2026-04-15"] == "The user prefers Celsius. The user lives in Hackney."

    def test_endpoint_payload_never_includes_raw_summary_text(self):
        """Privacy contract: the streaming UI must not echo diary content
        into the browser. Only counts and the date are allowed.
        """
        events = self._stream()
        # An empty stream would make the loop below pass without checking
        # anything.
        assert events, "endpoint produced no events"
        # Sentinel substrings unique to the seeded diary content.
        forbidden = ["youtube", "could not open", "celsius", "hackney", "restaurant", "did not have"]
        for ev in events:
            blob = json.dumps(ev).lower()
            for needle in forbidden:
                assert needle not in blob, (
                    f"diary content {needle!r} leaked into event {ev}"
                )

    def test_progress_event_keys_are_a_known_whitelist(self):
        """Defence-in-depth for the privacy contract: rather than just
        proving sentinels are absent, lock down the *shape* of progress
        events. Any future field added to ``rewrite_all_diary_summaries``
        that could carry summary text must trip this test, forcing a
        review.
        """
        events = self._stream()
        allowed = {
            "type", "processed", "total",
            "date_utc", "chars_before", "chars_after",
            "rewritten", "would_empty", "embedding_refreshed", "error",
        }
        progress_events = [ev for ev in events if ev.get("type") == "progress"]
        # Without any progress events the whitelist check would be vacuous.
        assert progress_events, "endpoint produced no progress events"
        for ev in progress_events:
            unknown = set(ev.keys()) - allowed
            assert not unknown, (
                f"unexpected progress-event keys leaked through the privacy "
                f"contract: {unknown}. Add to whitelist deliberately, never "
                f"by accident — any new field is a potential data exfiltration "
                f"channel through the streaming UI."
            )

    def test_complete_event_reports_aggregate_counts(self):
        """The final event carries the totals for the whole sweep."""
        events = self._stream()
        complete = events[-1]
        assert complete["type"] == "complete"
        assert complete["rows"] == 3
        # Two of the three rows had assistant-deflection sentences.
        assert complete["rows_rewritten"] == 2
        assert complete["rows_would_empty"] == 0

    def test_diary_button_handler_wired_outside_graph_init(self):
        """Regression for the field bug where clicking the diary maintenance
        button did nothing.

        The diary tab is the default tab and renders on page load, but the
        ``btn-scrub-deflections`` click handler was originally wired inside
        ``initGraph()`` — which only runs when the user opens the Knowledge
        tab. A user who clicked the button on the diary tab without ever
        visiting Knowledge first got no response and no error.

        This test asserts the handler is wired in the always-run section
        of the page setup script, not nested inside ``initGraph``.
        """
        from desktop_app import memory_viewer

        client = memory_viewer.app.test_client()
        html = client.get("/").get_data(as_text=True)

        wiring = "document.getElementById('btn-scrub-deflections')"
        assert wiring in html, "diary maintenance button has no click handler in the rendered page"

        # Fail with a readable message, rather than a bare ValueError from
        # ``str.index``, if the reference point itself has been renamed or
        # removed from the page.
        init_graph_marker = "async function initGraph()"
        assert init_graph_marker in html, (
            "'async function initGraph()' not found in the rendered page; "
            "cannot verify where btn-scrub-deflections is wired"
        )

        # The wiring must appear before the ``async function initGraph()``
        # block — anything inside that function only runs on Knowledge-tab
        # entry, which is the bug we are guarding against.
        wiring_idx = html.index(wiring)
        init_graph_idx = html.index(init_graph_marker)
        assert wiring_idx < init_graph_idx, (
            "btn-scrub-deflections wiring is nested inside initGraph(); "
            "diary button will not work until the user first opens the Knowledge tab"
        )
|
||||
Reference in New Issue
Block a user