Add Discord-native hybrid front-end for Jarvis (bot + bridge)
Some checks failed
Release / semantic-release (push) Successful in 59s
tests / Unit tests (Linux, Python 3.11) (push) Successful in 13m45s
Release / build-linux (push) Failing after 7m47s
Release / build-windows (push) Has been cancelled
Release / build-macos (arm64, macos-latest) (push) Has been cancelled
Release / build-macos (x64, macos-15-intel) (push) Has been cancelled
Release / release-main (push) Has been cancelled
Release / release-develop (push) Has been cancelled

Transform isair/jarvis into a Discord-controlled voice assistant running on
the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact.

- bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral),
  voice channel join + voice receive/playback, pluggable VNC screen broadcast
  (selfbot live / noVNC / screenshot)
- bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS
  behind a thin localhost HTTP API
- .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite,
  docs/language-comparison.md and docs/vnc-xfce-setup.md

Language decision: hybrid (Python brain + Node/bun Discord layer) because
Discord blocks bot video; native screen broadcast only works via a Node
selfbot library.
This commit is contained in:
javis-bot
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions

200
tests/test_eval_helpers.py Normal file
View File

@@ -0,0 +1,200 @@
"""Unit tests for shared eval helpers.
These helpers shape what the eval suite actually measures — specifically
the fallback-reply detection that turns the malformed-output guard from
a silent shield into a loud failure. Pinning the helpers at unit level
means a typo or drift in the canned fallback strings in
``src/jarvis/reply/engine.py`` is caught without needing to run a live
LLM eval.
"""
from pathlib import Path
import sys
import pytest
# Repository root: two directory levels above this file's resolved path.
_ROOT = Path(__file__).resolve().parent.parent
# Directory that holds the eval ``helpers`` module imported below; it is
# placed at the front of ``sys.path`` (once) so that import resolves.
_EVALS = _ROOT.joinpath("evals")
if sys.path.count(str(_EVALS)) == 0:
    sys.path.insert(0, str(_EVALS))
from helpers import ( # noqa: E402
FALLBACK_REPLY_PHRASES,
MAX_TURNS_DIGEST_PHRASES,
assert_not_fallback_reply,
assert_not_max_turns_digest,
is_fallback_reply,
is_max_turns_digest,
)
class TestIsFallbackReply:
    """Checks for ``is_fallback_reply``.

    Each canned fallback string the reply engine may emit for malformed
    model output has to be recognised, while ordinary replies must pass
    through unflagged.
    """

    # Replies the helper is expected to flag as canned fallbacks.
    _CANNED_FALLBACKS = (
        "I had trouble understanding that request. Could you try rephrasing it?",
        "I had trouble understanding that request.",
        "Sorry, I had trouble processing that. Could you try again?",
        "sorry, i had trouble performing the web search.",
        # Case-insensitive match.
        "I HAD TROUBLE UNDERSTANDING THAT REQUEST.",
    )

    # Ordinary answers the helper has to leave alone.
    _GENUINE_REPLIES = (
        "The weather in Hackney is 14°C and partly cloudy.",
        "I found three results: Annie Lennox, Lulu, and Shirley Manson.",
        "Sure — I opened YouTube for you.",
        "I don't have that information, but I can search for it.",
    )

    def test_empty_and_none_are_not_fallback(self):
        # Missing text is not the canned fallback; the helper must answer
        # with the literal ``False`` for both blank inputs.
        for blank in (None, ""):
            assert is_fallback_reply(blank) is False

    @pytest.mark.parametrize("reply", _CANNED_FALLBACKS)
    def test_canned_fallbacks_are_flagged(self, reply):
        message = (
            f"Helper should flag {reply!r} as the engine's canned "
            "malformed-guard fallback."
        )
        assert is_fallback_reply(reply), message

    @pytest.mark.parametrize("reply", _GENUINE_REPLIES)
    def test_real_replies_are_not_flagged(self, reply):
        message = f"Helper must NOT flag genuine replies: {reply!r}"
        assert not is_fallback_reply(reply), message
class TestFallbackPhrasesAgainstEngineSource:
    """Pin the helper's phrase list against the actual canned strings in
    the reply engine. If someone changes a fallback string in
    ``engine.py`` without updating the helper, this test fails and the
    eval suite doesn't silently revert to "fallback looks like success".
    """

    def test_every_phrase_appears_in_engine_source(self):
        """Every entry of ``FALLBACK_REPLY_PHRASES`` must occur, verbatim,
        in the lower-cased text of ``src/jarvis/reply/engine.py``."""
        engine_path = _ROOT / "src" / "jarvis" / "reply" / "engine.py"
        # Decode explicitly as UTF-8 (the default encoding of Python source
        # files). A bare ``read_text()`` uses the locale's preferred
        # encoding, which can fail or mis-decode on platforms whose default
        # is not UTF-8.
        engine_src_lower = engine_path.read_text(encoding="utf-8").lower()

        # An empty phrase list would make the loop below pass vacuously,
        # which defeats the purpose of a drift pin.
        assert FALLBACK_REPLY_PHRASES, (
            "FALLBACK_REPLY_PHRASES in evals/helpers.py is empty; there is "
            "nothing to pin against engine.py."
        )

        for phrase in FALLBACK_REPLY_PHRASES:
            assert phrase in engine_src_lower, (
                f"Fallback phrase {phrase!r} no longer appears in "
                f"engine.py. Either the engine's canned reply changed "
                f"(update FALLBACK_REPLY_PHRASES in evals/helpers.py) "
                f"or the phrase list has drifted."
            )
class TestAssertNotFallbackReply:
    """Behaviour of ``assert_not_fallback_reply``: silent on genuine or
    empty replies, a pytest failure carrying the context tag when handed a
    canned fallback."""

    def test_passes_on_real_reply(self):
        # Should not raise.
        assert_not_fallback_reply("Today is sunny in Hackney.", context="weather")

    def test_fails_on_canned_fallback(self):
        # pytest.fail raises _pytest.outcomes.Failed, which inherits from
        # BaseException (not Exception), so ``pytest.raises(Exception)``
        # would miss it. Expect exactly that type through the public alias
        # ``pytest.fail.Exception`` rather than a blanket ``BaseException``,
        # which would also accept KeyboardInterrupt, SystemExit or an
        # unrelated crash inside the helper.
        with pytest.raises(pytest.fail.Exception) as exc_info:
            assert_not_fallback_reply(
                "I had trouble understanding that request. Could you try rephrasing it?",
                context="weather-warm-memory",
            )
        # Context tag should show up in the message so failing evals point
        # at the specific parametrised variant.
        assert "weather-warm-memory" in str(exc_info.value)

    def test_passes_on_empty(self):
        # Empty response is a separate failure mode (no text at all),
        # not the malformed-guard fallback — don't conflate them.
        assert_not_fallback_reply("", context="x")
        assert_not_fallback_reply(None, context="x")
class TestIsMaxTurnsDigest:
    """Checks for ``is_max_turns_digest``.

    The helper has to recognise the canonical caveat openings attributed
    to the ``digest_loop_for_max_turns`` summariser and leave ordinary
    replies unflagged.
    """

    # Caveat-style replies the helper is expected to flag.
    _CAVEATS = (
        "I could not fully finish your request. I found the weather is 8°C.",
        "I couldn't fully finish this. I found the London forecast looks cloudy today.",
        "I was unable to fully finish the request, but I got the forecast.",
        "I wasn't able to fully finish that, but here's what I found.",
        # Case-insensitive match.
        "I COULD NOT FULLY FINISH YOUR REQUEST.",
    )

    # Ordinary answers the helper has to leave alone.
    _GENUINE_REPLIES = (
        "The weather in Hackney is 14°C and partly cloudy.",
        "I found three results: Annie Lennox, Lulu, and Shirley Manson.",
        "Sure — I opened YouTube for you.",
        # "Finish" appearing in a non-caveat sentence must not trigger.
        "You can finish the task by pressing enter.",
    )

    def test_empty_and_none_are_not_digest(self):
        # Blank input is not a digest caveat; the helper must answer with
        # the literal ``False`` for both.
        for blank in (None, ""):
            assert is_max_turns_digest(blank) is False

    @pytest.mark.parametrize("reply", _CAVEATS)
    def test_caveats_are_flagged(self, reply):
        message = f"Helper should flag {reply!r} as the max-turns digest caveat."
        assert is_max_turns_digest(reply), message

    @pytest.mark.parametrize("reply", _GENUINE_REPLIES)
    def test_real_replies_are_not_flagged(self, reply):
        message = f"Helper must NOT flag genuine replies: {reply!r}"
        assert not is_max_turns_digest(reply), message
class TestMaxTurnsPhrasesAgainstEnrichmentSource:
    """Drift pin: every phrase in ``MAX_TURNS_DIGEST_PHRASES`` must
    correspond to the caveat instruction in the digest prompt source.
    If the prompt's caveat wording is changed, the phrase list must be
    updated in lockstep or the eval silently stops catching the leak.
    """

    def test_digest_prompt_mentions_fully_finish(self):
        """``enrichment.py`` and every flagged phrase must share the
        ``fully finish`` anchor."""
        enrichment_path = _ROOT / "src" / "jarvis" / "reply" / "enrichment.py"
        # Decode explicitly as UTF-8 (the default encoding of Python source
        # files). A bare ``read_text()`` uses the locale's preferred
        # encoding, which can fail or mis-decode on platforms whose default
        # is not UTF-8.
        src = enrichment_path.read_text(encoding="utf-8")

        # The digest prompt instructs the LLM to open with a caveat about
        # not being able to fully finish; the anchor phrase here is
        # ``fully finish``, which is the semantic core every canonical
        # phrase in MAX_TURNS_DIGEST_PHRASES shares.
        assert "fully finish" in src.lower(), (
            "Digest prompt in enrichment.py no longer contains the "
            "'fully finish' caveat anchor — either the prompt wording "
            "changed (update MAX_TURNS_DIGEST_PHRASES in evals/helpers.py) "
            "or the anchor drifted."
        )

        # An empty phrase list would make the loop below pass vacuously.
        assert MAX_TURNS_DIGEST_PHRASES, (
            "MAX_TURNS_DIGEST_PHRASES in evals/helpers.py is empty; the "
            "max-turns digest detection would never fire."
        )

        # Every phrase we flag must contain the shared anchor; this keeps
        # the helper honest about what it claims to detect.
        for phrase in MAX_TURNS_DIGEST_PHRASES:
            assert "fully finish" in phrase, (
                f"MAX_TURNS_DIGEST_PHRASES entry {phrase!r} does not "
                f"contain the 'fully finish' anchor — the helper would "
                f"flag unrelated replies."
            )
class TestAssertNotMaxTurnsDigest:
    """Behaviour of ``assert_not_max_turns_digest``: silent on genuine or
    empty replies, a failure mentioning the context tag when handed a
    max-turns digest caveat."""

    def test_passes_on_real_reply(self):
        # A normal answer must go through without raising.
        genuine = "The weather in Paris is 14°C and partly cloudy."
        assert_not_max_turns_digest(genuine, context="weather")

    def test_fails_on_digest_caveat(self):
        tag = "single-weather-terminal"
        caveat = "I could not fully finish your request. I found the weather is 8°C."
        # BaseException is the widest catch — presumably needed because the
        # helper fails through ``pytest.fail``, whose exception does not
        # derive from ``Exception``; confirm against evals/helpers.py.
        with pytest.raises(BaseException) as exc_info:
            assert_not_max_turns_digest(caveat, context=tag)
        # The context tag has to be part of the failure text.
        assert tag in str(exc_info.value)

    def test_passes_on_empty(self):
        # Blank replies must not be reported as a digest caveat.
        for blank in ("", None):
            assert_not_max_turns_digest(blank, context="x")