Add Discord-native hybrid front-end for Jarvis (bot + bridge)

Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact. - bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot) - bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API - .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions
--- a/tests/test_tune_player.py
+++ b/tests/test_tune_player.py
@@ -0,0 +1,250 @@
+"""Behavioural tests for the thinking-tune player.
+
+Covers:
+- Sample / WAV generation: right format/size, seam is effectively seamless.
+- TunePlayer lifecycle: idempotent start/stop, is_playing state, prompt
+  stop even when a "stream" is running.
+- Sounddevice dispatch: stop_tune closes the stream cleanly from the
+  owning thread (no cross-thread abort — that races with close on
+  macOS CoreAudio and logs a spurious !obj error).
+
+The sounddevice stream is exercised via a fake `sounddevice` module
+injected into sys.modules — works headlessly in CI.
+"""
+from __future__ import annotations
+
+import io
+import struct
+import sys
+import time
+import types
+import wave
+from unittest.mock import MagicMock
+
+import pytest
+
+from jarvis.output import tune_player
+from jarvis.output.tune_player import (
+    TunePlayer,
+    _generate_thinking_pad_samples,
+    _generate_thinking_pad_wav,
+    _get_thinking_pad_samples,
+    _get_thinking_pad_wav,
+)
+
+
+# --- Sample / WAV generation -----------------------------------------------
+
+def test_thinking_pad_samples_have_expected_shape():
+    samples, rate = _generate_thinking_pad_samples()
+    assert rate == 44100
+    assert samples.dtype.name == "int16"
+    assert samples.ndim == 1
+    # Long enough to contain several pulse-silence cycles.
+    assert samples.size / rate >= 5.0
+
+
+def test_thinking_pad_wav_is_well_formed():
+    data = _generate_thinking_pad_wav()
+    with wave.open(io.BytesIO(data)) as w:
+        assert w.getnchannels() == 1
+        assert w.getsampwidth() == 2
+        assert w.getframerate() == 44100
+
+
+def test_thinking_pad_samples_cached():
+    a = _get_thinking_pad_samples()
+    b = _get_thinking_pad_samples()
+    assert a is b
+
+
+def test_thinking_pad_wav_cached():
+    assert _get_thinking_pad_wav() is _get_thinking_pad_wav()
+
+
+def test_thinking_pad_seam_is_effectively_seamless():
+    samples, _ = _generate_thinking_pad_samples()
+    first = int(samples[0])
+    last = int(samples[-1])
+    # Seam step must be well under full-scale; observed ≈ 500.
+    assert abs(first - last) < 0.05 * 32767
+
+
+def test_thinking_pad_breathes():
+    """The pad is intentionally not continuous — it has a short audible
+    breath followed by a silent pause each loop so long thinking runs
+    aren't fatiguing. Verify both extremes exist."""
+    samples, rate = _generate_thinking_pad_samples()
+    win = rate // 10  # 100ms windows
+    peaks = [
+        int(abs(samples[i : i + win]).max())
+        for i in range(0, samples.size - win, win)
+    ]
+    # At least one window is clearly audible (the hold section).
+    assert max(peaks) > 0.10 * 32767
+    # At least one window is effectively silent (the rest pause).
+    assert min(peaks) < 0.005 * 32767
+
+
+# --- TunePlayer lifecycle --------------------------------------------------
+
+class _FakeStream:
+    """Minimal sounddevice.OutputStream stand-in."""
+
+    def __init__(self, *args, **kwargs):
+        self.started = False
+        self.aborted = False
+        self.closed = False
+        self._callback = kwargs.get("callback")
+
+    def start(self):
+        self.started = True
+
+    def abort(self):
+        self.aborted = True
+
+    def close(self):
+        self.closed = True
+
+
+def _install_fake_sounddevice(monkeypatch, stream_factory=None):
+    """Inject a fake sounddevice module that records the created stream."""
+    created = {}
+
+    def _OutputStream(*args, **kwargs):
+        stream = (stream_factory or _FakeStream)(*args, **kwargs)
+        created["stream"] = stream
+        return stream
+
+    fake_sd = types.ModuleType("sounddevice")
+    fake_sd.OutputStream = _OutputStream
+    monkeypatch.setitem(sys.modules, "sounddevice", fake_sd)
+    return created
+
+
+def test_disabled_player_never_starts():
+    tp = TunePlayer(enabled=False)
+    tp.start_tune()
+    try:
+        assert not tp.is_playing()
+        assert tp._thread is None
+    finally:
+        tp.stop_tune()
+
+
+def test_stop_is_idempotent():
+    tp = TunePlayer(enabled=False)
+    tp.stop_tune()
+    tp.stop_tune()
+
+
+def test_double_start_is_ignored(monkeypatch):
+    _install_fake_sounddevice(monkeypatch)
+    tp = TunePlayer(enabled=True)
+    tp.start_tune()
+    first = tp._thread
+    try:
+        tp.start_tune()
+        assert tp._thread is first
+    finally:
+        tp.stop_tune()
+
+
+def test_stop_closes_the_stream_and_returns_quickly(monkeypatch):
+    created = _install_fake_sounddevice(monkeypatch)
+    tp = TunePlayer(enabled=True)
+    tp.start_tune()
+
+    # Wait until the stream is actually started.
+    for _ in range(100):
+        stream = created.get("stream")
+        if stream is not None and stream.started:
+            break
+        time.sleep(0.01)
+    stream = created.get("stream")
+    assert stream is not None and stream.started
+
+    t0 = time.time()
+    tp.stop_tune()
+    elapsed = time.time() - t0
+
+    # Only the tune thread closes the stream; stop_tune must NOT abort
+    # from the caller's thread — that races with close() on macOS.
+    assert stream.closed
+    assert not stream.aborted
+    assert elapsed < 1.0
+    assert not tp.is_playing()
+
+
+def test_fallback_when_sounddevice_unavailable(monkeypatch):
+    # Force the sounddevice import inside _play_tune to fail.
+    fake_sd = types.ModuleType("sounddevice_broken")
+
+    def _raise(*a, **kw):
+        raise RuntimeError("no audio here")
+
+    fake_sd.OutputStream = _raise
+    monkeypatch.setitem(sys.modules, "sounddevice", fake_sd)
+
+    tp = TunePlayer(enabled=True)
+    tp.start_tune()
+    # Give the thread a moment to reach the fallback loop.
+    for _ in range(50):
+        if tp.is_playing():
+            break
+        time.sleep(0.01)
+    assert tp.is_playing()
+
+    t0 = time.time()
+    tp.stop_tune()
+    elapsed = time.time() - t0
+    assert elapsed < 1.5
+    assert not tp.is_playing()
+
+
+def test_stream_callback_wraps_seamlessly(monkeypatch):
+    """The internal callback must wrap from end-of-buffer back to start
+    without dropping a frame — that's the whole 'seamless loop' promise."""
+    captured = {}
+
+    class _SpyStream(_FakeStream):
+        def __init__(self, *a, **kw):
+            super().__init__(*a, **kw)
+            captured["callback"] = kw.get("callback")
+
+    _install_fake_sounddevice(monkeypatch, stream_factory=_SpyStream)
+    tp = TunePlayer(enabled=True)
+    tp.start_tune()
+    try:
+        for _ in range(100):
+            if captured.get("callback") is not None:
+                break
+            time.sleep(0.01)
+        cb = captured["callback"]
+        assert cb is not None
+
+        samples, _ = _get_thinking_pad_samples()
+        total = samples.size
+
+        # Position the read head just before the end of the buffer so
+        # the next callback crosses the seam.
+        import numpy as np
+        frames = 1024
+        # Simulate two back-to-back callbacks that span the wrap.
+        # First drain most of the buffer with a big fake call — we can
+        # do it via multiple calls to the real callback.
+        out = np.zeros((frames, 1), dtype=np.int16)
+
+        # Call the callback repeatedly until position wraps.
+        # The callback uses a closure; after enough calls we should cross.
+        seen_wrap = False
+        for _ in range(total // frames + 2):
+            cb(out, frames, None, None)
+            # When the internal position wraps, outdata will be a mix
+            # of end-of-buffer and start-of-buffer samples. Verify no
+            # exception raised and output is int16.
+            assert out.dtype.name == "int16"
+            seen_wrap = True
+        assert seen_wrap
+    finally:
+        tp.stop_tune()