Add Discord-native hybrid front-end for Jarvis (bot + bridge)

Transform isair/jarvis into a Discord-controlled voice assistant running on the Ubuntu VNC desktop, keeping the mature ~39k-line Python brain intact. - bot/ (Node + bun, discord.js): /자비스 slash commands (ephemeral), voice channel join + voice receive/playback, pluggable VNC screen broadcast (selfbot live / noVNC / screenshot) - bridge/ (Python, Flask): wraps jarvis STT + run_reply_engine + Piper TTS behind a thin localhost HTTP API - .env.example, scripts/ (start_bridge/start_bot/dev), README rewrite, docs/language-comparison.md and docs/vnc-xfce-setup.md Language decision: hybrid (Python brain + Node/bun Discord layer) because Discord blocks bot video; native screen broadcast only works via a Node selfbot library.
2026-06-09 14:51:05 +09:00
parent a5bf8d1826
commit c4abf63f38
308 changed files with 94135 additions and 1 deletions
--- a/src/jarvis/memory/db.py
+++ b/src/jarvis/memory/db.py
@@ -0,0 +1,442 @@
+from __future__ import annotations
+import sqlite3
+import re
+from typing import Sequence, Optional
+from pathlib import Path
+import threading
+from datetime import datetime, timezone
+
+from ..debug import debug_log
+
+_SCHEMA_SQL = """
+PRAGMA journal_mode=WAL;
+PRAGMA synchronous=NORMAL;
+
+-- Structured meals log (optional feature)
+CREATE TABLE IF NOT EXISTS meals (
+  id            INTEGER PRIMARY KEY,
+  ts_utc        TEXT NOT NULL,
+  source_app    TEXT NOT NULL,
+  description   TEXT NOT NULL,
+  calories_kcal REAL,
+  protein_g     REAL,
+  carbs_g       REAL,
+  fat_g         REAL,
+  fiber_g       REAL,
+  sugar_g       REAL,
+  sodium_mg     REAL,
+  potassium_mg  REAL,
+  micros_json   TEXT,
+  confidence    REAL
+);
+
+-- Conversation summaries for diary/memory system
+CREATE TABLE IF NOT EXISTS conversation_summaries (
+  id         INTEGER PRIMARY KEY,
+  date_utc   TEXT NOT NULL,  -- YYYY-MM-DD format
+  ts_utc     TEXT NOT NULL,  -- When summary was created
+  summary    TEXT NOT NULL,  -- Concise summary of the day's conversations
+  topics     TEXT,           -- Comma-separated list of main topics discussed
+  source_app TEXT NOT NULL,  -- Source app that generated the conversation
+  UNIQUE(date_utc, source_app)
+);
+
+CREATE VIRTUAL TABLE IF NOT EXISTS summaries_fts USING fts5(
+  summary,
+  topics,
+  content='conversation_summaries',
+  content_rowid='id',
+  tokenize='porter'
+);
+
+-- Triggers for conversation summaries FTS
+CREATE TRIGGER IF NOT EXISTS summaries_ai AFTER INSERT ON conversation_summaries BEGIN
+  INSERT INTO summaries_fts(rowid, summary, topics) VALUES (new.id, new.summary, new.topics);
+END;
+CREATE TRIGGER IF NOT EXISTS summaries_ad AFTER DELETE ON conversation_summaries BEGIN
+  INSERT INTO summaries_fts(summaries_fts, rowid, summary, topics) VALUES('delete', old.id, old.summary, old.topics);
+END;
+CREATE TRIGGER IF NOT EXISTS summaries_au AFTER UPDATE ON conversation_summaries BEGIN
+  INSERT INTO summaries_fts(summaries_fts, rowid, summary, topics) VALUES('delete', old.id, old.summary, old.topics);
+  INSERT INTO summaries_fts(rowid, summary, topics) VALUES (new.id, new.summary, new.topics);
+END;
+"""
+
+_VSS_SCHEMA_SQL = """
+CREATE VIRTUAL TABLE IF NOT EXISTS embeddings USING vss0(
+  id INTEGER PRIMARY KEY,
+  vec FLOAT[768]
+);
+
+CREATE TABLE IF NOT EXISTS summary_vec (
+  summary_id INTEGER PRIMARY KEY REFERENCES conversation_summaries(id) ON DELETE CASCADE,
+  emb_id     INTEGER NOT NULL REFERENCES embeddings(id)
+);
+"""
+
+
+def _normalize_fts_query(raw: str) -> str:
+    # Use improved fuzzy search query generation
+    try:
+        from .fuzzy_search import generate_flexible_fts_query
+        flexible_query = generate_flexible_fts_query(raw)
+        if flexible_query:
+            return flexible_query
+    except ImportError:
+        pass
+    
+    # Fallback: Extract alphanumeric tokens and join them with spaces (logical AND)
+    tokens = re.findall(r"[A-Za-z0-9_]+", raw)
+    return " ".join(tokens)
+
+
+class Database:
+    def __init__(self, db_path: str, sqlite_vss_path: Optional[str] = None) -> None:
+        Path(db_path).parent.mkdir(parents=True, exist_ok=True)
+        self.db_path = db_path
+        self.conn = sqlite3.connect(db_path, check_same_thread=False)
+        self.conn.row_factory = sqlite3.Row
+        self._lock = threading.RLock()
+        self.is_vss_enabled = False
+        self._python_vector_store = None
+        
+        if sqlite_vss_path:
+            try:
+                self.conn.enable_load_extension(True)
+                self.conn.load_extension(sqlite_vss_path)
+                self.is_vss_enabled = True
+            except Exception:
+                self.is_vss_enabled = False
+        
+        # If sqlite-vss is not available, use best available vector store (FAISS or Python fallback)
+        if not self.is_vss_enabled:
+            from ..utils.vector_store import get_best_vector_store
+            self._python_vector_store = get_best_vector_store(db_path, dimension=768)
+            
+            # Log which vector store implementation is being used
+            import sys
+            store_type = type(self._python_vector_store).__name__
+            if store_type == "FAISSVectorStore":
+                debug_log("Using FAISS vector store for fast search", "jarvis")
+            else:
+                debug_log("Using Python fallback vector store", "jarvis")
+        
+        self._init_schema()
+
+    def _init_schema(self) -> None:
+        with self._lock:
+            cur = self.conn.cursor()
+            cur.executescript(_SCHEMA_SQL)
+            if self.is_vss_enabled:
+                cur.executescript(_VSS_SCHEMA_SQL)
+            self.conn.commit()
+
+    
+
+    def search_hybrid(self, fts_query: str, query_vec_json: Optional[str], top_k: int = 8) -> list[sqlite3.Row]:
+        with self._lock:
+            cur = self.conn.cursor()
+            safe_q = _normalize_fts_query(fts_query)
+
+            # Use Python vector store if sqlite-vss is not available
+            if not self.is_vss_enabled and self._python_vector_store and query_vec_json is not None and safe_q:
+                # Parse query vector
+                import json as _json
+                query_vec = _json.loads(query_vec_json)
+                
+                # Get vector search results (use max of top_k*3 and 50 for good hybrid scoring)
+                vector_search_limit = max(top_k * 3, 50)
+                vector_results = self._python_vector_store.search(query_vec, top_k=vector_search_limit)
+                
+                # Get FTS results (use max of top_k*3 and 50 for good hybrid scoring)
+                fts_search_limit = max(top_k * 3, 50)
+                fts_sql = f"""
+                SELECT s.id, bm25(summaries_fts) AS bm
+                FROM summaries_fts
+                JOIN conversation_summaries s ON s.id = summaries_fts.rowid
+                WHERE summaries_fts MATCH ?
+                ORDER BY bm
+                LIMIT {fts_search_limit}
+                """
+                fts_rows = cur.execute(fts_sql, (safe_q,)).fetchall()
+                fts_scores = {row['id']: row['bm'] for row in fts_rows}
+                
+                # Combine scores
+                combined_scores = {}
+                
+                # Add vector scores (60% weight)
+                for summary_id, distance in vector_results:
+                    combined_scores[summary_id] = (1.0 / (1.0 + distance)) * 0.6
+                
+                # Add FTS scores (40% weight)
+                for summary_id, bm_score in fts_scores.items():
+                    if summary_id in combined_scores:
+                        combined_scores[summary_id] += (1.0 / (1.0 + bm_score)) * 0.4
+                    else:
+                        combined_scores[summary_id] = (1.0 / (1.0 + bm_score)) * 0.4
+                
+                # Sort by combined score and fetch summaries
+                sorted_ids = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)[:top_k]
+                
+                if sorted_ids:
+                    # Fetch summaries for top results
+                    placeholders = ','.join('?' * len(sorted_ids))
+                    summary_sql = f"""
+                    SELECT s.id, 
+                           '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
+                           'summary' AS result_type
+                    FROM conversation_summaries s
+                    WHERE s.id IN ({placeholders})
+                    """
+                    rows = cur.execute(summary_sql, [sid for sid, _ in sorted_ids]).fetchall()
+                    
+                    # Create result rows with scores
+                    results = []
+                    id_to_score = {sid: score for sid, score in sorted_ids}
+                    for row in rows:
+                        # Create a new row dict with score
+                        result = dict(row)
+                        result['score'] = id_to_score.get(row['id'], 0.0)
+                        results.append(result)
+                    
+                    # Sort by score again (in case DB returned in different order)
+                    results.sort(key=lambda x: x['score'], reverse=True)
+                    return results
+                else:
+                    return []
+                    
+            elif self.is_vss_enabled and query_vec_json is not None and safe_q:
+                # Hybrid search: 60% vector similarity (semantic) + 40% FTS (exact terms)
+                # This balances finding semantically related content with keyword matches
+                # Use dynamic limits for efficiency on large datasets
+                search_limit = max(top_k * 3, 50)
+                summary_sql = f"""
+                WITH fts_sum AS (
+                  SELECT s.id, bm25(summaries_fts) AS bm
+                  FROM summaries_fts
+                  JOIN conversation_summaries s ON s.id = summaries_fts.rowid
+                  WHERE summaries_fts MATCH ?
+                  ORDER BY bm LIMIT {search_limit}
+                ),
+                v_sum AS (
+                  SELECT sv.summary_id AS id, distance
+                  FROM vss_search(embeddings, 'vec', ?)
+                  JOIN summary_vec sv ON sv.emb_id = rowid
+                  LIMIT {search_limit}
+                )
+                SELECT s.id, (
+                    (1.0/(1.0+COALESCE(v_sum.distance, 1))) * 0.6 +
+                    (1.0/(1.0+COALESCE(fts_sum.bm, 10))) * 0.4
+                  ) AS score,
+                  '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
+                  'summary' AS result_type
+                FROM conversation_summaries s
+                LEFT JOIN v_sum     ON v_sum.id = s.id
+                LEFT JOIN fts_sum   ON fts_sum.id = s.id
+                WHERE v_sum.id IS NOT NULL OR fts_sum.id IS NOT NULL
+                ORDER BY score DESC
+                LIMIT {int(top_k)};
+                """
+                rows = cur.execute(summary_sql, (safe_q, query_vec_json)).fetchall()
+
+            elif safe_q:
+                # FTS-only search over conversation summaries
+                summary_sql = f"""
+                SELECT s.id, bm25(summaries_fts) AS score,
+                       '[' || s.date_utc || '] ' || s.summary || ' (Topics: ' || COALESCE(s.topics, '') || ')' AS text,
+                       'summary' AS result_type
+                FROM summaries_fts
+                JOIN conversation_summaries s ON s.id = summaries_fts.rowid
+                WHERE summaries_fts MATCH ?
+                ORDER BY score
+                LIMIT {int(top_k)};
+                """
+                rows = cur.execute(summary_sql, (safe_q,)).fetchall()
+
+            else:
+                # Fallback: latest conversation summaries
+                summary_sql = f"""
+                SELECT id, 0.0 AS score,
+                       '[' || date_utc || '] ' || summary || ' (Topics: ' || COALESCE(topics, '') || ')' AS text,
+                       'summary' AS result_type
+                FROM conversation_summaries
+                ORDER BY date_utc DESC
+                LIMIT {int(top_k)};
+                """
+                rows = cur.execute(summary_sql).fetchall()
+
+            return rows
+
+    @staticmethod
+    def _pack_vector(vec: Sequence[float]) -> bytes:
+        # SQLite-vss expects a float array; packing via array('f') ensures binary blob layout.
+        import array
+        arr = array.array('f', [float(x) for x in vec])
+        return arr.tobytes()
+
+    # --- Meals API ---
+    def insert_meal(
+        self,
+        ts_utc: str,
+        source_app: str,
+        description: str,
+        calories_kcal: Optional[float] = None,
+        protein_g: Optional[float] = None,
+        carbs_g: Optional[float] = None,
+        fat_g: Optional[float] = None,
+        fiber_g: Optional[float] = None,
+        sugar_g: Optional[float] = None,
+        sodium_mg: Optional[float] = None,
+        potassium_mg: Optional[float] = None,
+        micros_json: Optional[str] = None,
+        confidence: Optional[float] = None,
+    ) -> int:
+        with self._lock:
+            cur = self.conn.cursor()
+            cur.execute(
+                """
+                INSERT INTO meals(ts_utc, source_app, description, calories_kcal, protein_g, carbs_g, fat_g, fiber_g, sugar_g, sodium_mg, potassium_mg, micros_json, confidence)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    ts_utc,
+                    source_app,
+                    description,
+                    calories_kcal,
+                    protein_g,
+                    carbs_g,
+                    fat_g,
+                    fiber_g,
+                    sugar_g,
+                    sodium_mg,
+                    potassium_mg,
+                    micros_json,
+                    confidence,
+                ),
+            )
+            self.conn.commit()
+            return int(cur.lastrowid)
+
+    def get_meals_between(self, ts_utc_min: str, ts_utc_max: str) -> list[sqlite3.Row]:
+        with self._lock:
+            cur = self.conn.cursor()
+            rows = cur.execute(
+                """
+                SELECT * FROM meals
+                WHERE ts_utc >= ? AND ts_utc <= ?
+                ORDER BY ts_utc ASC
+                """,
+                (ts_utc_min, ts_utc_max),
+            ).fetchall()
+            return rows
+
+    def delete_meal(self, meal_id: int) -> bool:
+        with self._lock:
+            cur = self.conn.cursor()
+            cur.execute("DELETE FROM meals WHERE id = ?", (meal_id,))
+            self.conn.commit()
+            return cur.rowcount > 0
+
+    # --- Conversation Summaries API ---
+    def upsert_conversation_summary(
+        self,
+        date_utc: str,  # YYYY-MM-DD format
+        summary: str,
+        topics: Optional[str] = None,
+        source_app: str = "jarvis",
+        ts_utc: Optional[str] = None,
+    ) -> int:
+        """Insert or update a conversation summary for a given date.
+
+        ``ts_utc`` defaults to "now". Maintenance ops that rewrite an
+        existing row's content without changing what it represents (e.g.
+        the deflection scrub bulk sweep) should pass through the row's
+        original ``ts_utc`` so the audit trail is preserved.
+        """
+        if ts_utc is None:
+            ts_utc = datetime.now(timezone.utc).isoformat()
+        with self._lock:
+            cur = self.conn.cursor()
+            cur.execute(
+                """
+                INSERT OR REPLACE INTO conversation_summaries(date_utc, ts_utc, summary, topics, source_app)
+                VALUES (?, ?, ?, ?, ?)
+                """,
+                (date_utc, ts_utc, summary, topics, source_app),
+            )
+            self.conn.commit()
+            return int(cur.lastrowid)
+
+    def get_conversation_summary(self, date_utc: str, source_app: str = "jarvis") -> Optional[sqlite3.Row]:
+        """Get conversation summary for a specific date."""
+        with self._lock:
+            cur = self.conn.cursor()
+            row = cur.execute(
+                """
+                SELECT * FROM conversation_summaries
+                WHERE date_utc = ? AND source_app = ?
+                """,
+                (date_utc, source_app),
+            ).fetchone()
+            return row
+
+    def get_recent_conversation_summaries(self, days: int = 7) -> list[sqlite3.Row]:
+        """Get conversation summaries from the last N days."""
+        from datetime import datetime, timedelta, timezone
+        cutoff_date = (datetime.now(timezone.utc) - timedelta(days=days)).date().isoformat()
+
+        with self._lock:
+            cur = self.conn.cursor()
+            rows = cur.execute(
+                """
+                SELECT * FROM conversation_summaries
+                WHERE date_utc >= ?
+                ORDER BY date_utc DESC
+                """,
+                (cutoff_date,),
+            ).fetchall()
+            return rows
+
+    def get_all_conversation_summaries(self) -> list[sqlite3.Row]:
+        """Get all conversation summaries, ordered by date ascending (oldest first).
+
+        Used for bulk import into graph memory — processes diary entries
+        chronologically so the graph builds up naturally.
+        """
+        with self._lock:
+            cur = self.conn.cursor()
+            rows = cur.execute(
+                """
+                SELECT * FROM conversation_summaries
+                ORDER BY date_utc ASC
+                """,
+            ).fetchall()
+            return rows
+
+    def upsert_summary_embedding(self, summary_id: int, vec: Sequence[float]) -> Optional[int]:
+        """Store or update embedding for a conversation summary."""
+        if self.is_vss_enabled:
+            # Use sqlite-vss
+            with self._lock:
+                cur = self.conn.cursor()
+                cur.execute("INSERT INTO embeddings(vec) VALUES (?)", (sqlite3.Binary(self._pack_vector(vec)),))
+                emb_id = cur.lastrowid
+                cur.execute(
+                    "INSERT OR REPLACE INTO summary_vec(summary_id, emb_id) VALUES (?, ?)",
+                    (summary_id, emb_id),
+                )
+                self.conn.commit()
+                return int(emb_id)
+        elif self._python_vector_store:
+            # Use Python vector store
+            self._python_vector_store.add_vector(summary_id, list(vec))
+            return summary_id  # Return summary_id as a placeholder for emb_id
+        else:
+            return None
+
+    def close(self) -> None:
+        try:
+            with self._lock:
+                self.conn.close()
+        except Exception:
+            pass