From 10e0dd75db143ec3ba8307a18f71ce5c44270a16 Mon Sep 17 00:00:00 2001 From: claude-bot Date: Fri, 1 May 2026 23:14:23 +0900 Subject: [PATCH] Reset project to README only --- .env.example | 34 -- .gitignore | 6 - README.md | 193 -------- bun.lock | 222 --------- package.json | 42 -- python/local_stt_worker.py | 237 ---------- python/local_tts_worker.py | 136 ------ python/requirements-windows.txt | 3 - python/requirements.txt | 3 - src/audio/ffmpeg-path.ts | 59 --- src/audio/guild-voice-session.ts | 456 ------------------ src/audio/local-voice-session.ts | 710 ----------------------------- src/audio/pcm.ts | 60 --- src/config.ts | 78 ---- src/discord-main.ts | 238 ---------- src/index.ts | 52 --- src/local-main.ts | 232 ---------- src/logger.ts | 63 --- src/python-runtime.ts | 100 ---- src/services/conversation.ts | 98 ---- src/services/create-tts-service.ts | 112 ----- src/services/llm.ts | 6 - src/services/local-stt.ts | 43 -- src/services/local-tts.ts | 97 ---- src/services/ollama-llm.ts | 159 ------- src/services/python-json-worker.ts | 208 --------- src/services/stt.ts | 4 - src/services/tts.ts | 13 - src/services/windows-media-tts.ts | 152 ------ src/services/windows-powershell.ts | 63 --- src/services/windows-system-tts.ts | 123 ----- src/setup-local-ai.ts | 132 ------ tsconfig.json | 21 - 33 files changed, 4155 deletions(-) delete mode 100644 .env.example delete mode 100644 .gitignore delete mode 100644 bun.lock delete mode 100644 package.json delete mode 100644 python/local_stt_worker.py delete mode 100644 python/local_tts_worker.py delete mode 100644 python/requirements-windows.txt delete mode 100644 python/requirements.txt delete mode 100644 src/audio/ffmpeg-path.ts delete mode 100644 src/audio/guild-voice-session.ts delete mode 100644 src/audio/local-voice-session.ts delete mode 100644 src/audio/pcm.ts delete mode 100644 src/config.ts delete mode 100644 src/discord-main.ts delete mode 100644 src/index.ts delete mode 100644 src/local-main.ts delete mode 100644 src/logger.ts delete mode 100644 src/python-runtime.ts delete mode 100644 src/services/conversation.ts delete mode 100644 src/services/create-tts-service.ts delete mode 100644 src/services/llm.ts delete mode 100644 src/services/local-stt.ts delete mode 100644 src/services/local-tts.ts delete mode 100644 src/services/ollama-llm.ts delete mode 100644 src/services/python-json-worker.ts delete mode 100644 src/services/stt.ts delete mode 100644 src/services/tts.ts delete mode 100644 src/services/windows-media-tts.ts delete mode 100644 src/services/windows-powershell.ts delete mode 100644 src/services/windows-system-tts.ts delete mode 100644 src/setup-local-ai.ts delete mode 100644 tsconfig.json diff --git a/.env.example b/.env.example deleted file mode 100644 index 4dd4333..0000000 --- a/.env.example +++ /dev/null @@ -1,34 +0,0 @@ -DISCORD_BOT_TOKEN= -DISCORD_APPLICATION_ID= -DISCORD_COMMAND_GUILD_ID= - -OLLAMA_BASE_URL=http://127.0.0.1:11434 -OLLAMA_MODEL=qwen3:0.6b -OLLAMA_KEEP_ALIVE=5m -OLLAMA_NUM_CTX=4096 - -LOCAL_AI_VENV_PATH=.local-ai/.venv -LOCAL_AI_CACHE_DIR=.local-ai/cache -# Windows면 `python` 또는 `py -3` -LOCAL_AI_PYTHON= -LOCAL_STT_MODEL=small -# CUDA dll 오류가 나면 `cpu` -LOCAL_STT_DEVICE=auto -LOCAL_STT_COMPUTE_TYPE=auto -LOCAL_STT_BEAM_SIZE=3 -LOCAL_TTS_ENGINE=auto -LOCAL_TTS_VOICE_NAME= -LOCAL_TTS_MODEL_PATH=.local-ai/models/kokoro-v1.0.onnx -LOCAL_TTS_VOICES_PATH=.local-ai/models/voices-v1.0.bin -LOCAL_TTS_LANGUAGE=ko -LOCAL_TTS_SPEAKER=af_heart -LOCAL_TTS_DEVICE=auto -LOCAL_TTS_SPEED=1.12 - -BOT_DEFAULT_LANGUAGE=ko -MAX_CONVERSATION_TURNS=12 -LOCAL_AUDIO_SOURCE= -LOCAL_AUDIO_SINK= -LOCAL_SPEAKER_NAME=local-user -DEBUG_TEXT_EVENTS=false -LOG_LEVEL=info diff --git a/.gitignore b/.gitignore deleted file mode 100644 index d677c4f..0000000 --- a/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -node_modules -dist -.env -.local-ai -__pycache__ -*.pyc diff --git a/README.md b/README.md index 171c945..bf94a48 100644 --- a/README.md +++ b/README.md @@ -1,194 +1 @@ # realtime_voice_bot - -디스코드 음성 채널 또는 로컬 PC 마이크에서 한국어 음성을 인식하고, 완전 로컬 스택으로 답변을 생성한 뒤 다시 음성으로 읽어주는 최소 프로토타입입니다. - -## 현재 스택 - -- STT: `faster-whisper` + Whisper multilingual -- LLM: `Ollama` + `qwen3:0.6b` -- TTS: -- Windows: `Windows.Media.SpeechSynthesis` 우선, 실패 시 시스템 기본 음성 엔진 fallback -- Linux/macOS: `kokoro-onnx` + `misaki[ko]` -- VAD: `avr-vad` - -외부 유료 API나 무료 한도형 API는 쓰지 않습니다. - -## 현재 구현 범위 - -- Discord slash command 기반 제어: `/join`, `/leave`, `/status`, `/reset`, `/say` -- 로컬 테스트 모드: PC 마이크로 직접 말하고 바로 응답 확인 -- `@discordjs/voice` 기반 음성 채널 입장 및 유저별 오디오 수신 -- 48k stereo PCM을 16k mono로 내려서 유저별 VAD 처리 -- 화자 발화 시작 시 현재 재생과 대기열 즉시 중단 -- Python 로컬 워커를 한 번 띄워 STT/TTS 모델을 메모리에 유지 - -## 필수 준비물 - -- Bun `1.3+` -- Node.js `22.12+` -- Python `3.11+` -- `ffmpeg` -- Ollama - -Discord 모드까지 쓸 거면 추가로: - -- Discord bot token -- Discord application id - -## 빠른 시작 - -```bash -bun install -ollama pull qwen3:0.6b -bun run setup:local-ai -``` - -Windows에서 Python 실행기는 환경마다 다릅니다. 둘 중 되는 쪽 하나만 넣으면 됩니다: - -```env -LOCAL_AI_PYTHON=python -# 또는 -LOCAL_AI_PYTHON=py -3 -``` - -Windows에서 Ollama는 `localhost` 대신 `127.0.0.1`를 쓰는 편이 안전합니다. - -그다음 로컬 장치 확인: - -```bash -bun run devices -``` - -실행: - -```bash -bun run start:local -``` - -TTS만 단독으로 확인: - -```bash -bun run tts:test -- "안녕하세요. 출력 장치 테스트입니다." -``` - -Windows 설치 음성 목록 확인: - -```bash -bun run tts:voices -``` - -TTS WAV 파일만 생성해서 확인: - -```bash -bun run tts:dump -- "안녕하세요. WAV 파일 테스트입니다." -``` - -Discord 모드: - -```bash -bun run start:discord -``` - -## 환경 변수 - -`.env.example`를 복사해서 `.env`를 채우면 됩니다. - -Discord 모드에서만 필수: - -- `DISCORD_BOT_TOKEN` -- `DISCORD_APPLICATION_ID` - -기본값이 이미 들어있는 로컬 AI 설정: - -- `OLLAMA_BASE_URL` -- `OLLAMA_MODEL` -- `OLLAMA_KEEP_ALIVE` -- `OLLAMA_NUM_CTX` -- `LOCAL_AI_VENV_PATH` -- `LOCAL_AI_CACHE_DIR` -- `LOCAL_STT_MODEL` -- `LOCAL_STT_DEVICE` -- `LOCAL_STT_COMPUTE_TYPE` -- `LOCAL_STT_BEAM_SIZE` -- `LOCAL_TTS_ENGINE` -- `LOCAL_TTS_VOICE_NAME` -- `LOCAL_TTS_MODEL_PATH` -- `LOCAL_TTS_VOICES_PATH` -- `LOCAL_TTS_LANGUAGE` -- `LOCAL_TTS_SPEAKER` -- `LOCAL_TTS_DEVICE` -- `LOCAL_TTS_SPEED` - -선택: - -- `DISCORD_COMMAND_GUILD_ID` - - 테스트 서버에만 slash command를 즉시 반영하려면 설정 -- `LOCAL_AI_PYTHON` - - Python 경로 자동 탐지가 안 되면 설정 - - 예시: `python` - - Windows 예시: `python` 또는 `py -3` -- `LOCAL_AUDIO_SOURCE` - - 로컬 입력 장치 - - Linux는 `pw-record --target`, Windows는 `ffmpeg dshow` 장치 이름 -- `LOCAL_AUDIO_SINK` - - Linux 로컬 출력 장치 - - Windows는 현재 시스템 기본 출력 장치 사용 -- `LOCAL_SPEAKER_NAME` - - 로컬 테스트에서 프롬프트에 넣을 화자 이름 -- `BOT_DEFAULT_LANGUAGE` - - 기본값 `ko` -- `DEBUG_TEXT_EVENTS` - - `true`면 transcript/reply를 콘솔에 같이 출력 - -Windows에서 GPU STT를 쓰려면 `LOCAL_STT_DEVICE=auto` 그대로 두고 `bun run setup:local-ai`를 다시 실행하세요. 현재 스크립트는 `faster-whisper`와 함께 CUDA 12용 `cuBLAS`, `cuDNN` 런타임 wheel도 같이 설치합니다. 그래도 `cublas64_12.dll` 또는 `cudnn` 오류가 남으면 시스템에 Visual C++ 런타임이 빠졌거나, 별도 CUDA 설치 경로가 PATH에 안 잡힌 경우입니다. - -## 속도 우선 기본값 - -- STT 기본 권장 모델은 `small` -- LLM 기본 모델은 `qwen3:0.6b` -- Windows TTS 기본 보이스는 설치된 `windows-media` 음성 중 현재 언어에 맞는 첫 번째 항목 -- Linux/macOS TTS 기본 보이스는 `af_heart` -- TTS 기본 속도는 `1.12` - -더 빠르게 돌리고 싶으면: - -```env -LOCAL_STT_MODEL=tiny -LOCAL_STT_BEAM_SIZE=1 -``` - -정확도가 아쉬우면: - -```env -LOCAL_STT_MODEL=small -LOCAL_STT_BEAM_SIZE=3 -OLLAMA_MODEL=qwen3:1.7b -``` - -## 로컬 테스트 순서 - -1. `bun install` -2. `ollama pull qwen3:0.6b` -3. Windows면 `.env` 에 `LOCAL_AI_PYTHON=python` 또는 `LOCAL_AI_PYTHON=py -3` 추가 -4. `bun run setup:local-ai` -5. `bun run devices` -6. `.env` 에 `LOCAL_AUDIO_SOURCE` 설정 -7. `bun run start:local` - -## Windows 메모 - -- `bun run devices` 와 Windows 로컬 녹음은 `ffmpeg`가 필요합니다. -- Windows는 기본적으로 `windows-media` 엔진을 우선 쓰고, 실패하면 `system` 엔진으로 자동 fallback 합니다. -- 출력 장치 직접 선택은 아직 미구현이라 시스템 기본 출력 장치로 재생됩니다. -- Python 탐지가 안 되면 `.env` 에 `LOCAL_AI_PYTHON=python` 또는 `LOCAL_AI_PYTHON=py -3` 를 넣으면 됩니다. -- Windows의 `setup:local-ai`는 STT와 CUDA 런타임 wheel을 함께 설치합니다. -- Linux/macOS의 `setup:local-ai`는 Kokoro ONNX 모델 파일도 자동으로 내려받습니다. -- 더 자연스러운 음성을 고르려면 `bun run tts:voices` 로 설치된 음성 이름을 확인한 뒤 `LOCAL_TTS_VOICE_NAME` 에 넣으면 됩니다. - -## 설계 메모 - -- 입력은 유저별 병렬 처리 -- 출력은 길드 세션당 단일 큐 -- 로컬 모드는 단일 화자 입력 기준 -- 화자 구분은 `speaker_id`, `speaker_name`을 LLM 프롬프트에 항상 포함 -- 모델 다운로드 캐시는 기본적으로 `.local-ai/cache` 아래에 저장 diff --git a/bun.lock b/bun.lock deleted file mode 100644 index 700a171..0000000 --- a/bun.lock +++ /dev/null @@ -1,222 +0,0 @@ -{ - "lockfileVersion": 1, - "configVersion": 1, - "workspaces": { - "": { - "name": "realtime_voice_bot", - "dependencies": { - "@discordjs/voice": "^0.19.2", - "avr-vad": "^1.0.10", - "discord.js": "^14.26.3", - "dotenv": "^17.4.2", - "ffmpeg-static": "^5.3.0", - "opusscript": "^0.1.1", - "prism-media": "^1.3.5", - "zod": "^4.3.6", - }, - "devDependencies": { - "@types/node": "^25.6.0", - "typescript": "^6.0.3", - }, - }, - }, - "trustedDependencies": [ - "ffmpeg-static", - "onnxruntime-node", - ], - "packages": { - "@derhuerst/http-basic": ["@derhuerst/http-basic@8.2.4", "", { "dependencies": { "caseless": "^0.12.0", "concat-stream": "^2.0.0", "http-response-object": "^3.0.1", "parse-cache-control": "^1.0.1" } }, "sha512-F9rL9k9Xjf5blCz8HsJRO4diy111cayL2vkY2XE4r4t3n0yPXVYy3KD3nJ1qbrSn9743UWSXH4IwuCa/HWlGFw=="], - - "@discordjs/builders": ["@discordjs/builders@1.14.1", "", { "dependencies": { "@discordjs/formatters": "^0.6.2", "@discordjs/util": "^1.2.0", "@sapphire/shapeshift": "^4.0.0", "discord-api-types": "^0.38.40", "fast-deep-equal": "^3.1.3", "ts-mixer": "^6.0.4", "tslib": "^2.6.3" } }, "sha512-gSKkhXLqs96TCzk66VZuHHl8z2bQMJFGwrXC0f33ngK+FLNau4hU1PYny3DNJfNdSH+gVMzE85/d5FQ2BpcNwQ=="], - - "@discordjs/collection": ["@discordjs/collection@1.5.3", "", {}, "sha512-SVb428OMd3WO1paV3rm6tSjM4wC+Kecaa1EUGX7vc6/fddvw/6lg90z4QtCqm21zvVe92vMMDt9+DkIvjXImQQ=="], - - "@discordjs/formatters": ["@discordjs/formatters@0.6.2", "", { "dependencies": { "discord-api-types": "^0.38.33" } }, "sha512-y4UPwWhH6vChKRkGdMB4odasUbHOUwy7KL+OVwF86PvT6QVOwElx+TiI1/6kcmcEe+g5YRXJFiXSXUdabqZOvQ=="], - - "@discordjs/rest": ["@discordjs/rest@2.6.1", "", { "dependencies": { "@discordjs/collection": "^2.1.1", "@discordjs/util": "^1.2.0", "@sapphire/async-queue": "^1.5.3", "@sapphire/snowflake": "^3.5.5", "@vladfrangu/async_event_emitter": "^2.4.6", "discord-api-types": "^0.38.40", "magic-bytes.js": "^1.13.0", "tslib": "^2.6.3", "undici": "6.24.1" } }, "sha512-wwQdgjeaoYFiaG+atbqx6aJDpqW7JHAo0HrQkBTbYzM3/PJ3GweQIpgElNcGZ26DCUOXMyawYd0YF7vtr+fZXg=="], - - "@discordjs/util": ["@discordjs/util@1.2.0", "", { "dependencies": { "discord-api-types": "^0.38.33" } }, "sha512-3LKP7F2+atl9vJFhaBjn4nOaSWahZ/yWjOvA4e5pnXkt2qyXRCHLxoBQy81GFtLGCq7K9lPm9R517M1U+/90Qg=="], - - "@discordjs/voice": ["@discordjs/voice@0.19.2", "", { "dependencies": { "@snazzah/davey": "^0.1.9", "@types/ws": "^8.18.1", "discord-api-types": "^0.38.41", "prism-media": "^1.3.5", "tslib": "^2.8.1", "ws": "^8.19.0" } }, "sha512-3yJ255e4ag3wfZu/DSxeOZK1UtnqNxnspmLaQetGT0pDkThNZoHs+Zg6dgZZ19JEVomXygvfHn9lNpICZuYtEA=="], - - "@discordjs/ws": ["@discordjs/ws@1.2.3", "", { "dependencies": { "@discordjs/collection": "^2.1.0", "@discordjs/rest": "^2.5.1", "@discordjs/util": "^1.1.0", "@sapphire/async-queue": "^1.5.2", "@types/ws": "^8.5.10", "@vladfrangu/async_event_emitter": "^2.2.4", "discord-api-types": "^0.38.1", "tslib": "^2.6.2", "ws": "^8.17.0" } }, "sha512-wPlQDxEmlDg5IxhJPuxXr3Vy9AjYq5xCvFWGJyD7w7Np8ZGu+Mc+97LCoEc/+AYCo2IDpKioiH0/c/mj5ZR9Uw=="], - - "@emnapi/core": ["@emnapi/core@1.10.0", "", { "dependencies": { "@emnapi/wasi-threads": "1.2.1", "tslib": "^2.4.0" } }, "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw=="], - - "@emnapi/runtime": ["@emnapi/runtime@1.10.0", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA=="], - - "@emnapi/wasi-threads": ["@emnapi/wasi-threads@1.2.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w=="], - - "@napi-rs/wasm-runtime": ["@napi-rs/wasm-runtime@1.1.4", "", { "dependencies": { "@tybys/wasm-util": "^0.10.1" }, "peerDependencies": { "@emnapi/core": "^1.7.1", "@emnapi/runtime": "^1.7.1" } }, "sha512-3NQNNgA1YSlJb/kMH1ildASP9HW7/7kYnRI2szWJaofaS1hWmbGI4H+d3+22aGzXXN9IJ+n+GiFVcGipJP18ow=="], - - "@sapphire/async-queue": ["@sapphire/async-queue@1.5.5", "", {}, "sha512-cvGzxbba6sav2zZkH8GPf2oGk9yYoD5qrNWdu9fRehifgnFZJMV+nuy2nON2roRO4yQQ+v7MK/Pktl/HgfsUXg=="], - - "@sapphire/shapeshift": ["@sapphire/shapeshift@4.0.0", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "lodash": "^4.17.21" } }, "sha512-d9dUmWVA7MMiKobL3VpLF8P2aeanRTu6ypG2OIaEv/ZHH/SUQ2iHOVyi5wAPjQ+HmnMuL0whK9ez8I/raWbtIg=="], - - "@sapphire/snowflake": ["@sapphire/snowflake@3.5.3", "", {}, "sha512-jjmJywLAFoWeBi1W7994zZyiNWPIiqRRNAmSERxyg93xRGzNYvGjlZ0gR6x0F4gPRi2+0O6S71kOZYyr3cxaIQ=="], - - "@snazzah/davey": ["@snazzah/davey@0.1.11", "", { "optionalDependencies": { "@snazzah/davey-android-arm-eabi": "0.1.11", "@snazzah/davey-android-arm64": "0.1.11", "@snazzah/davey-darwin-arm64": "0.1.11", "@snazzah/davey-darwin-x64": "0.1.11", "@snazzah/davey-freebsd-x64": "0.1.11", "@snazzah/davey-linux-arm-gnueabihf": "0.1.11", "@snazzah/davey-linux-arm64-gnu": "0.1.11", "@snazzah/davey-linux-arm64-musl": "0.1.11", "@snazzah/davey-linux-x64-gnu": "0.1.11", "@snazzah/davey-linux-x64-musl": "0.1.11", "@snazzah/davey-wasm32-wasi": "0.1.11", "@snazzah/davey-win32-arm64-msvc": "0.1.11", "@snazzah/davey-win32-ia32-msvc": "0.1.11", "@snazzah/davey-win32-x64-msvc": "0.1.11" } }, "sha512-oBN+msHzPnm1M5DDx3wVD7iBwpNXFUtkh2MrAbUJu0OhKjliLChi28hq++mu1+qdMpAVQO5JKAvQQxYVbyneiw=="], - - "@snazzah/davey-android-arm-eabi": ["@snazzah/davey-android-arm-eabi@0.1.11", "", { "os": "android", "cpu": "arm" }, "sha512-T1RYbNYKN6tLOcGIDKJd8OI6FBSEemwL7DOYdTMmhqfhhMr3YVN8WOhfoxGg63OcnpTN2e2c5tdY2bAx25RmQQ=="], - - "@snazzah/davey-android-arm64": ["@snazzah/davey-android-arm64@0.1.11", "", { "os": "android", "cpu": "arm64" }, "sha512-ksJn/x2VU8h6w9eku1HT96ugSRZ7lKVkKNKbFleaFN+U99DJaPM+gMu2YvnFU4V54HR06ZBnRihnVG6VLXQpDw=="], - - "@snazzah/davey-darwin-arm64": ["@snazzah/davey-darwin-arm64@0.1.11", "", { "os": "darwin", "cpu": "arm64" }, "sha512-E1d7PbaaVMO3Lj9EiAPqOVbuV0xg5+PsHzHH097DDXiD1+zUDXvJaTnUWsnm5z50pJniHpi4GtaYmk+ieB/guA=="], - - "@snazzah/davey-darwin-x64": ["@snazzah/davey-darwin-x64@0.1.11", "", { "os": "darwin", "cpu": "x64" }, "sha512-Tl4TI/LTmgJZepgbgVMYDi8RqlAkPtPg1OEBPl7a9Tn3AwR36Vs6lyIT1cs/lGy/ds/+B+mKI4rPObN1cyILTw=="], - - "@snazzah/davey-freebsd-x64": ["@snazzah/davey-freebsd-x64@0.1.11", "", { "os": "freebsd", "cpu": "x64" }, "sha512-T8Iw9FXkuI1T+YBAFzh9v/TXf9IOTOSqnd/BFpTRTrlW72PR2lhIidzSmg027VxO7r5pX47iFwiOkb9I/NU/EA=="], - - "@snazzah/davey-linux-arm-gnueabihf": ["@snazzah/davey-linux-arm-gnueabihf@0.1.11", "", { "os": "linux", "cpu": "arm" }, "sha512-1Txj+8pqA8uq/OGtaUaBFWAPnNMQzFgIywj0iA7EI4xZl+mab48/pv+YZ1pNb/suC6ynsW44oB9efiXSdcUAgA=="], - - "@snazzah/davey-linux-arm64-gnu": ["@snazzah/davey-linux-arm64-gnu@0.1.11", "", { "os": "linux", "cpu": "arm64" }, "sha512-ERzF5nM/IYW1BcN3wLXpEwBCGLFf0kGJUVhaV6yfiInz0tkU8UmvrrgpaMaACfMjIhfWdq5CcX+aTkXo/saNcg=="], - - "@snazzah/davey-linux-arm64-musl": ["@snazzah/davey-linux-arm64-musl@0.1.11", "", { "os": "linux", "cpu": "arm64" }, "sha512-e6pX6Hiabtz99q+H/YHNkm9JVlpqN8HGh0qPib8G2+UY4/SSH8WvqWipk3v581dMy2oyCHt7MOoY1aU1P1N/xA=="], - - "@snazzah/davey-linux-x64-gnu": ["@snazzah/davey-linux-x64-gnu@0.1.11", "", { "os": "linux", "cpu": "x64" }, "sha512-TW5bSoqChOJMbvsDb4wAATYrxmAXuNnse7wFNVSAJUaZKSeRfZbu3UAiPWSNn7GwLwSfU6hg322KZUn8IWCuvg=="], - - "@snazzah/davey-linux-x64-musl": ["@snazzah/davey-linux-x64-musl@0.1.11", "", { "os": "linux", "cpu": "x64" }, "sha512-5j6Pmc+Wzv5lSxVP6quA7teYRJXibkZqQyYGfTDnTsUOO5dPpcojpqlXlkhyvsA1OAQTj4uxbOCciN3cVWwzug=="], - - "@snazzah/davey-wasm32-wasi": ["@snazzah/davey-wasm32-wasi@0.1.11", "", { "dependencies": { "@napi-rs/wasm-runtime": "^1.1.2" }, "cpu": "none" }, "sha512-rKOwZ/0J8lp+4VEyOdMDBRP9KR+PksZpa9V1Qn0veMzy4FqTVKthkxwGqewheFe0SFg9fdvt798l/PBFrfDeZw=="], - - "@snazzah/davey-win32-arm64-msvc": ["@snazzah/davey-win32-arm64-msvc@0.1.11", "", { "os": "win32", "cpu": "arm64" }, "sha512-5fptJU4tX901m3mj0SHiBljMrPT4ZEsynbBhR7bK1yn9TY1jjyhN8EFi7QF5IWtUEni+0mia2BCMHZ5ZkmFZqQ=="], - - "@snazzah/davey-win32-ia32-msvc": ["@snazzah/davey-win32-ia32-msvc@0.1.11", "", { "os": "win32", "cpu": "ia32" }, "sha512-ualexn8SeLsiMHhWfzVrzRcjHgcBapg++FPaVgJJxoh2S/jCRiklXOu3luqIZdJdNKvhe2V9SwO/cImPeIIBKw=="], - - "@snazzah/davey-win32-x64-msvc": ["@snazzah/davey-win32-x64-msvc@0.1.11", "", { "os": "win32", "cpu": "x64" }, "sha512-muNhc8UKXtknzsH/w4AIkbPR2I8BuvApn0pDXar0IEvY8PCjqU/M8MPbOOEYwQVvQRMwVTgExtxzrkBPSXB4nA=="], - - "@tybys/wasm-util": ["@tybys/wasm-util@0.10.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg=="], - - "@types/node": ["@types/node@25.6.0", "", { "dependencies": { "undici-types": "~7.19.0" } }, "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ=="], - - "@types/ws": ["@types/ws@8.18.1", "", { "dependencies": { "@types/node": "*" } }, "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg=="], - - "@vladfrangu/async_event_emitter": ["@vladfrangu/async_event_emitter@2.4.7", "", {}, "sha512-Xfe6rpCTxSxfbswi/W/Pz7zp1WWSNn4A0eW4mLkQUewCrXXtMj31lCg+iQyTkh/CkusZSq9eDflu7tjEDXUY6g=="], - - "adm-zip": ["adm-zip@0.5.17", "", {}, "sha512-+Ut8d9LLqwEvHHJl1+PIHqoyDxFgVN847JTVM3Izi3xHDWPE4UtzzXysMZQs64DMcrJfBeS/uoEP4AD3HQHnQQ=="], - - "agent-base": ["agent-base@6.0.2", "", { "dependencies": { "debug": "4" } }, "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ=="], - - "avr-vad": ["avr-vad@1.0.10", "", { "dependencies": { "onnxruntime-node": "^1.22.0-rev" } }, "sha512-gM8SiQIebujfKMfy5w74tRPH+Fg78CMrBoDkMhCN3TmYVmmD8fmuVag7Q7ZCBITpFvYkOZnWEdGWuCb3YukBJw=="], - - "boolean": ["boolean@3.2.0", "", {}, "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw=="], - - "buffer-from": ["buffer-from@1.1.2", "", {}, "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ=="], - - "caseless": ["caseless@0.12.0", "", {}, "sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw=="], - - "concat-stream": ["concat-stream@2.0.0", "", { "dependencies": { "buffer-from": "^1.0.0", "inherits": "^2.0.3", "readable-stream": "^3.0.2", "typedarray": "^0.0.6" } }, "sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A=="], - - "debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="], - - "define-data-property": ["define-data-property@1.1.4", "", { "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", "gopd": "^1.0.1" } }, "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A=="], - - "define-properties": ["define-properties@1.2.1", "", { "dependencies": { "define-data-property": "^1.0.1", "has-property-descriptors": "^1.0.0", "object-keys": "^1.1.1" } }, "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg=="], - - "detect-node": ["detect-node@2.1.0", "", {}, "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g=="], - - "discord-api-types": ["discord-api-types@0.38.47", "", {}, "sha512-XgXQodHQBAE6kfD7kMvVo30863iHX1LHSqNq6MGUTDwIFCCvHva13+rwxyxVXDqudyApMNAd32PGjgVETi5rjA=="], - - "discord.js": ["discord.js@14.26.3", "", { "dependencies": { "@discordjs/builders": "^1.14.1", "@discordjs/collection": "1.5.3", "@discordjs/formatters": "^0.6.2", "@discordjs/rest": "^2.6.1", "@discordjs/util": "^1.2.0", "@discordjs/ws": "^1.2.3", "@sapphire/snowflake": "3.5.3", "discord-api-types": "^0.38.40", "fast-deep-equal": "3.1.3", "lodash.snakecase": "4.1.1", "magic-bytes.js": "^1.13.0", "tslib": "^2.6.3", "undici": "6.24.1" } }, "sha512-XEKtYn28YFsiJ5l4fLRyikdbo6RD5oFyqfVHQlvXz2104JhH/E8slN28dbky05w3DCrJcNVWvhVvcJCTSl/KIg=="], - - "dotenv": ["dotenv@17.4.2", "", {}, "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw=="], - - "env-paths": ["env-paths@2.2.1", "", {}, "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A=="], - - "es-define-property": ["es-define-property@1.0.1", "", {}, "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g=="], - - "es-errors": ["es-errors@1.3.0", "", {}, "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw=="], - - "es6-error": ["es6-error@4.1.1", "", {}, "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg=="], - - "escape-string-regexp": ["escape-string-regexp@4.0.0", "", {}, "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA=="], - - "fast-deep-equal": ["fast-deep-equal@3.1.3", "", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="], - - "ffmpeg-static": ["ffmpeg-static@5.3.0", "", { "dependencies": { "@derhuerst/http-basic": "^8.2.0", "env-paths": "^2.2.0", "https-proxy-agent": "^5.0.0", "progress": "^2.0.3" } }, "sha512-H+K6sW6TiIX6VGend0KQwthe+kaceeH/luE8dIZyOP35ik7ahYojDuqlTV1bOrtEwl01sy2HFNGQfi5IDJvotg=="], - - "global-agent": ["global-agent@3.0.0", "", { "dependencies": { "boolean": "^3.0.1", "es6-error": "^4.1.1", "matcher": "^3.0.0", "roarr": "^2.15.3", "semver": "^7.3.2", "serialize-error": "^7.0.1" } }, "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q=="], - - "globalthis": ["globalthis@1.0.4", "", { "dependencies": { "define-properties": "^1.2.1", "gopd": "^1.0.1" } }, "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ=="], - - "gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="], - - "has-property-descriptors": ["has-property-descriptors@1.0.2", "", { "dependencies": { "es-define-property": "^1.0.0" } }, "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg=="], - - "http-response-object": ["http-response-object@3.0.2", "", { "dependencies": { "@types/node": "^10.0.3" } }, "sha512-bqX0XTF6fnXSQcEJ2Iuyr75yVakyjIDCqroJQ/aHfSdlM743Cwqoi2nDYMzLGWUcuTWGWy8AAvOKXTfiv6q9RA=="], - - "https-proxy-agent": ["https-proxy-agent@5.0.1", "", { "dependencies": { "agent-base": "6", "debug": "4" } }, "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA=="], - - "inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="], - - "json-stringify-safe": ["json-stringify-safe@5.0.1", "", {}, "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="], - - "lodash": ["lodash@4.18.1", "", {}, "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q=="], - - "lodash.snakecase": ["lodash.snakecase@4.1.1", "", {}, "sha512-QZ1d4xoBHYUeuouhEq3lk3Uq7ldgyFXGBhg04+oRLnIz8o9T65Eh+8YdroUwn846zchkA9yDsDl5CVVaV2nqYw=="], - - "magic-bytes.js": ["magic-bytes.js@1.13.0", "", {}, "sha512-afO2mnxW7GDTXMm5/AoN1WuOcdoKhtgXjIvHmobqTD1grNplhGdv3PFOyjCVmrnOZBIT/gD/koDKpYG+0mvHcg=="], - - "matcher": ["matcher@3.0.0", "", { "dependencies": { "escape-string-regexp": "^4.0.0" } }, "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng=="], - - "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], - - "object-keys": ["object-keys@1.1.1", "", {}, "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA=="], - - "onnxruntime-common": ["onnxruntime-common@1.24.3", "", {}, "sha512-GeuPZO6U/LBJXvwdaqHbuUmoXiEdeCjWi/EG7Y1HNnDwJYuk6WUbNXpF6luSUY8yASul3cmUlLGrCCL1ZgVXqA=="], - - "onnxruntime-node": ["onnxruntime-node@1.24.3", "", { "dependencies": { "adm-zip": "^0.5.16", "global-agent": "^3.0.0", "onnxruntime-common": "1.24.3" }, "os": [ "linux", "win32", "darwin", ] }, "sha512-JH7+czbc8ALA819vlTgcV+Q214/+VjGeBHDjX81+ZCD0PCVCIFGFNtT0V4sXG/1JXypKPgScQcB3ij/hk3YnTg=="], - - "opusscript": ["opusscript@0.1.1", "", {}, "sha512-mL0fZZOUnXdZ78woRXp18lApwpp0lF5tozJOD1Wut0dgrA9WuQTgSels/CSmFleaAZrJi/nci5KOVtbuxeWoQA=="], - - "parse-cache-control": ["parse-cache-control@1.0.1", "", {}, "sha512-60zvsJReQPX5/QP0Kzfd/VrpjScIQ7SHBW6bFCYfEP+fp0Eppr1SHhIO5nd1PjZtvclzSzES9D/p5nFJurwfWg=="], - - "prism-media": ["prism-media@1.3.5", "", { "peerDependencies": { "@discordjs/opus": ">=0.8.0 <1.0.0", "ffmpeg-static": "^5.0.2 || ^4.2.7 || ^3.0.0 || ^2.4.0", "node-opus": "^0.3.3", "opusscript": "^0.0.8" }, "optionalPeers": ["@discordjs/opus", "ffmpeg-static", "node-opus", "opusscript"] }, "sha512-IQdl0Q01m4LrkN1EGIE9lphov5Hy7WWlH6ulf5QdGePLlPas9p2mhgddTEHrlaXYjjFToM1/rWuwF37VF4taaA=="], - - "progress": ["progress@2.0.3", "", {}, "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA=="], - - "readable-stream": ["readable-stream@3.6.2", "", { "dependencies": { "inherits": "^2.0.3", "string_decoder": "^1.1.1", "util-deprecate": "^1.0.1" } }, "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA=="], - - "roarr": ["roarr@2.15.4", "", { "dependencies": { "boolean": "^3.0.1", "detect-node": "^2.0.4", "globalthis": "^1.0.1", "json-stringify-safe": "^5.0.1", "semver-compare": "^1.0.0", "sprintf-js": "^1.1.2" } }, "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A=="], - - "safe-buffer": ["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="], - - "semver": ["semver@7.7.4", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA=="], - - "semver-compare": ["semver-compare@1.0.0", "", {}, "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow=="], - - "serialize-error": ["serialize-error@7.0.1", "", { "dependencies": { "type-fest": "^0.13.1" } }, "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw=="], - - "sprintf-js": ["sprintf-js@1.1.3", "", {}, "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA=="], - - "string_decoder": ["string_decoder@1.3.0", "", { "dependencies": { "safe-buffer": "~5.2.0" } }, "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA=="], - - "ts-mixer": ["ts-mixer@6.0.4", "", {}, "sha512-ufKpbmrugz5Aou4wcr5Wc1UUFWOLhq+Fm6qa6P0w0K5Qw2yhaUoiWszhCVuNQyNwrlGiscHOmqYoAox1PtvgjA=="], - - "tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], - - "type-fest": ["type-fest@0.13.1", "", {}, "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg=="], - - "typedarray": ["typedarray@0.0.6", "", {}, "sha512-/aCDEGatGvZ2BIk+HmLf4ifCJFwvKFNb9/JeZPMulfgFracn9QFcAf5GO8B/mweUjSoblS5In0cWhqpfs/5PQA=="], - - "typescript": ["typescript@6.0.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw=="], - - "undici": ["undici@6.24.1", "", {}, "sha512-sC+b0tB1whOCzbtlx20fx3WgCXwkW627p4EA9uM+/tNNPkSS+eSEld6pAs9nDv7WbY1UUljBMYPtu9BCOrCWKA=="], - - "undici-types": ["undici-types@7.19.2", "", {}, "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg=="], - - "util-deprecate": ["util-deprecate@1.0.2", "", {}, "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="], - - "ws": ["ws@8.20.0", "", { "peerDependencies": { "bufferutil": "^4.0.1", "utf-8-validate": ">=5.0.2" }, "optionalPeers": ["bufferutil", "utf-8-validate"] }, "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA=="], - - "zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], - - "@discordjs/rest/@discordjs/collection": ["@discordjs/collection@2.1.1", "", {}, "sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg=="], - - "@discordjs/rest/@sapphire/snowflake": ["@sapphire/snowflake@3.5.5", "", {}, "sha512-xzvBr1Q1c4lCe7i6sRnrofxeO1QTP/LKQ6A6qy0iB4x5yfiSfARMEQEghojzTNALDTcv8En04qYNIco9/K9eZQ=="], - - "@discordjs/ws/@discordjs/collection": ["@discordjs/collection@2.1.1", "", {}, "sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg=="], - - "http-response-object/@types/node": ["@types/node@10.17.60", "", {}, "sha512-F0KIgDJfy2nA3zMLmWGKxcH2ZVEtCZXHHdOQs2gSaQ27+lNeEfGxzkIw90aXswATX7AZ33tahPbzy6KAfUreVw=="], - } -} diff --git a/package.json b/package.json deleted file mode 100644 index df7d284..0000000 --- a/package.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "name": "realtime_voice_bot", - "version": "0.1.0", - "private": true, - "type": "module", - "scripts": { - "dev": "bun --watch src/index.ts", - "start": "bun src/index.ts discord", - "start:discord": "bun src/index.ts discord", - "start:local": "bun src/index.ts local", - "tts:test": "bun src/index.ts local-say", - "tts:dump": "bun src/index.ts local-say-dump", - "tts:voices": "bun src/index.ts local-tts-voices", - "setup:local-ai": "bun src/setup-local-ai.ts", - "devices": "bun src/index.ts local-devices", - "audio:devices": "bun src/index.ts local-devices", - "check": "tsc --noEmit", - "build": "tsc -p tsconfig.json" - }, - "engines": { - "bun": ">=1.3.0", - "node": ">=22.12.0" - }, - "dependencies": { - "@discordjs/voice": "^0.19.2", - "avr-vad": "^1.0.10", - "discord.js": "^14.26.3", - "dotenv": "^17.4.2", - "ffmpeg-static": "^5.3.0", - "opusscript": "^0.1.1", - "prism-media": "^1.3.5", - "zod": "^4.3.6" - }, - "devDependencies": { - "@types/node": "^25.6.0", - "typescript": "^6.0.3" - }, - "trustedDependencies": [ - "onnxruntime-node", - "ffmpeg-static" - ] -} diff --git a/python/local_stt_worker.py b/python/local_stt_worker.py deleted file mode 100644 index 1d55ac7..0000000 --- a/python/local_stt_worker.py +++ /dev/null @@ -1,237 +0,0 @@ -import base64 -import glob -import json -import os -from pathlib import Path -import site -import sys -import sysconfig -import tempfile -import traceback -import wave - - -os.environ.setdefault("PYTHONIOENCODING", "utf-8") - - -def log(message: str) -> None: - print(message, file=sys.stderr, flush=True) - - -def write_response(request_id: int, ok: bool, result=None, error: str | None = None) -> None: - payload = { - "id": request_id, - "ok": ok, - } - if ok: - payload["result"] = result - else: - payload["error"] = error or "unknown error" - - sys.stdout.write(json.dumps(payload, ensure_ascii=False) + "\n") - sys.stdout.flush() - - -def resolve_device() -> str: - raw = os.environ.get("LOCAL_STT_DEVICE", "auto").strip().lower() - if raw and raw != "auto": - return raw - - try: - import ctranslate2 - - if ctranslate2.get_cuda_device_count() > 0: - return "cuda" - except Exception: - pass - - return "cpu" - - -def configure_windows_cuda_runtime() -> None: - if os.name != "nt": - return - - candidate_dirs: list[str] = [] - - for key in ("CUDA_PATH", "CUDA_HOME"): - value = os.environ.get(key) - if value: - candidate_dirs.append(os.path.join(value, "bin")) - - for key, value in os.environ.items(): - if key.startswith("CUDA_PATH_V") and value: - candidate_dirs.append(os.path.join(value, "bin")) - - candidate_dirs.extend( - sorted(glob.glob(r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin"), reverse=True) - ) - - site_roots: list[str] = [] - try: - site_roots.extend(site.getsitepackages()) - except Exception: - pass - - try: - site_roots.append(site.getusersitepackages()) - except Exception: - pass - - for key in ("purelib", "platlib"): - value = sysconfig.get_paths().get(key) - if value: - site_roots.append(value) - - for root in site_roots: - nvidia_root = Path(root) / "nvidia" - if not nvidia_root.is_dir(): - continue - - for pattern in ("**/cublas64_12.dll", "**/cudnn*.dll", "**/cudart64*.dll"): - for dll_path in nvidia_root.glob(pattern): - candidate_dirs.append(str(dll_path.parent)) - - unique_dirs: list[str] = [] - for candidate in candidate_dirs: - normalized = os.path.normpath(candidate) - if not os.path.isdir(normalized): - continue - if normalized in unique_dirs: - continue - unique_dirs.append(normalized) - - for directory in unique_dirs: - try: - os.add_dll_directory(directory) - except (AttributeError, FileNotFoundError, OSError): - pass - - if unique_dirs: - existing_path = os.environ.get("PATH", "") - os.environ["PATH"] = os.pathsep.join(unique_dirs + [existing_path]) - log(f"configured CUDA DLL search paths: {', '.join(unique_dirs)}") - - -def resolve_compute_type(device: str) -> str: - raw = os.environ.get("LOCAL_STT_COMPUTE_TYPE", "auto").strip().lower() - if raw and raw != "auto": - return raw - if device == "cuda": - return "int8_float16" - return "int8" - - -class SttWorker: - def __init__(self) -> None: - configure_windows_cuda_runtime() - from faster_whisper import WhisperModel - - self.model_name = os.environ.get("LOCAL_STT_MODEL", "tiny").strip() or "tiny" - requested_device = resolve_device() - requested_compute_type = resolve_compute_type(requested_device) - self.beam_size = int(os.environ.get("LOCAL_STT_BEAM_SIZE", "1")) - auto_requested = os.environ.get("LOCAL_STT_DEVICE", "auto").strip().lower() in {"", "auto"} - - try: - self.model = WhisperModel( - self.model_name, - device=requested_device, - compute_type=requested_compute_type, - ) - self.device = requested_device - self.compute_type = requested_compute_type - except RuntimeError as exc: - lowered = str(exc).lower() - should_fallback = auto_requested and requested_device == "cuda" and any( - token in lowered for token in ("cublas", "cudnn", "cuda") - ) - if not should_fallback: - raise - - log("CUDA runtime is incomplete; falling back to CPU STT") - self.model = WhisperModel( - self.model_name, - device="cpu", - compute_type=resolve_compute_type("cpu"), - ) - self.device = "cpu" - self.compute_type = resolve_compute_type("cpu") - - log( - f"local-stt ready model={self.model_name} device={self.device} compute={self.compute_type} beam={self.beam_size}" - ) - - def transcribe(self, audio_base64: str, language: str | None) -> str: - pcm_bytes = base64.b64decode(audio_base64) - temp_path = "" - - try: - with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle: - temp_path = handle.name - - with wave.open(temp_path, "wb") as wav_file: - wav_file.setnchannels(1) - wav_file.setsampwidth(2) - wav_file.setframerate(16000) - wav_file.writeframes(pcm_bytes) - - segments, _info = self.model.transcribe( - temp_path, - language=language, - beam_size=self.beam_size, - best_of=1, - condition_on_previous_text=False, - vad_filter=False, - without_timestamps=True, - temperature=0.0, - ) - return " ".join(segment.text.strip() for segment in segments if segment.text.strip()).strip() - finally: - if temp_path: - try: - os.unlink(temp_path) - except OSError: - pass - - -def main() -> int: - try: - worker = SttWorker() - except Exception as exc: - log("failed to initialize local STT worker") - log("run `bun run setup:local-ai` first if dependencies are missing") - log("".join(traceback.format_exception(exc))) - return 1 - - for line in sys.stdin: - line = line.strip() - if not line: - continue - - try: - request = json.loads(line) - request_id = int(request["id"]) - method = request["method"] - params = request.get("params", {}) - - if method == "ping": - write_response(request_id, True, {"ready": True}) - continue - if method != "transcribe": - raise ValueError(f"unsupported method: {method}") - - text = worker.transcribe( - audio_base64=str(params.get("audio_base64", "")), - language=str(params.get("language") or "").strip() or None, - ) - write_response(request_id, True, {"text": text}) - except Exception as exc: - error_text = "".join(traceback.format_exception_only(type(exc), exc)).strip() - write_response(request_id, False, error=error_text) - - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/python/local_tts_worker.py b/python/local_tts_worker.py deleted file mode 100644 index 94d0179..0000000 --- a/python/local_tts_worker.py +++ /dev/null @@ -1,136 +0,0 @@ -import base64 -import io -import json -import os -import sys -import traceback -import wave - -import numpy as np - - -os.environ.setdefault("PYTHONIOENCODING", "utf-8") - - -def log(message: str) -> None: - print(message, file=sys.stderr, flush=True) - - -def write_response(request_id: int, ok: bool, result=None, error: str | None = None) -> None: - payload = { - "id": request_id, - "ok": ok, - } - if ok: - payload["result"] = result - else: - payload["error"] = error or "unknown error" - - sys.stdout.write(json.dumps(payload, ensure_ascii=False) + "\n") - sys.stdout.flush() - - -def normalize_lang(raw: str) -> str: - lowered = raw.strip().lower() - if lowered in {"kr", "ko-kr"}: - return "ko" - return lowered or "ko" - - -def normalize_voice(raw: str) -> str: - value = raw.strip() - if value.upper() in {"KR", "KO"} or not value: - return "af_heart" - return value - - -class TtsWorker: - def __init__(self) -> None: - from kokoro_onnx import Kokoro - from misaki import ko - - self.model_path = os.environ["LOCAL_TTS_MODEL_PATH"] - self.voices_path = os.environ["LOCAL_TTS_VOICES_PATH"] - self.language = normalize_lang(os.environ.get("LOCAL_TTS_LANGUAGE", "ko")) - self.voice = normalize_voice(os.environ.get("LOCAL_TTS_SPEAKER", "af_heart")) - self.speed = float(os.environ.get("LOCAL_TTS_SPEED", "1.12")) - self.g2p = ko.KOG2P() - self.model = Kokoro(self.model_path, self.voices_path) - - log( - f"local-tts ready model={os.path.basename(self.model_path)} voice={self.voice} language={self.language} speed={self.speed}" - ) - - def synthesize(self, text: str) -> bytes: - phonemes, _tokens = self.g2p(text) - samples, sample_rate = self.model.create( - phonemes, - voice=self.voice, - speed=self.speed, - lang="en-us", - is_phonemes=True, - ) - return build_wav_bytes(samples, sample_rate) - - -def build_wav_bytes(samples: np.ndarray, sample_rate: int) -> bytes: - clipped = np.clip(samples, -1.0, 1.0) - pcm = (clipped * 32767.0).astype(np.int16) - buffer = io.BytesIO() - - with wave.open(buffer, "wb") as wav_file: - wav_file.setnchannels(1) - wav_file.setsampwidth(2) - wav_file.setframerate(sample_rate) - wav_file.writeframes(pcm.tobytes()) - - return buffer.getvalue() - - -def main() -> int: - try: - worker = TtsWorker() - except Exception as exc: - log("failed to initialize local TTS worker") - log("run `bun run setup:local-ai` first if dependencies are missing") - log("".join(traceback.format_exception(exc))) - return 1 - - for line in sys.stdin: - line = line.strip() - if not line: - continue - - try: - request = json.loads(line) - request_id = int(request["id"]) - method = request["method"] - params = request.get("params", {}) - - if method == "ping": - write_response(request_id, True, {"ready": True}) - continue - if method != "synthesize": - raise ValueError(f"unsupported method: {method}") - - text = str(params.get("text", "")).strip() - if not text: - raise ValueError("text is empty") - - audio = worker.synthesize(text) - write_response( - request_id, - True, - { - "wav_base64": base64.b64encode(audio).decode("ascii"), - }, - ) - except Exception as exc: - error_text = "".join(traceback.format_exception_only(type(exc), exc)).strip() - write_response(request_id, False, error=error_text) - - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/python/requirements-windows.txt b/python/requirements-windows.txt deleted file mode 100644 index a61b145..0000000 --- a/python/requirements-windows.txt +++ /dev/null @@ -1,3 +0,0 @@ -faster-whisper==1.2.1 -nvidia-cublas-cu12 -nvidia-cudnn-cu12>=9,<10 diff --git a/python/requirements.txt b/python/requirements.txt deleted file mode 100644 index a24203c..0000000 --- a/python/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -faster-whisper==1.2.1 -kokoro-onnx==0.5.0 -misaki[ko]==0.7.4 diff --git a/src/audio/ffmpeg-path.ts b/src/audio/ffmpeg-path.ts deleted file mode 100644 index 1480d34..0000000 --- a/src/audio/ffmpeg-path.ts +++ /dev/null @@ -1,59 +0,0 @@ -import { existsSync } from "node:fs"; -import { spawnSync } from "node:child_process"; -import process from "node:process"; - -import ffmpegStatic from "ffmpeg-static"; - -function firstExisting(paths: Array): string | null { - for (const candidate of paths) { - if (candidate && existsSync(candidate)) { - return candidate; - } - } - return null; -} - -function findOnPath(): string | null { - const locator = process.platform === "win32" ? "where" : "which"; - const binaryName = process.platform === "win32" ? "ffmpeg.exe" : "ffmpeg"; - const result = spawnSync(locator, [binaryName], { - encoding: "utf8", - }); - - if (result.status !== 0) { - return null; - } - - const match = result.stdout - .split(/\r?\n/) - .map((line) => line.trim()) - .find((line) => line.length > 0 && existsSync(line)); - - return match ?? null; -} - -export function resolveFfmpegPath(): string | null { - const staticPath = ffmpegStatic as unknown as string | null; - return firstExisting([ - process.env.FFMPEG_PATH, - process.env.FFMPEG_BIN, - staticPath, - findOnPath(), - ]); -} - -export function requireFfmpegPath(): string { - const resolved = resolveFfmpegPath(); - if (resolved) { - return resolved; - } - - throw new Error( - [ - "ffmpeg를 찾지 못했습니다.", - "1. `bun install` 재실행", - "2. 안 되면 `bun pm trust ffmpeg-static` 후 다시 `bun install`", - "3. 또는 시스템 ffmpeg를 설치해서 PATH에 추가", - ].join("\n"), - ); -} diff --git a/src/audio/guild-voice-session.ts b/src/audio/guild-voice-session.ts deleted file mode 100644 index 30517d3..0000000 --- a/src/audio/guild-voice-session.ts +++ /dev/null @@ -1,456 +0,0 @@ -import { EventEmitter } from "node:events"; - -import prism from "prism-media"; -import { RealTimeVAD } from "avr-vad"; -import { - AudioPlayerStatus, - EndBehaviorType, - NoSubscriberBehavior, - VoiceConnectionStatus, - createAudioPlayer, - createAudioResource, - entersState, - joinVoiceChannel, - StreamType, - type AudioPlayer, - type AudioReceiveStream, - type VoiceConnection, -} from "@discordjs/voice"; -import type { Client, Guild, VoiceBasedChannel } from "discord.js"; - -import type { AppConfig } from "../config.js"; -import { Logger } from "../logger.js"; -import { float32ToPcm16Buffer, int16ArrayToFloat32, Stereo48kToMono16kDownsampler, takeFrame } from "./pcm.js"; -import { ConversationMemory, type UserUtterance } from "../services/conversation.js"; -import type { LlmService } from "../services/llm.js"; -import type { SttService } from "../services/stt.js"; -import type { PreparedSpeechAudio, TtsService } from "../services/tts.js"; - -interface GuildVoiceSessionOptions { - client: Client; - config: AppConfig; - logger: Logger; - guild: Guild; - voiceChannel: VoiceBasedChannel; - textChannelId?: string; - stt: SttService; - tts: TtsService; - llm: LlmService; -} - -interface SpeechJob { - text: string; - source: "assistant" | "manual"; -} - -class UserAudioSession { - private readonly downsampler = new Stereo48kToMono16kDownsampler(); - private readonly pendingSamples: number[] = []; - private readonly vad: RealTimeVAD; - private processing = Promise.resolve(); - - private constructor( - private readonly logger: Logger, - private readonly speakerId: string, - private readonly speakerName: string, - private readonly receiveStream: AudioReceiveStream, - private readonly decoder: NodeJS.ReadWriteStream & { destroy: () => void }, - vad: RealTimeVAD, - private readonly onSpeechEnd: (utterance: UserUtterance, audio: Float32Array) => void, - ) { - this.vad = vad; - } - - static async create(options: { - logger: Logger; - speakerId: string; - speakerName: string; - receiveStream: AudioReceiveStream; - decoder: NodeJS.ReadWriteStream & { destroy: () => void }; - onSpeechStart: () => void; - onSpeechEnd: (utterance: UserUtterance, audio: Float32Array) => void; - }): Promise { - const vadInstance = await RealTimeVAD.new({ - model: "v5", - sampleRate: 16000, - frameSamples: 1536, - positiveSpeechThreshold: 0.55, - negativeSpeechThreshold: 0.35, - redemptionFrames: 8, - preSpeechPadFrames: 2, - minSpeechFrames: 3, - onFrameProcessed: () => undefined, - onVADMisfire: () => undefined, - onSpeechStart: () => { - options.onSpeechStart(); - }, - onSpeechRealStart: () => undefined, - onSpeechEnd: (audio: Float32Array) => { - options.onSpeechEnd( - { - speakerId: options.speakerId, - speakerName: options.speakerName, - text: "", - }, - audio, - ); - }, - }); - - const session = new UserAudioSession( - options.logger, - options.speakerId, - options.speakerName, - options.receiveStream, - options.decoder, - vadInstance, - options.onSpeechEnd, - ); - - session.decoder.on("data", (chunk: Buffer) => { - session.pushPcmChunk(chunk); - }); - - session.decoder.on("error", (error) => { - options.logger.warn("PCM decoder error", options.speakerId, error); - }); - - session.receiveStream.on("error", (error) => { - options.logger.warn("Audio receive stream error", options.speakerId, error); - }); - - return session; - } - - private pushPcmChunk(chunk: Buffer): void { - const mono16k = this.downsampler.pushStereo48kChunk(chunk); - if (mono16k.length === 0) { - return; - } - - for (const sample of mono16k) { - this.pendingSamples.push(sample); - } - - while (true) { - const frame = takeFrame(this.pendingSamples, 1536); - if (!frame) { - return; - } - - const floatFrame = int16ArrayToFloat32(frame); - this.processing = this.processing - .then(() => this.vad.processAudio(floatFrame)) - .catch((error) => { - this.logger.warn("VAD frame processing failed", this.speakerId, this.speakerName, error); - }); - } - } - - destroy(): void { - this.receiveStream.destroy(); - this.decoder.destroy(); - void this.vad.destroy().catch((error) => { - this.logger.warn("VAD destroy failed", this.speakerId, this.speakerName, error); - }); - } -} - -export class GuildVoiceSession extends EventEmitter { - readonly guildId: string; - readonly voiceChannelId: string; - - private readonly connection: VoiceConnection; - private readonly player: AudioPlayer; - private readonly memory: ConversationMemory; - private readonly trackedUsers = new Map(); - private readonly pendingUsers = new Map>(); - private readonly queue: SpeechJob[] = []; - - private draining = false; - private currentAbortController: AbortController | null = null; - private currentPlayback: PreparedSpeechAudio | null = null; - private textChannelId?: string; - - private constructor(private readonly options: GuildVoiceSessionOptions) { - super(); - - this.guildId = options.guild.id; - this.voiceChannelId = options.voiceChannel.id; - this.textChannelId = options.textChannelId; - this.memory = new ConversationMemory(options.config.MAX_CONVERSATION_TURNS); - this.player = createAudioPlayer({ - behaviors: { - noSubscriber: NoSubscriberBehavior.Pause, - }, - }); - this.connection = joinVoiceChannel({ - guildId: options.guild.id, - channelId: options.voiceChannel.id, - adapterCreator: options.guild.voiceAdapterCreator, - selfDeaf: false, - selfMute: false, - }); - } - - static async create(options: GuildVoiceSessionOptions): Promise { - const session = new GuildVoiceSession(options); - await session.initialize(); - return session; - } - - private async initialize(): Promise { - this.player.on("error", (error) => { - this.options.logger.warn("Audio player error", this.guildId, error); - }); - - this.connection.on("stateChange", (_oldState, newState) => { - if (newState.status === VoiceConnectionStatus.Destroyed) { - this.options.logger.info("Voice connection destroyed", this.guildId); - } - }); - - this.connection.subscribe(this.player); - await entersState(this.connection, VoiceConnectionStatus.Ready, 30_000); - - this.connection.receiver.speaking.on("start", (userId: string) => { - if (userId === this.options.client.user?.id) { - return; - } - - void this.ensureTrackedUser(userId); - }); - } - - setTextChannel(textChannelId?: string): void { - this.textChannelId = textChannelId; - } - - clearConversation(): void { - this.memory.clear(); - this.interruptPlayback("conversation-reset"); - } - - statusSummary(): string { - const playbackState = this.player.state.status; - return [ - `세션 활성: 예`, - `음성 채널: ${this.options.voiceChannel.name}`, - `추적 유저 수: ${this.trackedUsers.size}`, - `재생 상태: ${playbackState}`, - `대기열: ${this.queue.length}`, - `최근 대화 턴: ${this.memory.recentTurns().length}`, - ].join("\n"); - } - - async speakText(text: string): Promise { - this.queue.push({ - text, - source: "manual", - }); - await this.drainQueue(); - } - - interruptPlayback(reason: string): void { - if (this.queue.length > 0 || this.player.state.status !== AudioPlayerStatus.Idle) { - this.options.logger.info("Interrupting playback", this.guildId, reason); - } - - this.queue.splice(0, this.queue.length); - this.currentAbortController?.abort(); - this.currentAbortController = null; - this.currentPlayback?.dispose(); - this.currentPlayback = null; - this.player.stop(true); - } - - async destroy(): Promise { - this.interruptPlayback("session-destroy"); - for (const session of this.trackedUsers.values()) { - session.destroy(); - } - this.trackedUsers.clear(); - this.pendingUsers.clear(); - this.connection.destroy(); - } - - private async ensureTrackedUser(userId: string): Promise { - if (this.trackedUsers.has(userId)) { - return; - } - - const existing = this.pendingUsers.get(userId); - if (existing) { - await existing; - return; - } - - const pending = this.createTrackedUser(userId).finally(() => { - this.pendingUsers.delete(userId); - }); - this.pendingUsers.set(userId, pending); - await pending; - } - - private async createTrackedUser(userId: string): Promise { - const speakerName = await this.resolveSpeakerName(userId); - const receiveStream = this.connection.receiver.subscribe(userId, { - end: { - behavior: EndBehaviorType.Manual, - }, - }); - - const decoder = new prism.opus.Decoder({ - rate: 48000, - channels: 2, - frameSize: 960, - }) as NodeJS.ReadWriteStream & { destroy: () => void }; - - receiveStream.pipe(decoder); - - const session = await UserAudioSession.create({ - logger: this.options.logger, - speakerId: userId, - speakerName, - receiveStream, - decoder, - onSpeechStart: () => { - this.interruptPlayback(`barge-in:${speakerName}`); - }, - onSpeechEnd: (utterance, audio) => { - void this.handleSpeechEnd(utterance, audio); - }, - }); - - this.trackedUsers.set(userId, session); - this.options.logger.info("Tracking speaker", this.guildId, userId, speakerName); - } - - private async resolveSpeakerName(userId: string): Promise { - try { - const user = await this.options.client.users.fetch(userId); - return user.globalName ?? user.username; - } catch { - return `user-${userId.slice(-6)}`; - } - } - - private async handleSpeechEnd(utterance: UserUtterance, audio: Float32Array): Promise { - if (audio.length < 16000 * 0.25) { - return; - } - - const pcmBuffer = float32ToPcm16Buffer(audio); - let transcript: string | null = null; - - try { - transcript = await this.options.stt.transcribePcm16(pcmBuffer); - } catch (error) { - this.options.logger.warn("STT failed", this.guildId, utterance.speakerId, error); - await this.announce(`음성 인식 실패: ${utterance.speakerName}`); - return; - } - - if (!transcript || transcript.trim().length === 0) { - return; - } - - const hydratedUtterance: UserUtterance = { - ...utterance, - text: transcript.trim(), - }; - - this.options.logger.info("Transcript committed", this.guildId, hydratedUtterance.speakerName, hydratedUtterance.text); - - if (this.options.config.DEBUG_TEXT_EVENTS) { - await this.announce(`🗣️ ${hydratedUtterance.speakerName}: ${hydratedUtterance.text}`); - } - - let reply: string; - try { - reply = await this.options.llm.generateReply(this.memory, hydratedUtterance); - } catch (error) { - this.options.logger.warn("LLM failed", this.guildId, utterance.speakerId, error); - reply = "지금은 답변 생성에 실패했습니다. 잠시 후 다시 말씀해 주세요."; - } - - this.memory.addUserTurn(hydratedUtterance); - this.memory.addAssistantTurn(reply); - if (this.options.config.DEBUG_TEXT_EVENTS) { - await this.announce(`🤖 ${reply}`); - } - - this.queue.push({ - text: reply, - source: "assistant", - }); - await this.drainQueue(); - } - - private async drainQueue(): Promise { - if (this.draining) { - return; - } - - this.draining = true; - - try { - while (this.queue.length > 0) { - const job = this.queue.shift(); - if (!job) { - continue; - } - - const abortController = new AbortController(); - this.currentAbortController = abortController; - - try { - this.currentPlayback = await this.options.tts.preparePlayback(job.text, abortController.signal); - } catch (error) { - if (abortController.signal.aborted) { - continue; - } - - this.options.logger.warn("TTS synthesis failed", this.guildId, job.source, error); - await this.announce("음성 출력 생성에 실패했습니다."); - continue; - } - - try { - const resource = createAudioResource(this.currentPlayback.stream, { - inputType: StreamType.Raw, - }); - this.player.play(resource); - - await entersState(this.player, AudioPlayerStatus.Playing, 20_000).catch(() => null); - await entersState(this.player, AudioPlayerStatus.Idle, 300_000); - } catch (error) { - if (!abortController.signal.aborted) { - this.options.logger.warn("Audio playback failed", this.guildId, error); - } - } finally { - this.currentPlayback?.dispose(); - this.currentPlayback = null; - if (this.currentAbortController === abortController) { - this.currentAbortController = null; - } - } - } - } finally { - this.draining = false; - } - } - - private async announce(message: string): Promise { - if (!this.textChannelId) { - return; - } - - const channel = await this.options.client.channels.fetch(this.textChannelId).catch(() => null); - if (!channel?.isTextBased() || !("send" in channel) || typeof channel.send !== "function") { - return; - } - - await channel.send(message).catch(() => null); - } -} diff --git a/src/audio/local-voice-session.ts b/src/audio/local-voice-session.ts deleted file mode 100644 index ab9e2cf..0000000 --- a/src/audio/local-voice-session.ts +++ /dev/null @@ -1,710 +0,0 @@ -import { spawn, type ChildProcess, type ChildProcessByStdio } from "node:child_process"; -import { once } from "node:events"; -import { promises as fs } from "node:fs"; -import os from "node:os"; -import path from "node:path"; -import type { Readable, Writable } from "node:stream"; - -import { RealTimeVAD } from "avr-vad"; - -import type { AssistantRuntimeConfig } from "../config.js"; -import { Logger } from "../logger.js"; -import { requireFfmpegPath } from "./ffmpeg-path.js"; -import { takeFrame, int16ArrayToFloat32, float32ToPcm16Buffer } from "./pcm.js"; -import { ConversationMemory, type UserUtterance } from "../services/conversation.js"; -import type { LlmService } from "../services/llm.js"; -import type { SttService } from "../services/stt.js"; -import type { PreparedSpeechAudio, TtsService } from "../services/tts.js"; - -interface LocalVoiceSessionOptions { - config: AssistantRuntimeConfig; - logger: Logger; - stt: SttService; - tts: TtsService; - llm: LlmService; -} - -interface SpeechJob { - text: string; - source: "assistant" | "manual"; -} - -export class LocalVoiceSession { - private readonly memory: ConversationMemory; - private readonly queue: SpeechJob[] = []; - private readonly pendingSamples: number[] = []; - private readonly silenceThreshold = 900; - private readonly windowsFrameSamples = 320; - private readonly windowsPreRollSamples = 3_200; - private readonly windowsSpeechStartThreshold = 520; - private readonly windowsSpeechContinueThreshold = 260; - private readonly windowsSpeechStartFrames = 2; - private readonly windowsSpeechEndFrames = 18; - private readonly windowsMinSpeechSamples = 7_200; - - private vad: RealTimeVAD | null = null; - private recorder: ChildProcessByStdio | null = null; - private currentPlayer: ChildProcess | null = null; - private currentAbortController: AbortController | null = null; - private currentPlayback: PreparedSpeechAudio | null = null; - private processing = Promise.resolve(); - private draining = false; - private destroyed = false; - private inputWatchdog: NodeJS.Timeout | null = null; - private recorderStartedAt = 0; - private lastPcmChunkAt = 0; - private lastNonSilentAudioAt = 0; - private warnedNoPcm = false; - private warnedSilent = false; - private windowsSpeechBuffer: number[] = []; - private windowsPreRollBuffer: number[] = []; - private windowsSpeechActive = false; - private windowsSpeechCandidateFrames = 0; - private windowsSilenceFrames = 0; - - constructor(private readonly options: LocalVoiceSessionOptions) { - this.memory = new ConversationMemory(options.config.MAX_CONVERSATION_TURNS); - } - - async start(): Promise { - if (process.platform !== "win32") { - this.vad = await RealTimeVAD.new({ - model: "v5", - sampleRate: 16000, - frameSamples: 1536, - positiveSpeechThreshold: 0.55, - negativeSpeechThreshold: 0.35, - redemptionFrames: 8, - preSpeechPadFrames: 2, - minSpeechFrames: 3, - onFrameProcessed: () => undefined, - onVADMisfire: () => undefined, - onSpeechStart: () => { - this.interruptPlayback("local-barge-in"); - }, - onSpeechRealStart: () => undefined, - onSpeechEnd: (audio: Float32Array) => { - void this.handleSpeechEnd(audio); - }, - }); - } else { - this.options.logger.info("Windows local mode uses amplitude-based speech detection"); - } - - this.recorder = this.spawnRecorder(); - this.recorderStartedAt = Date.now(); - this.lastPcmChunkAt = 0; - this.lastNonSilentAudioAt = 0; - this.warnedNoPcm = false; - this.warnedSilent = false; - this.recorder.stdout.on("data", (chunk: Buffer) => { - this.pushPcm16Chunk(chunk); - }); - this.recorder.stderr.on("data", (chunk: Buffer) => { - const text = chunk.toString().trim(); - if (text.length > 0) { - this.options.logger.debug("[pw-record]", text); - } - }); - this.recorder.on("exit", (code, signal) => { - if (!this.destroyed) { - this.options.logger.warn("pw-record exited unexpectedly", { code, signal }); - } - }); - - this.inputWatchdog = setInterval(() => { - this.reportInputHealth(); - }, 3_000); - } - - async destroy(): Promise { - this.destroyed = true; - this.interruptPlayback("local-shutdown"); - - if (this.inputWatchdog) { - clearInterval(this.inputWatchdog); - this.inputWatchdog = null; - } - - if (this.recorder && !this.recorder.killed) { - this.recorder.kill("SIGTERM"); - await once(this.recorder, "exit").catch(() => null); - } - - if (this.vad) { - await this.vad.destroy().catch((error) => { - this.options.logger.warn("Local VAD destroy failed", error); - }); - this.vad = null; - } - } - - clearConversation(): void { - this.memory.clear(); - this.interruptPlayback("local-reset"); - } - - async speakText(text: string): Promise { - this.queue.push({ - text, - source: "manual", - }); - await this.drainQueue(); - } - - statusSummary(): string { - return [ - "모드: local", - `플랫폼: ${process.platform}`, - `입력 source: ${this.options.config.LOCAL_AUDIO_SOURCE ?? "default"}`, - `출력 sink: ${this.describeSink()}`, - `대기열: ${this.queue.length}`, - `최근 대화 턴: ${this.memory.recentTurns().length}`, - ].join("\n"); - } - - private spawnRecorder(): ChildProcessByStdio { - if (process.platform === "win32") { - return this.spawnWindowsRecorder(); - } - - const args = [ - "--rate", - "16000", - "--channels", - "1", - "--format", - "s16", - "--raw", - ]; - - if (this.options.config.LOCAL_AUDIO_SOURCE) { - args.push("--target", this.options.config.LOCAL_AUDIO_SOURCE); - } - - args.push("-"); - - this.options.logger.info("Starting local recorder", { - source: this.options.config.LOCAL_AUDIO_SOURCE ?? "default", - }); - - return spawn("pw-record", args, { - stdio: ["ignore", "pipe", "pipe"], - }); - } - - private spawnWindowsRecorder(): ChildProcessByStdio { - const ffmpegPath = this.getFfmpegPath(); - const sourceName = this.options.config.LOCAL_AUDIO_SOURCE; - if (!sourceName) { - throw new Error("Windows 로컬 모드는 LOCAL_AUDIO_SOURCE 설정이 필요합니다. `bun run audio:devices` 로 이름을 확인해 주세요."); - } - - const args = [ - "-hide_banner", - "-loglevel", - "warning", - "-f", - "dshow", - "-i", - `audio=${sourceName}`, - "-ac", - "1", - "-ar", - "16000", - "-f", - "s16le", - "pipe:1", - ]; - - this.options.logger.info("Starting local recorder", { - source: sourceName, - backend: "ffmpeg-dshow", - }); - - return spawn(ffmpegPath, args, { - stdio: ["ignore", "pipe", "pipe"], - }); - } - - private pushPcm16Chunk(chunk: Buffer): void { - if (this.destroyed) { - return; - } - - this.lastPcmChunkAt = Date.now(); - let peak = 0; - - for (let offset = 0; offset + 1 < chunk.length; offset += 2) { - const sample = chunk.readInt16LE(offset); - const abs = Math.abs(sample); - if (abs > peak) { - peak = abs; - } - this.pendingSamples.push(sample); - } - - if (peak >= this.silenceThreshold) { - this.lastNonSilentAudioAt = Date.now(); - } - - if (process.platform === "win32") { - this.processWindowsSpeechFrames(); - return; - } - - if (!this.vad) { - return; - } - - while (true) { - const frame = takeFrame(this.pendingSamples, 1536); - if (!frame) { - return; - } - - const floatFrame = int16ArrayToFloat32(frame); - this.processing = this.processing - .then(() => this.vad?.processAudio(floatFrame)) - .catch((error) => { - this.options.logger.warn("Local VAD processing failed", error); - }); - } - } - - private processWindowsSpeechFrames(): void { - while (true) { - const frame = takeFrame(this.pendingSamples, this.windowsFrameSamples); - if (!frame) { - return; - } - - let peak = 0; - for (const sample of frame) { - const abs = Math.abs(sample); - if (abs > peak) { - peak = abs; - } - } - - if (!this.windowsSpeechActive) { - this.appendWithCap(this.windowsPreRollBuffer, frame, this.windowsPreRollSamples); - - if (peak >= this.windowsSpeechStartThreshold) { - this.windowsSpeechCandidateFrames += 1; - } else { - this.windowsSpeechCandidateFrames = 0; - } - - if (this.windowsSpeechCandidateFrames >= this.windowsSpeechStartFrames) { - this.windowsSpeechActive = true; - this.windowsSilenceFrames = 0; - this.windowsSpeechBuffer = [...this.windowsPreRollBuffer]; - this.windowsPreRollBuffer = []; - this.interruptPlayback("local-barge-in"); - this.options.logger.debug("Windows speech start detected", { peak }); - } else { - continue; - } - } - - this.windowsSpeechBuffer.push(...frame); - - if (peak >= this.windowsSpeechContinueThreshold) { - this.windowsSilenceFrames = 0; - } else { - this.windowsSilenceFrames += 1; - } - - if (this.windowsSilenceFrames < this.windowsSpeechEndFrames) { - continue; - } - - const speech = Int16Array.from(this.windowsSpeechBuffer); - this.windowsSpeechActive = false; - this.windowsSpeechBuffer = []; - this.windowsSilenceFrames = 0; - this.windowsSpeechCandidateFrames = 0; - - if (speech.length < this.windowsMinSpeechSamples) { - this.options.logger.debug("Ignored short Windows speech segment", { samples: speech.length }); - continue; - } - - this.options.logger.debug("Windows speech end detected", { samples: speech.length }); - void this.handleSpeechEnd(int16ArrayToFloat32(speech)); - } - } - - private async handleSpeechEnd(audio: Float32Array): Promise { - if (audio.length < 16000 * 0.25) { - this.options.logger.debug("Ignored short local speech segment", { samples: audio.length }); - return; - } - - const utterance: UserUtterance = { - speakerId: "local-user", - speakerName: this.options.config.LOCAL_SPEAKER_NAME, - text: "", - }; - - let transcript: string | null = null; - try { - transcript = await this.options.stt.transcribePcm16(float32ToPcm16Buffer(audio)); - } catch (error) { - this.options.logger.warn("Local STT failed", error); - return; - } - - if (!transcript || transcript.trim().length === 0) { - this.options.logger.info("Local STT returned empty transcript"); - return; - } - - utterance.text = transcript.trim(); - this.options.logger.info("Local transcript", utterance.text); - if (this.options.config.DEBUG_TEXT_EVENTS) { - console.log(`\n[you] ${utterance.text}`); - } - - let reply: string; - try { - reply = await this.options.llm.generateReply(this.memory, utterance); - } catch (error) { - this.options.logger.warn("Local LLM failed", error); - reply = "지금은 답변 생성에 실패했습니다. 잠시 후 다시 말씀해 주세요."; - } - - this.memory.addUserTurn(utterance); - this.memory.addAssistantTurn(reply); - this.options.logger.info("Local reply", reply); - if (this.options.config.DEBUG_TEXT_EVENTS) { - console.log(`[bot] ${reply}\n`); - } - - this.queue.push({ - text: reply, - source: "assistant", - }); - await this.drainQueue(); - } - - private interruptPlayback(reason: string): void { - if (this.queue.length > 0 || this.currentPlayer) { - this.options.logger.info("Interrupting local playback", reason); - } - - this.queue.splice(0, this.queue.length); - this.currentAbortController?.abort(); - this.currentAbortController = null; - this.currentPlayback?.dispose(); - this.currentPlayback = null; - - if (this.currentPlayer && !this.currentPlayer.killed) { - this.currentPlayer.kill("SIGKILL"); - } - this.currentPlayer = null; - } - - private async drainQueue(): Promise { - if (this.draining || this.destroyed) { - return; - } - - this.draining = true; - - try { - while (this.queue.length > 0 && !this.destroyed) { - const job = this.queue.shift(); - if (!job) { - continue; - } - - const abortController = new AbortController(); - this.currentAbortController = abortController; - - try { - this.currentPlayback = await this.options.tts.preparePlayback(job.text, abortController.signal); - } catch (error) { - if (!abortController.signal.aborted) { - this.options.logger.warn("Local TTS synthesis failed", error); - } - continue; - } - - try { - await this.playToSink(this.currentPlayback, abortController.signal); - } catch (error) { - if (!abortController.signal.aborted) { - this.options.logger.warn("Local playback failed", error); - } - } finally { - this.currentPlayback?.dispose(); - this.currentPlayback = null; - if (this.currentAbortController === abortController) { - this.currentAbortController = null; - } - } - } - } finally { - this.draining = false; - } - } - - private async playToSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise { - if (process.platform === "win32") { - await this.playToWindowsDefaultSink(playback, signal); - return; - } - - const args = [ - "--rate", - "48000", - "--channels", - "2", - "--format", - "s16", - "--raw", - ]; - - if (this.options.config.LOCAL_AUDIO_SINK) { - args.push("--target", this.options.config.LOCAL_AUDIO_SINK); - } - - args.push("-"); - - const player = spawn("pw-play", args, { - stdio: ["pipe", "ignore", "pipe"], - }); - this.currentPlayer = player; - - player.stderr.on("data", (chunk: Buffer) => { - const text = chunk.toString().trim(); - if (text.length > 0) { - this.options.logger.debug("[pw-play]", text); - } - }); - - signal.addEventListener( - "abort", - () => { - playback.stream.destroy(); - if (!player.killed) { - player.kill("SIGKILL"); - } - }, - { once: true }, - ); - - playback.stream.pipe(player.stdin); - - const [code, playSignal] = (await once(player, "exit")) as [number | null, NodeJS.Signals | null]; - this.currentPlayer = null; - - if (signal.aborted) { - return; - } - - if (code !== 0) { - throw new Error(`pw-play exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`); - } - } - - private async playToWindowsDefaultSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise { - if (playback.sourceFilePath) { - await this.playWindowsWaveFile(playback.sourceFilePath, signal); - return; - } - - const chunks: Buffer[] = []; - - await new Promise((resolve, reject) => { - playback.stream.on("data", (chunk: Buffer) => { - chunks.push(Buffer.from(chunk)); - }); - playback.stream.once("end", resolve); - playback.stream.once("error", reject); - signal.addEventListener( - "abort", - () => { - playback.stream.destroy(); - reject(new Error("playback aborted")); - }, - { once: true }, - ); - }).catch((error) => { - if (signal.aborted) { - return; - } - throw error; - }); - - if (signal.aborted) { - return; - } - - const pcm = Buffer.concat(chunks); - const wav = createWaveFileBuffer(pcm, 48000, 2, 16); - const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-${Date.now()}.wav`); - await fs.writeFile(tempPath, wav); - - const psScript = [ - "Add-Type -AssemblyName System;", - `$player = New-Object System.Media.SoundPlayer('${tempPath.replace(/'/g, "''")}');`, - "$player.PlaySync();", - ].join(" "); - - const player = spawn("powershell", ["-NoProfile", "-Command", psScript], { - stdio: ["ignore", "ignore", "pipe"], - }); - this.currentPlayer = player; - - player.stderr.on("data", (chunk: Buffer) => { - const text = chunk.toString().trim(); - if (text.length > 0) { - this.options.logger.debug("[powershell-player]", text); - } - }); - - signal.addEventListener( - "abort", - () => { - if (!player.killed) { - player.kill("SIGKILL"); - } - }, - { once: true }, - ); - - const [code, playSignal] = (await once(player, "exit")) as [number | null, NodeJS.Signals | null]; - this.currentPlayer = null; - await fs.unlink(tempPath).catch(() => null); - - if (signal.aborted) { - return; - } - - if (code !== 0) { - throw new Error(`powershell playback exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`); - } - } - - private async playWindowsWaveFile(filePath: string, signal: AbortSignal): Promise { - const psScript = [ - "Add-Type -AssemblyName System;", - `$player = New-Object System.Media.SoundPlayer('${filePath.replace(/'/g, "''")}');`, - "$player.PlaySync();", - ].join(" "); - - const player = spawn("powershell", ["-NoProfile", "-Command", psScript], { - stdio: ["ignore", "ignore", "pipe"], - }); - this.currentPlayer = player; - - player.stderr.on("data", (chunk: Buffer) => { - const text = chunk.toString().trim(); - if (text.length > 0) { - this.options.logger.debug("[powershell-player]", text); - } - }); - - signal.addEventListener( - "abort", - () => { - if (!player.killed) { - player.kill("SIGKILL"); - } - }, - { once: true }, - ); - - const [code, playSignal] = (await once(player, "exit")) as [number | null, NodeJS.Signals | null]; - this.currentPlayer = null; - - if (signal.aborted) { - return; - } - - if (code !== 0) { - throw new Error(`powershell playback exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`); - } - } - - private getFfmpegPath(): string { - return requireFfmpegPath(); - } - - private reportInputHealth(): void { - if (this.destroyed) { - return; - } - - const now = Date.now(); - - if (!this.warnedNoPcm && this.lastPcmChunkAt === 0 && now - this.recorderStartedAt >= 6_000) { - this.warnedNoPcm = true; - this.options.logger.warn( - [ - "입력 장치에서 PCM 데이터가 들어오지 않습니다.", - `현재 source: ${this.options.config.LOCAL_AUDIO_SOURCE ?? "default"}`, - "Windows에서는 마이크 입력이 아니라 SPDIF/ADAT 같은 디지털 입력을 고르면 반응이 없습니다.", - "`bun run devices`로 실제 마이크 이름을 다시 고르세요.", - ].join("\n"), - ); - return; - } - - if (!this.warnedSilent && this.lastPcmChunkAt > 0 && this.lastNonSilentAudioAt === 0 && now - this.recorderStartedAt >= 6_000) { - this.warnedSilent = true; - this.options.logger.warn( - [ - "입력 장치에서는 데이터가 오지만 말소리 수준으로 올라오지 않습니다.", - `현재 source: ${this.options.config.LOCAL_AUDIO_SOURCE ?? "default"}`, - "잘못된 입력 채널이거나, 마이크가 그 장치로 라우팅되지 않은 상태일 가능성이 큽니다.", - "RME Babyface Pro라면 SPDIF/ADAT 대신 아날로그 마이크 입력 채널 이름을 선택해야 합니다.", - ].join("\n"), - ); - } - } - - private describeSink(): string { - if (process.platform === "win32") { - return this.options.config.LOCAL_AUDIO_SINK ?? "system-default"; - } - return this.options.config.LOCAL_AUDIO_SINK ?? "default"; - } - - private appendWithCap(target: number[], samples: Int16Array, cap: number): void { - target.push(...samples); - if (target.length > cap) { - target.splice(0, target.length - cap); - } - } -} - -function createWaveFileBuffer( - pcm: Buffer, - sampleRate: number, - channels: number, - bitsPerSample: number, -): Buffer { - const header = Buffer.alloc(44); - const byteRate = sampleRate * channels * (bitsPerSample / 8); - const blockAlign = channels * (bitsPerSample / 8); - - header.write("RIFF", 0, 4, "ascii"); - header.writeUInt32LE(36 + pcm.length, 4); - header.write("WAVE", 8, 4, "ascii"); - header.write("fmt ", 12, 4, "ascii"); - header.writeUInt32LE(16, 16); - header.writeUInt16LE(1, 20); - header.writeUInt16LE(channels, 22); - header.writeUInt32LE(sampleRate, 24); - header.writeUInt32LE(byteRate, 28); - header.writeUInt16LE(blockAlign, 32); - header.writeUInt16LE(bitsPerSample, 34); - header.write("data", 36, 4, "ascii"); - header.writeUInt32LE(pcm.length, 40); - - return Buffer.concat([header, pcm]); -} diff --git a/src/audio/pcm.ts b/src/audio/pcm.ts deleted file mode 100644 index efdc468..0000000 --- a/src/audio/pcm.ts +++ /dev/null @@ -1,60 +0,0 @@ -export class Stereo48kToMono16kDownsampler { - private readonly pendingMono48k: number[] = []; - - pushStereo48kChunk(chunk: Buffer): Int16Array { - if (chunk.length < 4) { - return new Int16Array(); - } - - for (let offset = 0; offset + 3 < chunk.length; offset += 4) { - const left = chunk.readInt16LE(offset); - const right = chunk.readInt16LE(offset + 2); - this.pendingMono48k.push(Math.round((left + right) / 2)); - } - - const outputLength = Math.floor(this.pendingMono48k.length / 3); - if (outputLength === 0) { - return new Int16Array(); - } - - const output = new Int16Array(outputLength); - let readIndex = 0; - for (let index = 0; index < outputLength; index += 1) { - const a = this.pendingMono48k[readIndex]; - const b = this.pendingMono48k[readIndex + 1]; - const c = this.pendingMono48k[readIndex + 2]; - output[index] = Math.round((a + b + c) / 3); - readIndex += 3; - } - - this.pendingMono48k.splice(0, readIndex); - return output; - } -} - -export function int16ArrayToFloat32(input: Int16Array): Float32Array { - const output = new Float32Array(input.length); - for (let index = 0; index < input.length; index += 1) { - output[index] = input[index] / 32768; - } - return output; -} - -export function float32ToPcm16Buffer(input: Float32Array): Buffer { - const buffer = Buffer.allocUnsafe(input.length * 2); - for (let index = 0; index < input.length; index += 1) { - const value = Math.max(-1, Math.min(1, input[index])); - const scaled = value < 0 ? value * 32768 : value * 32767; - buffer.writeInt16LE(Math.round(scaled), index * 2); - } - return buffer; -} - -export function takeFrame(source: number[], frameSize: number): Int16Array | null { - if (source.length < frameSize) { - return null; - } - - const values = source.splice(0, frameSize); - return Int16Array.from(values); -} diff --git a/src/config.ts b/src/config.ts deleted file mode 100644 index f4d56e3..0000000 --- a/src/config.ts +++ /dev/null @@ -1,78 +0,0 @@ -import { config as loadDotenv } from "dotenv"; -import { z } from "zod"; - -loadDotenv(); - -const emptyToUndefined = z.preprocess((value) => { - if (typeof value !== "string") { - return value; - } - const trimmed = value.trim(); - return trimmed.length === 0 ? undefined : trimmed; -}, z.string().min(1).optional()); - -const envSchema = z.object({ - DISCORD_BOT_TOKEN: emptyToUndefined, - DISCORD_APPLICATION_ID: emptyToUndefined, - DISCORD_COMMAND_GUILD_ID: emptyToUndefined, - OLLAMA_BASE_URL: z.string().min(1).default("http://127.0.0.1:11434"), - OLLAMA_MODEL: z.string().min(1).default("qwen3:0.6b"), - OLLAMA_KEEP_ALIVE: z.string().min(1).default("5m"), - OLLAMA_NUM_CTX: z.coerce.number().int().min(512).max(32768).default(4096), - LOCAL_AI_VENV_PATH: z.string().min(1).default(".local-ai/.venv"), - LOCAL_AI_CACHE_DIR: z.string().min(1).default(".local-ai/cache"), - LOCAL_AI_PYTHON: emptyToUndefined, - LOCAL_STT_MODEL: z.string().min(1).default("small"), - LOCAL_STT_DEVICE: z.string().min(1).default("auto"), - LOCAL_STT_COMPUTE_TYPE: z.string().min(1).default("auto"), - LOCAL_STT_BEAM_SIZE: z.coerce.number().int().min(1).max(8).default(3), - LOCAL_TTS_MODEL_PATH: z.string().min(1).default(".local-ai/models/kokoro-v1.0.onnx"), - LOCAL_TTS_VOICES_PATH: z.string().min(1).default(".local-ai/models/voices-v1.0.bin"), - LOCAL_TTS_ENGINE: z.enum(["auto", "windows-media", "system", "kokoro"]).default("auto"), - LOCAL_TTS_VOICE_NAME: emptyToUndefined, - LOCAL_TTS_LANGUAGE: z.string().min(1).default("ko"), - LOCAL_TTS_SPEAKER: z.string().min(1).default("af_heart"), - LOCAL_TTS_DEVICE: z.string().min(1).default("auto"), - LOCAL_TTS_SPEED: z.coerce.number().min(0.8).max(1.6).default(1.12), - BOT_DEFAULT_LANGUAGE: z.string().min(2).default("ko"), - MAX_CONVERSATION_TURNS: z.coerce.number().int().min(4).max(30).default(12), - LOCAL_AUDIO_SOURCE: emptyToUndefined, - LOCAL_AUDIO_SINK: emptyToUndefined, - LOCAL_SPEAKER_NAME: z.string().min(1).default("local-user"), - DEBUG_TEXT_EVENTS: z - .string() - .optional() - .transform((value) => value === "true"), - LOG_LEVEL: z.enum(["debug", "info", "warn", "error"]).default("info"), -}); - -export type AppConfig = z.infer; -export type AssistantRuntimeConfig = AppConfig; -export type DiscordRuntimeConfig = AssistantRuntimeConfig & { - DISCORD_BOT_TOKEN: string; - DISCORD_APPLICATION_ID: string; -}; - -export function loadConfig(): AppConfig { - return envSchema.parse(process.env); -} - -function requirePresent(value: string | undefined, name: string): string { - if (!value) { - throw new Error(`${name} 환경변수가 필요합니다.`); - } - return value; -} - -export function requireAssistantRuntimeConfig(config: AppConfig): AssistantRuntimeConfig { - return config; -} - -export function requireDiscordRuntimeConfig(config: AppConfig): DiscordRuntimeConfig { - const assistant = requireAssistantRuntimeConfig(config); - return { - ...assistant, - DISCORD_BOT_TOKEN: requirePresent(config.DISCORD_BOT_TOKEN, "DISCORD_BOT_TOKEN"), - DISCORD_APPLICATION_ID: requirePresent(config.DISCORD_APPLICATION_ID, "DISCORD_APPLICATION_ID"), - }; -} diff --git a/src/discord-main.ts b/src/discord-main.ts deleted file mode 100644 index 126b1d4..0000000 --- a/src/discord-main.ts +++ /dev/null @@ -1,238 +0,0 @@ -import process from "node:process"; - -import { - GatewayIntentBits, - REST, - Routes, - SlashCommandBuilder, - type ChatInputCommandInteraction, - type Client, - type GuildMember, - type VoiceBasedChannel, -} from "discord.js"; -import { Client as DiscordClient } from "discord.js"; - -import { GuildVoiceSession } from "./audio/guild-voice-session.js"; -import { type DiscordRuntimeConfig } from "./config.js"; -import { Logger } from "./logger.js"; -import { LocalFasterWhisperSttService } from "./services/local-stt.js"; -import { OllamaLlmService } from "./services/ollama-llm.js"; -import { createTtsService } from "./services/create-tts-service.js"; - -export async function runDiscordBot(config: DiscordRuntimeConfig, logger: Logger): Promise { - const commands = [ - new SlashCommandBuilder().setName("join").setDescription("현재 들어가 있는 음성 채널에 봇을 입장시킵니다."), - new SlashCommandBuilder().setName("leave").setDescription("현재 음성 세션을 종료합니다."), - new SlashCommandBuilder().setName("status").setDescription("현재 음성 세션 상태를 확인합니다."), - new SlashCommandBuilder().setName("reset").setDescription("대화 문맥과 재생 큐를 초기화합니다."), - new SlashCommandBuilder() - .setName("say") - .setDescription("텍스트를 바로 음성으로 읽습니다.") - .addStringOption((option) => - option.setName("text").setDescription("읽을 문장").setRequired(true).setMaxLength(400), - ), - ].map((command) => command.toJSON()); - - const client = new DiscordClient({ - intents: [GatewayIntentBits.Guilds, GatewayIntentBits.GuildVoiceStates], - }); - - const stt = new LocalFasterWhisperSttService(config, logger); - const tts = createTtsService(config, logger); - const llm = new OllamaLlmService(config); - const sessions = new Map(); - - await stt.warmup(); - await tts.warmup?.(); - - function getVoiceChannel(interaction: ChatInputCommandInteraction): VoiceBasedChannel | null { - const member = interaction.member as GuildMember | null; - return member?.voice.channel ?? null; - } - - async function registerCommands(_appClient: Client): Promise { - const rest = new REST({ version: "10" }).setToken(config.DISCORD_BOT_TOKEN); - if (config.DISCORD_COMMAND_GUILD_ID) { - await rest.put( - Routes.applicationGuildCommands(config.DISCORD_APPLICATION_ID, config.DISCORD_COMMAND_GUILD_ID), - { - body: commands, - }, - ); - logger.info("Registered guild commands", config.DISCORD_COMMAND_GUILD_ID); - return; - } - - await rest.put(Routes.applicationCommands(config.DISCORD_APPLICATION_ID), { - body: commands, - }); - logger.info("Registered global commands"); - } - - async function createSession(interaction: ChatInputCommandInteraction): Promise { - if (!interaction.guild) { - throw new Error("Guild interaction required"); - } - - const voiceChannel = getVoiceChannel(interaction); - if (!voiceChannel) { - throw new Error("먼저 음성 채널에 들어가 주세요."); - } - - const existing = sessions.get(interaction.guild.id); - if (existing && existing.voiceChannelId === voiceChannel.id) { - existing.setTextChannel(interaction.channelId); - return existing; - } - - if (existing) { - await existing.destroy(); - sessions.delete(interaction.guild.id); - } - - const session = await GuildVoiceSession.create({ - client, - config, - logger, - guild: interaction.guild, - voiceChannel, - textChannelId: interaction.channelId, - stt, - tts, - llm, - }); - sessions.set(interaction.guild.id, session); - return session; - } - - async function handleJoin(interaction: ChatInputCommandInteraction): Promise { - await interaction.deferReply({ ephemeral: true }); - - try { - const session = await createSession(interaction); - await interaction.editReply( - `음성 비서를 시작했습니다. 채널: ${session.statusSummary().split("\n")[1]?.replace("음성 채널: ", "") ?? "알 수 없음"}`, - ); - } catch (error) { - const message = error instanceof Error ? error.message : "세션 생성에 실패했습니다."; - await interaction.editReply(message); - } - } - - async function handleLeave(interaction: ChatInputCommandInteraction): Promise { - const session = interaction.guild ? sessions.get(interaction.guild.id) : undefined; - if (!session) { - await interaction.reply({ content: "현재 활성화된 음성 세션이 없습니다.", ephemeral: true }); - return; - } - - await session.destroy(); - sessions.delete(interaction.guildId!); - await interaction.reply({ content: "음성 세션을 종료했습니다.", ephemeral: true }); - } - - async function handleStatus(interaction: ChatInputCommandInteraction): Promise { - const session = interaction.guild ? sessions.get(interaction.guild.id) : undefined; - if (!session) { - await interaction.reply({ content: "현재 활성화된 음성 세션이 없습니다.", ephemeral: true }); - return; - } - - await interaction.reply({ - content: session.statusSummary(), - ephemeral: true, - }); - } - - async function handleReset(interaction: ChatInputCommandInteraction): Promise { - const session = interaction.guild ? sessions.get(interaction.guild.id) : undefined; - if (!session) { - await interaction.reply({ content: "현재 활성화된 음성 세션이 없습니다.", ephemeral: true }); - return; - } - - session.clearConversation(); - await interaction.reply({ content: "대화 문맥과 재생 큐를 초기화했습니다.", ephemeral: true }); - } - - async function handleSay(interaction: ChatInputCommandInteraction): Promise { - await interaction.deferReply({ ephemeral: true }); - - const session = interaction.guild ? sessions.get(interaction.guild.id) : undefined; - if (!session) { - await interaction.editReply("먼저 `/join` 으로 음성 세션을 시작해 주세요."); - return; - } - - const text = interaction.options.getString("text", true).trim(); - await session.speakText(text); - await interaction.editReply("읽기 요청을 대기열에 추가했습니다."); - } - - async function shutdown(exitCode = 0): Promise { - logger.info("Shutting down"); - for (const session of sessions.values()) { - await session.destroy().catch((error) => { - logger.warn("Session shutdown failed", error); - }); - } - sessions.clear(); - await Promise.allSettled([stt.destroy?.(), tts.destroy?.()]); - await client.destroy(); - process.exit(exitCode); - } - - client.once("ready", async () => { - logger.info("Discord client ready", client.user?.tag ?? "unknown"); - try { - await registerCommands(client); - } catch (error) { - logger.error("Command registration failed", error); - } - }); - - client.on("interactionCreate", async (interaction) => { - if (!interaction.isChatInputCommand()) { - return; - } - - try { - switch (interaction.commandName) { - case "join": - await handleJoin(interaction); - return; - case "leave": - await handleLeave(interaction); - return; - case "status": - await handleStatus(interaction); - return; - case "reset": - await handleReset(interaction); - return; - case "say": - await handleSay(interaction); - return; - default: - await interaction.reply({ content: "알 수 없는 명령입니다.", ephemeral: true }); - } - } catch (error) { - logger.error("Interaction handler failed", error); - if (interaction.deferred || interaction.replied) { - await interaction.editReply("명령 처리 중 오류가 발생했습니다.").catch(() => null); - return; - } - await interaction.reply({ content: "명령 처리 중 오류가 발생했습니다.", ephemeral: true }).catch(() => null); - } - }); - - process.on("SIGINT", () => { - void shutdown(0); - }); - - process.on("SIGTERM", () => { - void shutdown(0); - }); - - await client.login(config.DISCORD_BOT_TOKEN); -} diff --git a/src/index.ts b/src/index.ts deleted file mode 100644 index 00c8830..0000000 --- a/src/index.ts +++ /dev/null @@ -1,52 +0,0 @@ -import process from "node:process"; - -import { loadConfig, requireAssistantRuntimeConfig, requireDiscordRuntimeConfig } from "./config.js"; -import { runDiscordBot } from "./discord-main.js"; -import { Logger } from "./logger.js"; -import { - dumpLocalTtsWave, - printLocalAudioDevices, - printLocalTtsVoices, - runLocalAssistant, - runLocalTtsSmokeTest, -} from "./local-main.js"; - -const mode = process.argv[2] ?? "discord"; -const config = loadConfig(); -const logger = new Logger(config.LOG_LEVEL); - -async function main(): Promise { - switch (mode) { - case "discord": - await runDiscordBot(requireDiscordRuntimeConfig(config), logger); - return; - case "local": - await runLocalAssistant(requireAssistantRuntimeConfig(config), logger); - return; - case "local-devices": - await printLocalAudioDevices(); - return; - case "local-say": { - const text = process.argv.slice(3).join(" ").trim() || "안녕하세요. TTS 단독 재생 테스트입니다."; - await runLocalTtsSmokeTest(requireAssistantRuntimeConfig(config), logger, text); - return; - } - case "local-say-dump": { - const text = process.argv.slice(3).join(" ").trim() || "안녕하세요. TTS WAV 파일 테스트입니다."; - await dumpLocalTtsWave(requireAssistantRuntimeConfig(config), logger, text); - return; - } - case "local-tts-voices": - await printLocalTtsVoices(requireAssistantRuntimeConfig(config)); - return; - default: - throw new Error( - `알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, local-say, local-say-dump, local-tts-voices`, - ); - } -} - -void main().catch((error) => { - logger.error("Fatal startup error", error); - process.exit(1); -}); diff --git a/src/local-main.ts b/src/local-main.ts deleted file mode 100644 index f67cd48..0000000 --- a/src/local-main.ts +++ /dev/null @@ -1,232 +0,0 @@ -import { spawn } from "node:child_process"; -import { copyFile, mkdir } from "node:fs/promises"; -import path from "node:path"; -import process from "node:process"; - -import type { AssistantRuntimeConfig } from "./config.js"; -import { Logger } from "./logger.js"; -import { LocalVoiceSession } from "./audio/local-voice-session.js"; -import { requireFfmpegPath } from "./audio/ffmpeg-path.js"; -import type { LlmService } from "./services/llm.js"; -import { LocalFasterWhisperSttService } from "./services/local-stt.js"; -import { OllamaLlmService } from "./services/ollama-llm.js"; -import type { SttService } from "./services/stt.js"; -import { createTtsService } from "./services/create-tts-service.js"; -import { listWindowsMediaVoices } from "./services/windows-media-tts.js"; -import { listWindowsSystemVoices } from "./services/windows-system-tts.js"; - -export async function printLocalAudioDevices(): Promise { - if (process.platform === "win32") { - const ffmpegPath = requireFfmpegPath(); - - console.log("\n=== ffmpeg dshow audio devices ==="); - await new Promise((resolve, reject) => { - const child = spawn( - ffmpegPath, - ["-hide_banner", "-list_devices", "true", "-f", "dshow", "-i", "dummy"], - { - stdio: ["ignore", "ignore", "inherit"], - }, - ); - child.on("exit", (code) => { - if (code === 0 || code === 1) { - resolve(); - return; - } - reject(new Error(`ffmpeg exited with code ${code ?? "null"}`)); - }); - child.on("error", reject); - }); - - console.log("\n위 목록의 오디오 장치 이름을 `LOCAL_AUDIO_SOURCE` 에 그대로 넣으면 됩니다."); - console.log("Windows 로컬 모드는 현재 출력 장치 직접 선택 대신 시스템 기본 출력 장치를 사용합니다."); - return; - } - - const runs = [ - { - label: "wpctl status", - args: ["status"], - }, - { - label: "wpctl status -n", - args: ["status", "-n"], - }, - ] as const; - - for (const run of runs) { - console.log(`\n=== ${run.label} ===`); - await new Promise((resolve, reject) => { - const child = spawn("wpctl", run.args, { - stdio: ["ignore", "inherit", "inherit"], - }); - child.on("exit", (code) => { - if (code === 0) { - resolve(); - return; - } - reject(new Error(`wpctl exited with code ${code ?? "null"}`)); - }); - child.on("error", reject); - }); - } -} - -export async function runLocalAssistant(config: AssistantRuntimeConfig, logger: Logger): Promise { - const stt = new LocalFasterWhisperSttService(config, logger); - const tts = createTtsService(config, logger); - const llm = new OllamaLlmService(config); - - await stt.warmup(); - await tts.warmup?.(); - await llm.warmup?.(); - - if (config.BOT_DEFAULT_LANGUAGE.startsWith("ko") && config.LOCAL_STT_MODEL === "tiny") { - logger.warn( - "LOCAL_STT_MODEL=tiny 는 한국어 인식률이 낮을 수 있습니다. GPU 환경이면 small 이상을 권장합니다.", - ); - } - - const session = new LocalVoiceSession({ - config, - logger, - stt, - tts, - llm, - }); - - console.log(session.statusSummary()); - console.log("로컬 음성 테스트를 시작합니다. Ctrl+C 로 종료합니다."); - if (process.platform === "win32") { - console.log("Windows 로컬 모드는 현재 시스템 기본 출력 장치로 재생됩니다."); - } - if (config.DEBUG_TEXT_EVENTS) { - console.log("텍스트 로그 출력이 켜져 있습니다."); - } - - const shutdown = async (exitCode = 0) => { - await session.destroy().catch((error) => { - logger.warn("Local session shutdown failed", error); - }); - await Promise.allSettled([stt.destroy?.(), tts.destroy?.()]); - process.exit(exitCode); - }; - - process.on("SIGINT", () => { - void shutdown(0); - }); - - process.on("SIGTERM", () => { - void shutdown(0); - }); - - await session.start(); -} - -export async function runLocalTtsSmokeTest( - config: AssistantRuntimeConfig, - logger: Logger, - text: string, -): Promise { - const tts = createTtsService(config, logger); - - const noOpStt: SttService = { - async transcribePcm16() { - return null; - }, - }; - const noOpLlm: LlmService = { - async generateReply() { - return ""; - }, - }; - - await tts.warmup?.(); - - const session = new LocalVoiceSession({ - config, - logger, - stt: noOpStt, - tts, - llm: noOpLlm, - }); - - console.log("TTS 단독 재생 테스트를 시작합니다."); - console.log(`재생 문장: ${text}`); - if (process.platform === "win32") { - console.log("Windows에서는 시스템 기본 출력 장치로 재생됩니다."); - } - - try { - await session.speakText(text); - } finally { - await Promise.allSettled([session.destroy(), tts.destroy?.()]); - } -} - -export async function dumpLocalTtsWave( - config: AssistantRuntimeConfig, - logger: Logger, - text: string, - outputPath?: string, -): Promise { - if (process.platform !== "win32") { - throw new Error("현재 TTS WAV 덤프 모드는 Windows에서만 구현되어 있습니다."); - } - - const resolvedPath = path.resolve(outputPath?.trim() || "tts-test.wav"); - await mkdir(path.dirname(resolvedPath), { recursive: true }); - const tts = createTtsService(config, logger); - await tts.warmup?.(); - const playback = await tts.preparePlayback(text); - - try { - if (!playback.sourceFilePath) { - throw new Error("현재 선택된 TTS 엔진은 직접 WAV 덤프를 지원하지 않습니다."); - } - await copyFile(playback.sourceFilePath, resolvedPath); - } finally { - playback.dispose(); - await tts.destroy?.(); - } - - console.log("TTS WAV 파일 생성 완료"); - console.log(`출력 파일: ${resolvedPath}`); - console.log("이 파일이 재생되면 TTS 합성은 정상이고, 실시간 재생 경로만 따로 보면 됩니다."); -} - -export async function printLocalTtsVoices(config: AssistantRuntimeConfig): Promise { - if (process.platform !== "win32") { - console.log("현재 플랫폼은 Windows가 아니므로 설치된 시스템 TTS 목록 대신 Kokoro 설정만 사용합니다."); - console.log(`LOCAL_TTS_ENGINE=${config.LOCAL_TTS_ENGINE}`); - console.log(`LOCAL_TTS_SPEAKER=${config.LOCAL_TTS_SPEAKER}`); - return; - } - - const [windowsMediaVoices, windowsSystemVoices] = await Promise.all([ - listWindowsMediaVoices(), - listWindowsSystemVoices(), - ]); - - console.log("\n=== Windows.Media.SpeechSynthesis voices (권장) ==="); - if (windowsMediaVoices.length === 0) { - console.log("설치된 Windows Media 음성이 없습니다."); - } else { - for (const voice of windowsMediaVoices) { - console.log(`- ${voice.description} | name=${voice.displayName} | lang=${voice.language}`); - } - } - - console.log("\n=== System.Speech voices (fallback) ==="); - if (windowsSystemVoices.length === 0) { - console.log("설치된 System.Speech 음성이 없습니다."); - } else { - for (const voice of windowsSystemVoices) { - console.log(`- ${voice.description} | name=${voice.name} | lang=${voice.culture}`); - } - } - - console.log("\n설정 예시"); - console.log("LOCAL_TTS_ENGINE=windows-media"); - console.log("LOCAL_TTS_VOICE_NAME=위 목록의 description 또는 name"); -} diff --git a/src/logger.ts b/src/logger.ts deleted file mode 100644 index 28a8ef8..0000000 --- a/src/logger.ts +++ /dev/null @@ -1,63 +0,0 @@ -type LogLevel = "debug" | "info" | "warn" | "error"; - -const levelOrder: Record = { - debug: 10, - info: 20, - warn: 30, - error: 40, -}; - -function formatParts(parts: unknown[]): string { - return parts - .map((part) => { - if (part instanceof Error) { - return `${part.name}: ${part.message}`; - } - if (typeof part === "string") { - return part; - } - return JSON.stringify(part); - }) - .join(" "); -} - -export class Logger { - constructor(private readonly level: LogLevel) {} - - private shouldLog(target: LogLevel): boolean { - return levelOrder[target] >= levelOrder[this.level]; - } - - private write(target: LogLevel, ...parts: unknown[]): void { - if (!this.shouldLog(target)) { - return; - } - - const line = `[${new Date().toISOString()}] [${target.toUpperCase()}] ${formatParts(parts)}`; - if (target === "error") { - console.error(line); - return; - } - if (target === "warn") { - console.warn(line); - return; - } - console.log(line); - } - - debug(...parts: unknown[]): void { - this.write("debug", ...parts); - } - - info(...parts: unknown[]): void { - this.write("info", ...parts); - } - - warn(...parts: unknown[]): void { - this.write("warn", ...parts); - } - - error(...parts: unknown[]): void { - this.write("error", ...parts); - } -} diff --git a/src/python-runtime.ts b/src/python-runtime.ts deleted file mode 100644 index 264aa00..0000000 --- a/src/python-runtime.ts +++ /dev/null @@ -1,100 +0,0 @@ -import { existsSync } from "node:fs"; -import { spawnSync } from "node:child_process"; -import path from "node:path"; - -import type { AppConfig } from "./config.js"; - -export interface PythonLaunch { - command: string; - args: string[]; - source: "venv" | "configured" | "system"; -} - -function splitCommandSpec(spec: string): string[] { - return spec.match(/(?:[^\s"]+|"[^"]*")+/g)?.map((part) => part.replace(/^"|"$/g, "")) ?? []; -} - -function canRun(command: string, args: string[]): boolean { - const result = spawnSync(command, [...args, "--version"], { - encoding: "utf8", - shell: process.platform === "win32", - }); - return result.error == null && result.status === 0; -} - -export function resolveLocalAiVenvPath(config: AppConfig): string { - return path.resolve(process.cwd(), config.LOCAL_AI_VENV_PATH); -} - -export function resolveLocalAiCachePath(config: AppConfig): string { - return path.resolve(process.cwd(), config.LOCAL_AI_CACHE_DIR); -} - -export function resolveLocalAiTtsModelPath(config: AppConfig): string { - return path.resolve(process.cwd(), config.LOCAL_TTS_MODEL_PATH); -} - -export function resolveLocalAiTtsVoicesPath(config: AppConfig): string { - return path.resolve(process.cwd(), config.LOCAL_TTS_VOICES_PATH); -} - -export function resolveVenvPythonPath(config: AppConfig): string { - const venvPath = resolveLocalAiVenvPath(config); - return process.platform === "win32" - ? path.join(venvPath, "Scripts", "python.exe") - : path.join(venvPath, "bin", "python"); -} - -export function resolvePythonLaunch(config: AppConfig, options?: { preferVenv?: boolean }): PythonLaunch { - const preferVenv = options?.preferVenv ?? true; - const venvPython = resolveVenvPythonPath(config); - - if (preferVenv && existsSync(venvPython)) { - return { - command: venvPython, - args: [], - source: "venv", - }; - } - - const configured = config.LOCAL_AI_PYTHON ? splitCommandSpec(config.LOCAL_AI_PYTHON) : []; - if (configured.length > 0 && canRun(configured[0]!, configured.slice(1))) { - return { - command: configured[0]!, - args: configured.slice(1), - source: "configured", - }; - } - - const candidates = - process.platform === "win32" - ? [ - ["py", "-3"], - ["python"], - ["python3"], - ] - : [ - ["python3"], - ["python"], - ]; - - for (const [command, ...args] of candidates) { - if (canRun(command, args)) { - return { - command, - args, - source: "system", - }; - } - } - - throw new Error( - [ - "Python 실행 파일을 찾지 못했습니다.", - "1. Python 3.11 이상을 설치", - "2. Windows면 `py -3 --version` 이 되는지 먼저 확인", - "3. 되면 `.env` 에 `LOCAL_AI_PYTHON=py -3` 설정", - "4. 그 다음 `bun run setup:local-ai` 실행", - ].join("\n"), - ); -} diff --git a/src/services/conversation.ts b/src/services/conversation.ts deleted file mode 100644 index 1b4bb90..0000000 --- a/src/services/conversation.ts +++ /dev/null @@ -1,98 +0,0 @@ -export interface ConversationTurn { - role: "user" | "assistant"; - text: string; - speakerId?: string; - speakerName?: string; - createdAt: number; -} - -export interface UserUtterance { - speakerId: string; - speakerName: string; - text: string; -} - -export interface ChatPromptMessage { - role: "user" | "assistant"; - content: string; -} - -function renderSpeakerLabel(speakerName?: string): string | null { - const normalized = speakerName?.trim(); - if (!normalized || normalized === "unknown" || normalized === "local-user") { - return null; - } - return normalized; -} - -function renderUserMessage(speakerName: string | undefined, text: string): string { - const label = renderSpeakerLabel(speakerName); - if (!label) { - return text; - } - return `${label}: ${text}`; -} - -export class ConversationMemory { - private readonly turns: ConversationTurn[] = []; - - constructor(private readonly maxTurns: number) {} - - addUserTurn(utterance: UserUtterance): void { - this.turns.push({ - role: "user", - text: utterance.text, - speakerId: utterance.speakerId, - speakerName: utterance.speakerName, - createdAt: Date.now(), - }); - this.trim(); - } - - addAssistantTurn(text: string): void { - this.turns.push({ - role: "assistant", - text, - createdAt: Date.now(), - }); - this.trim(); - } - - clear(): void { - this.turns.splice(0, this.turns.length); - } - - recentTurns(): ConversationTurn[] { - return [...this.turns]; - } - - buildMessages(currentUtterance: UserUtterance): ChatPromptMessage[] { - return [ - ...this.turns - .slice(-this.maxTurns) - .map((turn) => { - if (turn.role === "assistant") { - return { - role: "assistant" as const, - content: turn.text, - }; - } - return { - role: "user" as const, - content: renderUserMessage(turn.speakerName, turn.text), - }; - }), - { - role: "user", - content: renderUserMessage(currentUtterance.speakerName, currentUtterance.text), - }, - ]; - } - - private trim(): void { - const overflow = this.turns.length - this.maxTurns; - if (overflow > 0) { - this.turns.splice(0, overflow); - } - } -} diff --git a/src/services/create-tts-service.ts b/src/services/create-tts-service.ts deleted file mode 100644 index 25ef392..0000000 --- a/src/services/create-tts-service.ts +++ /dev/null @@ -1,112 +0,0 @@ -import process from "node:process"; - -import type { AssistantRuntimeConfig } from "../config.js"; -import type { Logger } from "../logger.js"; -import { LocalKokoroTtsService } from "./local-tts.js"; -import type { PreparedSpeechAudio, TtsService } from "./tts.js"; -import { WindowsMediaTtsService } from "./windows-media-tts.js"; -import { WindowsSystemTtsService } from "./windows-system-tts.js"; - -interface NamedTtsService { - name: string; - service: TtsService; -} - -class FallbackTtsService implements TtsService { - private activeIndex: number | null = null; - - constructor( - private readonly logger: Logger, - private readonly services: NamedTtsService[], - ) {} - - async warmup(): Promise { - await this.ensureActive(); - } - - async preparePlayback(text: string, signal?: AbortSignal): Promise { - const active = await this.ensureActive(); - - try { - return await active.service.preparePlayback(text, signal); - } catch (error) { - if (this.activeIndex === null || this.activeIndex >= this.services.length - 1) { - throw error; - } - - const failedName = active.name; - this.activeIndex += 1; - const fallback = await this.activate(this.activeIndex); - this.logger.warn(`TTS 엔진 ${failedName} 이 실패해 ${fallback.name} 로 전환합니다.`, error); - return await fallback.service.preparePlayback(text, signal); - } - } - - async destroy(): Promise { - await Promise.allSettled(this.services.map((entry) => entry.service.destroy?.())); - } - - private async ensureActive(): Promise { - if (this.activeIndex !== null) { - return this.services[this.activeIndex]!; - } - - let lastError: unknown = null; - for (let index = 0; index < this.services.length; index += 1) { - try { - return await this.activate(index); - } catch (error) { - lastError = error; - this.logger.warn(`TTS 엔진 ${this.services[index]!.name} 초기화 실패`, error); - } - } - - throw lastError instanceof Error ? lastError : new Error("사용 가능한 TTS 엔진을 찾지 못했습니다."); - } - - private async activate(index: number): Promise { - const selected = this.services[index]!; - await selected.service.warmup?.(); - this.activeIndex = index; - this.logger.info("Selected TTS engine", selected.name); - return selected; - } -} - -export function createTtsService(config: AssistantRuntimeConfig, logger: Logger): TtsService { - if (process.platform !== "win32") { - return new LocalKokoroTtsService(config, logger); - } - - const systemTts = new WindowsSystemTtsService( - config.LOCAL_TTS_SPEED, - config.LOCAL_TTS_VOICE_NAME, - config.LOCAL_TTS_LANGUAGE, - ); - const windowsMediaTts = new WindowsMediaTtsService( - config.LOCAL_TTS_SPEED, - config.LOCAL_TTS_VOICE_NAME, - config.LOCAL_TTS_LANGUAGE, - ); - - switch (config.LOCAL_TTS_ENGINE) { - case "system": - return systemTts; - case "windows-media": - return windowsMediaTts; - case "kokoro": - return new LocalKokoroTtsService(config, logger); - case "auto": - default: - return new FallbackTtsService(logger, [ - { - name: "windows-media", - service: windowsMediaTts, - }, - { - name: "system", - service: systemTts, - }, - ]); - } -} diff --git a/src/services/llm.ts b/src/services/llm.ts deleted file mode 100644 index 0d2b8af..0000000 --- a/src/services/llm.ts +++ /dev/null @@ -1,6 +0,0 @@ -import type { ConversationMemory, UserUtterance } from "./conversation.js"; - -export interface LlmService { - warmup?(): Promise; - generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise; -} diff --git a/src/services/local-stt.ts b/src/services/local-stt.ts deleted file mode 100644 index 60c3339..0000000 --- a/src/services/local-stt.ts +++ /dev/null @@ -1,43 +0,0 @@ -import type { AssistantRuntimeConfig } from "../config.js"; -import type { Logger } from "../logger.js"; -import { PythonJsonWorker } from "./python-json-worker.js"; -import type { SttService } from "./stt.js"; - -interface TranscribeResult { - text?: string; -} - -export class LocalFasterWhisperSttService implements SttService { - private readonly worker: PythonJsonWorker; - - constructor(private readonly config: AssistantRuntimeConfig, logger: Logger) { - this.worker = new PythonJsonWorker(config, logger, "local_stt_worker.py", "local-stt", { - LOCAL_STT_MODEL: config.LOCAL_STT_MODEL, - LOCAL_STT_DEVICE: config.LOCAL_STT_DEVICE, - LOCAL_STT_COMPUTE_TYPE: config.LOCAL_STT_COMPUTE_TYPE, - LOCAL_STT_BEAM_SIZE: String(config.LOCAL_STT_BEAM_SIZE), - }); - } - - async warmup(): Promise { - await this.worker.request("ping", {}); - } - - async transcribePcm16(pcm16MonoAudio: Buffer): Promise { - if (pcm16MonoAudio.byteLength === 0) { - return null; - } - - const result = await this.worker.request("transcribe", { - audio_base64: pcm16MonoAudio.toString("base64"), - language: this.config.BOT_DEFAULT_LANGUAGE, - }); - - const transcript = result.text?.trim() ?? ""; - return transcript.length > 0 ? transcript : null; - } - - async destroy(): Promise { - await this.worker.destroy(); - } -} diff --git a/src/services/local-tts.ts b/src/services/local-tts.ts deleted file mode 100644 index 36a837d..0000000 --- a/src/services/local-tts.ts +++ /dev/null @@ -1,97 +0,0 @@ -import { Readable } from "node:stream"; - -import prism from "prism-media"; - -import type { AssistantRuntimeConfig } from "../config.js"; -import type { Logger } from "../logger.js"; -import { resolveFfmpegPath } from "../audio/ffmpeg-path.js"; -import { PythonJsonWorker } from "./python-json-worker.js"; -import type { PreparedSpeechAudio, TtsService } from "./tts.js"; -import { resolveLocalAiTtsModelPath, resolveLocalAiTtsVoicesPath } from "../python-runtime.js"; - -interface SynthesizeResult { - wav_base64?: string; -} - -export class LocalKokoroTtsService implements TtsService { - private readonly worker: PythonJsonWorker; - - constructor(config: AssistantRuntimeConfig, logger: Logger) { - const resolvedFfmpegPath = resolveFfmpegPath(); - if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) { - process.env.FFMPEG_PATH = resolvedFfmpegPath; - } - - this.worker = new PythonJsonWorker(config, logger, "local_tts_worker.py", "local-tts", { - LOCAL_TTS_MODEL_PATH: resolveLocalAiTtsModelPath(config), - LOCAL_TTS_VOICES_PATH: resolveLocalAiTtsVoicesPath(config), - LOCAL_TTS_LANGUAGE: config.LOCAL_TTS_LANGUAGE, - LOCAL_TTS_SPEAKER: config.LOCAL_TTS_SPEAKER, - LOCAL_TTS_DEVICE: config.LOCAL_TTS_DEVICE, - LOCAL_TTS_SPEED: String(config.LOCAL_TTS_SPEED), - }); - } - - async warmup(): Promise { - await this.worker.request("ping", {}); - } - - async preparePlayback(text: string, signal?: AbortSignal): Promise { - const result = await this.worker.request( - "synthesize", - { - text, - }, - signal, - ); - - const wavBase64 = result.wav_base64; - if (!wavBase64) { - throw new Error("로컬 TTS가 빈 오디오를 반환했습니다."); - } - - const input = Readable.from([Buffer.from(wavBase64, "base64")]); - const ffmpeg = new prism.FFmpeg({ - args: [ - "-analyzeduration", - "0", - "-loglevel", - "0", - "-i", - "pipe:0", - "-f", - "s16le", - "-ar", - "48000", - "-ac", - "2", - "pipe:1", - ], - }); - - if (signal) { - signal.addEventListener( - "abort", - () => { - input.destroy(); - ffmpeg.destroy(); - }, - { once: true }, - ); - } - - input.pipe(ffmpeg); - - return { - stream: ffmpeg, - dispose: () => { - input.destroy(); - ffmpeg.destroy(); - }, - }; - } - - async destroy(): Promise { - await this.worker.destroy(); - } -} diff --git a/src/services/ollama-llm.ts b/src/services/ollama-llm.ts deleted file mode 100644 index b48de2c..0000000 --- a/src/services/ollama-llm.ts +++ /dev/null @@ -1,159 +0,0 @@ -import type { AssistantRuntimeConfig } from "../config.js"; -import type { ConversationMemory, UserUtterance } from "./conversation.js"; -import type { LlmService } from "./llm.js"; - -const ASSISTANT_INSTRUCTIONS = [ - "너는 디스코드 음성 채널 또는 로컬 마이크 테스트에서 동작하는 한국어 음성 비서다.", - "사용자의 마지막 말에만 직접 답한다.", - "답변은 짧고 실용적으로 한다.", - "기본은 한 문장, 길어도 두 문장을 넘기지 않는다.", - "말투는 자연스러운 한국어로 유지한다.", - "사용자가 정체를 명확히 묻지 않는 한 자기소개하지 않는다.", - "자기소개가 필요할 때만 '저는 로컬 음성 비서입니다.'처럼 짧게 말한다.", - "\"저는 화자입니다\", \"로컬 음성 비서 모드입니다\" 같은 어색한 메타 응답은 하지 않는다.", - "대화 기록에 이름이 붙어 있어도 이름이나 메타 정보를 그대로 따라 말하지 않는다.", - "잘 못 들었거나 의미가 불명확하면 짧게 다시 물어본다.", - "목록, 마크다운, 코드블록, 설명문은 쓰지 않는다.", - "생각 과정을 드러내지 말고 최종 답변만 말한다.", -].join(" "); - -const EXAMPLE_MESSAGES = [ - { - role: "user" as const, - content: "안녕하세요", - }, - { - role: "assistant" as const, - content: "안녕하세요. 무엇을 도와드릴까요?", - }, - { - role: "user" as const, - content: "당신은 누구십니까?", - }, - { - role: "assistant" as const, - content: "저는 로컬 음성 비서입니다.", - }, - { - role: "user" as const, - content: "계속 똑같은 말만 반복합니까?", - }, - { - role: "assistant" as const, - content: "아니요. 질문에 맞춰 짧게 답변합니다.", - }, -]; - -interface OllamaChatResponse { - message?: { - content?: string; - thinking?: string; - }; - error?: string; -} - -interface OllamaTagsResponse { - models?: Array<{ - name?: string; - model?: string; - }>; -} - -function normalizeReply(text: string): string { - const strippedThink = text.replace(/[\s\S]*?<\/think>/gi, " "); - const compact = strippedThink.replace(/\s+/g, " ").trim(); - if (compact.length <= 180) { - return compact; - } - - const sentences = compact.match(/[^.!?]+[.!?]?/g); - if (!sentences || sentences.length === 0) { - return compact.slice(0, 180).trim(); - } - - return sentences.slice(0, 2).join(" ").trim().slice(0, 180).trim(); -} - -export class OllamaLlmService implements LlmService { - constructor(private readonly config: AssistantRuntimeConfig) {} - - async warmup(): Promise { - const url = new URL("/api/tags", this.config.OLLAMA_BASE_URL); - let response: Response; - - try { - response = await fetch(url); - } catch { - throw new Error( - `Ollama 서버에 연결할 수 없습니다. ${this.config.OLLAMA_BASE_URL} 확인 후 Ollama 앱이 실행 중인지 확인해 주세요. Windows에서는 \`localhost\` 대신 \`http://127.0.0.1:11434\` 를 권장합니다. 모델이 없으면 \`ollama pull ${this.config.OLLAMA_MODEL}\` 를 먼저 실행하세요.`, - ); - } - - const body = (await response.json().catch(() => ({}))) as OllamaTagsResponse & { error?: string }; - if (!response.ok) { - throw new Error(body.error ?? `Ollama 상태 확인 실패: HTTP ${response.status}`); - } - - const models = body.models ?? []; - const exists = models.some((model) => { - const name = model.name?.trim(); - const alias = model.model?.trim(); - return name === this.config.OLLAMA_MODEL || alias === this.config.OLLAMA_MODEL; - }); - - if (!exists) { - throw new Error( - `Ollama 모델 ${this.config.OLLAMA_MODEL} 이 없습니다. \`ollama pull ${this.config.OLLAMA_MODEL}\` 를 먼저 실행해 주세요.`, - ); - } - } - - async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise { - const url = new URL("/api/chat", this.config.OLLAMA_BASE_URL); - let response: Response; - try { - response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - model: this.config.OLLAMA_MODEL, - messages: [ - { - role: "system", - content: ASSISTANT_INSTRUCTIONS, - }, - ...EXAMPLE_MESSAGES, - ...memory.buildMessages(utterance), - ], - think: false, - stream: false, - keep_alive: this.config.OLLAMA_KEEP_ALIVE, - options: { - num_ctx: this.config.OLLAMA_NUM_CTX, - temperature: 0.4, - num_predict: 120, - }, - }), - }); - } catch { - throw new Error( - `Ollama 서버에 연결할 수 없습니다. ${this.config.OLLAMA_BASE_URL} 확인 후 Ollama 앱이 실행 중인지 확인해 주세요. Windows에서는 \`localhost\` 대신 \`http://127.0.0.1:11434\` 를 권장합니다.`, - ); - } - - const body = (await response.json().catch(() => ({}))) as OllamaChatResponse; - - if (!response.ok) { - throw new Error(body.error ?? `Ollama request failed with status ${response.status}`); - } - - const output = body.message?.content?.trim(); - if (!output) { - return "잘 못 들었습니다. 한 번만 다시 말씀해 주세요."; - } - - return normalizeReply(output); - } -} diff --git a/src/services/python-json-worker.ts b/src/services/python-json-worker.ts deleted file mode 100644 index 8369f34..0000000 --- a/src/services/python-json-worker.ts +++ /dev/null @@ -1,208 +0,0 @@ -import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process"; -import { createInterface } from "node:readline"; -import path from "node:path"; - -import type { AssistantRuntimeConfig } from "../config.js"; -import type { Logger } from "../logger.js"; -import { resolveLocalAiCachePath, resolvePythonLaunch } from "../python-runtime.js"; - -interface WorkerRequest { - id: number; - method: string; - params: Record; -} - -interface WorkerResponse { - id: number; - ok: boolean; - result?: unknown; - error?: string; -} - -export class PythonJsonWorker { - private child: ChildProcessWithoutNullStreams | null = null; - private nextId = 1; - private readonly pending = new Map< - number, - { - resolve: (value: unknown) => void; - reject: (error: Error) => void; - } - >(); - - constructor( - private readonly config: AssistantRuntimeConfig, - private readonly logger: Logger, - private readonly scriptName: string, - private readonly label: string, - private readonly workerEnv: Record, - ) {} - - async request(method: string, params: Record, signal?: AbortSignal): Promise { - const child = this.ensureStarted(); - const id = this.nextId++; - - return await new Promise((resolve, reject) => { - if (signal?.aborted) { - reject(new Error(`${this.label} request aborted before start`)); - return; - } - - const abortHandler = () => { - this.pending.delete(id); - reject(new Error(`${this.label} request aborted`)); - }; - - if (signal) { - signal.addEventListener("abort", abortHandler, { once: true }); - } - - this.pending.set(id, { - resolve: (value) => { - if (signal) { - signal.removeEventListener("abort", abortHandler); - } - resolve(value as T); - }, - reject: (error) => { - if (signal) { - signal.removeEventListener("abort", abortHandler); - } - reject(error); - }, - }); - - const message: WorkerRequest = { - id, - method, - params, - }; - - child.stdin.write(`${JSON.stringify(message)}\n`); - }); - } - - async destroy(): Promise { - this.rejectAll(new Error(`${this.label} worker terminated`)); - - if (!this.child) { - return; - } - - const child = this.child; - this.child = null; - - child.kill("SIGTERM"); - await new Promise((resolve) => { - child.once("exit", () => resolve()); - setTimeout(resolve, 1_500); - }); - } - - private ensureStarted(): ChildProcessWithoutNullStreams { - if (this.child) { - return this.child; - } - - const launch = resolvePythonLaunch(this.config); - const scriptPath = path.resolve(process.cwd(), "python", this.scriptName); - const cachePath = resolveLocalAiCachePath(this.config); - const recentStderr: string[] = []; - - const child = spawn(launch.command, [...launch.args, scriptPath], { - stdio: ["pipe", "pipe", "pipe"], - shell: process.platform === "win32", - env: { - ...process.env, - HF_HOME: cachePath, - TRANSFORMERS_CACHE: cachePath, - PYTHONIOENCODING: "utf-8", - HF_HUB_DISABLE_SYMLINKS_WARNING: "1", - BOT_DEFAULT_LANGUAGE: this.config.BOT_DEFAULT_LANGUAGE, - ...this.workerEnv, - }, - }); - - createInterface({ - input: child.stdout, - crlfDelay: Number.POSITIVE_INFINITY, - }).on("line", (line) => { - if (!line.trim()) { - return; - } - - let payload: WorkerResponse; - try { - if (!line.startsWith("{")) { - return; - } - payload = JSON.parse(line) as WorkerResponse; - } catch (error) { - this.logger.warn(`${this.label} stdout parse failed`, error); - return; - } - - const pending = this.pending.get(payload.id); - if (!pending) { - return; - } - - this.pending.delete(payload.id); - if (payload.ok) { - pending.resolve(payload.result); - return; - } - - pending.reject(new Error(payload.error ?? `${this.label} worker error`)); - }); - - child.stderr.on("data", (chunk: Buffer) => { - const text = chunk.toString().trim(); - if (text.length > 0) { - recentStderr.push(text); - if (recentStderr.length > 20) { - recentStderr.shift(); - } - this.logger.warn(`[${this.label}]`, text); - } - }); - - child.on("exit", (code, signal) => { - if (this.child === child) { - this.child = null; - } - - const detail = recentStderr.length > 0 ? `\n${recentStderr.join("\n")}` : ""; - this.rejectAll(new Error(`${this.label} worker exited code=${code ?? "null"} signal=${signal ?? "null"}${detail}`)); - }); - - child.on("error", (error) => { - const spawnError = error as NodeJS.ErrnoException; - if (spawnError.code === "ENOENT") { - this.rejectAll( - new Error( - [ - `Python 실행에 실패했습니다: ${launch.command}`, - "Windows면 `.env` 에 `LOCAL_AI_PYTHON=py -3` 를 넣고 다시 실행하세요.", - "최초 1회는 `bun run setup:local-ai` 를 먼저 실행해야 합니다.", - ].join("\n"), - ), - ); - return; - } - - this.rejectAll(spawnError); - }); - - this.child = child; - return child; - } - - private rejectAll(error: Error): void { - const pending = [...this.pending.values()]; - this.pending.clear(); - for (const item of pending) { - item.reject(error); - } - } -} diff --git a/src/services/stt.ts b/src/services/stt.ts deleted file mode 100644 index 393aeb8..0000000 --- a/src/services/stt.ts +++ /dev/null @@ -1,4 +0,0 @@ -export interface SttService { - transcribePcm16(pcm16MonoAudio: Buffer): Promise; - destroy?(): Promise; -} diff --git a/src/services/tts.ts b/src/services/tts.ts deleted file mode 100644 index 10facdf..0000000 --- a/src/services/tts.ts +++ /dev/null @@ -1,13 +0,0 @@ -import type { Readable } from "node:stream"; - -export interface PreparedSpeechAudio { - stream: Readable; - sourceFilePath?: string; - dispose: () => void; -} - -export interface TtsService { - warmup?(): Promise; - preparePlayback(text: string, signal?: AbortSignal): Promise; - destroy?(): Promise; -} diff --git a/src/services/windows-media-tts.ts b/src/services/windows-media-tts.ts deleted file mode 100644 index c1e0f2a..0000000 --- a/src/services/windows-media-tts.ts +++ /dev/null @@ -1,152 +0,0 @@ -import { createReadStream } from "node:fs"; -import { unlink } from "node:fs/promises"; -import os from "node:os"; -import path from "node:path"; - -import type { PreparedSpeechAudio, TtsService } from "./tts.js"; -import { parsePowerShellJsonArray, runPowerShell } from "./windows-powershell.js"; - -export interface WindowsMediaVoiceInfo { - displayName: string; - description: string; - language: string; - gender: string; - id: string; -} - -function escapePowerShellSingleQuoted(text: string): string { - return text.replace(/\r?\n/g, " ").replace(/'/g, "''"); -} - -function windowsMediaPreamble(): string { - return [ - "$ErrorActionPreference = 'Stop';", - "$ProgressPreference = 'SilentlyContinue';", - "Add-Type -AssemblyName System.Runtime.WindowsRuntime;", - "$null = [Windows.Media.SpeechSynthesis.SpeechSynthesizer, Windows.Media.SpeechSynthesis, ContentType=WindowsRuntime];", - "$null = [Windows.Storage.Streams.DataReader, Windows.Storage.Streams, ContentType=WindowsRuntime];", - "function Await-WinRt($operation) {", - " $interfaceType = $operation.GetType().GetInterfaces() | Where-Object {", - " $_.IsGenericType -and $_.GetGenericTypeDefinition().FullName -eq 'Windows.Foundation.IAsyncOperation`1'", - " } | Select-Object -First 1;", - " if (-not $interfaceType) { throw 'IAsyncOperation 인터페이스를 찾지 못했습니다.' }", - " $resultType = $interfaceType.GetGenericArguments()[0];", - " $method = [System.WindowsRuntimeSystemExtensions].GetMethods() | Where-Object {", - " $_.Name -eq 'AsTask' -and", - " $_.IsGenericMethodDefinition -and", - " $_.GetGenericArguments().Count -eq 1 -and", - " $_.GetParameters().Count -eq 1 -and", - " $_.GetParameters()[0].ParameterType.IsGenericType -and", - " $_.GetParameters()[0].ParameterType.GetGenericTypeDefinition().FullName -eq 'Windows.Foundation.IAsyncOperation`1'", - " } | Select-Object -First 1;", - " if (-not $method) { throw 'System.WindowsRuntimeSystemExtensions.AsTask(IAsyncOperation) 를 찾지 못했습니다.' }", - " $task = $method.MakeGenericMethod(@($resultType)).Invoke($null, @($operation));", - " return $task.GetAwaiter().GetResult();", - "}", - ].join(" "); -} - -export async function listWindowsMediaVoices(signal?: AbortSignal): Promise { - const script = [ - windowsMediaPreamble(), - "$voices = @([Windows.Media.SpeechSynthesis.SpeechSynthesizer]::AllVoices | ForEach-Object {", - " [PSCustomObject]@{", - " displayName = $_.DisplayName;", - " description = $_.Description;", - " language = $_.Language;", - " gender = [string]$_.Gender;", - " id = $_.Id;", - " }", - "});", - "ConvertTo-Json -InputObject $voices -Compress;", - ].join(" "); - - const { stdout } = await runPowerShell(script, signal); - return parsePowerShellJsonArray(stdout); -} - -export async function synthesizeWindowsMediaSpeechToWaveFile( - text: string, - speed: number, - outputPath: string, - voiceName?: string, - language = "ko", - signal?: AbortSignal, -): Promise { - const script = [ - windowsMediaPreamble(), - `$text = '${escapePowerShellSingleQuoted(text)}';`, - `$outputPath = '${escapePowerShellSingleQuoted(outputPath)}';`, - `$preferredVoice = '${escapePowerShellSingleQuoted(voiceName ?? "")}';`, - `$preferredLanguage = '${escapePowerShellSingleQuoted(language)}';`, - `$speakingRate = ${speed.toFixed(2)};`, - "$synth = [Windows.Media.SpeechSynthesis.SpeechSynthesizer]::new();", - "try {", - " $voices = [Windows.Media.SpeechSynthesis.SpeechSynthesizer]::AllVoices;", - " $selected = $null;", - " if ($preferredVoice) {", - " $selected = $voices | Where-Object {", - " $_.DisplayName -eq $preferredVoice -or $_.Description -eq $preferredVoice -or $_.Id -eq $preferredVoice -or $_.DisplayName -like ('*' + $preferredVoice + '*') -or $_.Description -like ('*' + $preferredVoice + '*')", - " } | Select-Object -First 1;", - " }", - " if (-not $selected -and $preferredLanguage) {", - " $selected = $voices | Where-Object { $_.Language -like ($preferredLanguage + '*') } | Sort-Object @{Expression={ if ($_.DisplayName -match 'Natural' -or $_.Description -match 'Natural') { 0 } else { 1 } }}, Description | Select-Object -First 1;", - " }", - " if (-not $selected) { $selected = [Windows.Media.SpeechSynthesis.SpeechSynthesizer]::DefaultVoice }", - " if ($selected) { $synth.Voice = $selected }", - " try { $synth.Options.SpeakingRate = $speakingRate } catch {}", - " $stream = Await-WinRt ($synth.SynthesizeTextToStreamAsync($text));", - " try {", - " $size = [uint32]$stream.Size;", - " $reader = [Windows.Storage.Streams.DataReader]::new($stream.GetInputStreamAt(0));", - " try {", - " $null = Await-WinRt ($reader.LoadAsync($size));", - " $bytes = New-Object byte[] ([int]$size);", - " $reader.ReadBytes($bytes);", - " [System.IO.File]::WriteAllBytes($outputPath, $bytes);", - " } finally { $reader.Dispose() }", - " } finally { $stream.Dispose() }", - "} finally { $synth.Dispose() }", - ].join(" "); - - await runPowerShell(script, signal); -} - -export class WindowsMediaTtsService implements TtsService { - constructor( - private readonly speed: number, - private readonly voiceName?: string, - private readonly language = "ko", - ) {} - - async warmup(): Promise { - await listWindowsMediaVoices(); - } - - async preparePlayback(text: string, signal?: AbortSignal): Promise { - const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-wmtts-${Date.now()}.wav`); - await synthesizeWindowsMediaSpeechToWaveFile( - text, - this.speed, - tempPath, - this.voiceName, - this.language, - signal, - ).catch(async (error) => { - await unlink(tempPath).catch(() => null); - throw error; - }); - - return { - stream: createReadStream(tempPath), - sourceFilePath: tempPath, - dispose: () => { - void unlink(tempPath).catch(() => null); - }, - }; - } - - async destroy(): Promise { - return; - } -} diff --git a/src/services/windows-powershell.ts b/src/services/windows-powershell.ts deleted file mode 100644 index 392b404..0000000 --- a/src/services/windows-powershell.ts +++ /dev/null @@ -1,63 +0,0 @@ -import { spawn } from "node:child_process"; - -export interface PowerShellRunResult { - stdout: string; - stderr: string; -} - -export async function runPowerShell(script: string, signal?: AbortSignal): Promise { - const encodedCommand = Buffer.from(script, "utf16le").toString("base64"); - - return await new Promise((resolve, reject) => { - const child = spawn("powershell", ["-NoProfile", "-EncodedCommand", encodedCommand], { - stdio: ["ignore", "pipe", "pipe"], - }); - - let stdout = ""; - let stderr = ""; - - child.stdout.on("data", (chunk: Buffer) => { - stdout += chunk.toString(); - }); - - child.stderr.on("data", (chunk: Buffer) => { - stderr += chunk.toString(); - }); - - signal?.addEventListener( - "abort", - () => { - if (!child.killed) { - child.kill("SIGKILL"); - } - }, - { once: true }, - ); - - child.on("exit", (code) => { - if (signal?.aborted) { - reject(new Error("powershell aborted")); - return; - } - - if (code === 0) { - resolve({ stdout, stderr }); - return; - } - - reject(new Error(stderr.trim() || stdout.trim() || `powershell exited with code ${code ?? "null"}`)); - }); - - child.on("error", reject); - }); -} - -export function parsePowerShellJsonArray(stdout: string): T[] { - const trimmed = stdout.trim(); - if (!trimmed) { - return []; - } - - const parsed: unknown = JSON.parse(trimmed); - return Array.isArray(parsed) ? (parsed as T[]) : ([parsed] as T[]); -} diff --git a/src/services/windows-system-tts.ts b/src/services/windows-system-tts.ts deleted file mode 100644 index 1369483..0000000 --- a/src/services/windows-system-tts.ts +++ /dev/null @@ -1,123 +0,0 @@ -import { createReadStream } from "node:fs"; -import { unlink } from "node:fs/promises"; -import os from "node:os"; -import path from "node:path"; - -import type { PreparedSpeechAudio, TtsService } from "./tts.js"; -import { parsePowerShellJsonArray, runPowerShell } from "./windows-powershell.js"; - -export interface WindowsSystemVoiceInfo { - name: string; - culture: string; - description: string; - gender: string; - enabled: boolean; -} - -function escapePowerShellSingleQuoted(text: string): string { - return text.replace(/\r?\n/g, " ").replace(/'/g, "''"); -} - -function toSpeechRate(speed: number): number { - const mapped = Math.round((speed - 1) * 8); - return Math.max(-10, Math.min(10, mapped)); -} - -export async function listWindowsSystemVoices(signal?: AbortSignal): Promise { - const script = [ - "$ErrorActionPreference = 'Stop';", - "$ProgressPreference = 'SilentlyContinue';", - "Add-Type -AssemblyName System.Speech;", - "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;", - "try {", - " $voices = @($synth.GetInstalledVoices() | ForEach-Object {", - " [PSCustomObject]@{", - " name = $_.VoiceInfo.Name;", - " culture = $_.VoiceInfo.Culture.Name;", - " description = $_.VoiceInfo.Description;", - " gender = [string]$_.VoiceInfo.Gender;", - " enabled = [bool]$_.Enabled;", - " }", - " });", - " ConvertTo-Json -InputObject $voices -Compress;", - "} finally { $synth.Dispose() }", - ].join(" "); - - const { stdout } = await runPowerShell(script, signal); - return parsePowerShellJsonArray(stdout); -} - -export async function synthesizeWindowsSpeechToWaveFile( - text: string, - speed: number, - outputPath: string, - voiceName?: string, - language = "ko", - signal?: AbortSignal, -): Promise { - const rate = toSpeechRate(speed); - const script = [ - "$ErrorActionPreference = 'Stop';", - "$ProgressPreference = 'SilentlyContinue';", - "Add-Type -AssemblyName System.Speech;", - "$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer;", - `$preferredVoice = '${escapePowerShellSingleQuoted(voiceName ?? "")}';`, - `$preferredLanguage = '${escapePowerShellSingleQuoted(language)}';`, - "try {", - " $voices = $synth.GetInstalledVoices();", - " $selected = $null;", - " if ($preferredVoice) {", - " $selected = $voices | Where-Object {", - " $_.VoiceInfo.Name -eq $preferredVoice -or $_.VoiceInfo.Description -eq $preferredVoice -or $_.VoiceInfo.Name -like ('*' + $preferredVoice + '*') -or $_.VoiceInfo.Description -like ('*' + $preferredVoice + '*')", - " } | Select-Object -First 1;", - " }", - " if (-not $selected -and $preferredLanguage) {", - " $selected = $voices | Where-Object { $_.VoiceInfo.Culture.Name -like ($preferredLanguage + '*') } | Select-Object -First 1;", - " }", - " if ($selected) { $synth.SelectVoice($selected.VoiceInfo.Name) }", - `$synth.Rate = ${rate};`, - `$synth.SetOutputToWaveFile('${escapePowerShellSingleQuoted(outputPath)}');`, - `$synth.Speak('${escapePowerShellSingleQuoted(text)}');`, - "} finally { $synth.Dispose() }", - ].join(" "); - - await runPowerShell(script, signal); -} - -export class WindowsSystemTtsService implements TtsService { - constructor( - private readonly speed: number, - private readonly voiceName?: string, - private readonly language = "ko", - ) {} - - async warmup(): Promise { - await listWindowsSystemVoices(); - } - - async preparePlayback(text: string, signal?: AbortSignal): Promise { - const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${Date.now()}.wav`); - await synthesizeWindowsSpeechToWaveFile(text, this.speed, tempPath, this.voiceName, this.language, signal).catch( - async (error) => { - await unlink(tempPath).catch(() => null); - throw error; - }, - ); - - return { - stream: createReadStream(tempPath), - sourceFilePath: tempPath, - dispose: () => { - this.cleanupTempWave(tempPath); - }, - }; - } - - private cleanupTempWave(filePath: string): void { - void unlink(filePath).catch(() => null); - } - - async destroy(): Promise { - return; - } -} diff --git a/src/setup-local-ai.ts b/src/setup-local-ai.ts deleted file mode 100644 index e190edc..0000000 --- a/src/setup-local-ai.ts +++ /dev/null @@ -1,132 +0,0 @@ -import { existsSync } from "node:fs"; -import { mkdir, writeFile } from "node:fs/promises"; -import { spawn } from "node:child_process"; -import path from "node:path"; - -import { loadConfig } from "./config.js"; -import { - resolveLocalAiCachePath, - resolveLocalAiTtsModelPath, - resolveLocalAiTtsVoicesPath, - resolveLocalAiVenvPath, - resolvePythonLaunch, - resolveVenvPythonPath, -} from "./python-runtime.js"; - -const KOKORO_MODEL_URL = - "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx"; -const KOKORO_VOICES_URL = - "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin"; - -async function run(command: string, args: string[], extraEnv?: NodeJS.ProcessEnv): Promise { - await new Promise((resolve, reject) => { - const child = spawn(command, args, { - stdio: "inherit", - shell: process.platform === "win32", - env: { - ...process.env, - ...extraEnv, - }, - }); - - child.on("exit", (code) => { - if (code === 0) { - resolve(); - return; - } - reject(new Error(`${command} ${args.join(" ")} exited with code ${code ?? "null"}`)); - }); - child.on("error", reject); - }); -} - -async function ensurePip(pythonBin: string, env: NodeJS.ProcessEnv): Promise { - await new Promise((resolve, reject) => { - const child = spawn(pythonBin, ["-m", "pip", "--version"], { - stdio: "ignore", - shell: process.platform === "win32", - env, - }); - child.on("exit", (code) => { - if (code === 0) { - resolve(); - return; - } - reject(new Error("pip missing")); - }); - child.on("error", reject); - }).catch(async () => { - await run(pythonBin, ["-m", "ensurepip", "--upgrade"], env); - }); -} - -async function ensureDownload(url: string, filePath: string): Promise { - if (existsSync(filePath)) { - return; - } - - await mkdir(path.dirname(filePath), { recursive: true }); - const response = await fetch(url); - if (!response.ok) { - throw new Error(`다운로드 실패: ${url} (${response.status})`); - } - - const bytes = Buffer.from(await response.arrayBuffer()); - await writeFile(filePath, bytes); -} - -async function main(): Promise { - const config = loadConfig(); - const venvPath = resolveLocalAiVenvPath(config); - const venvPython = resolveVenvPythonPath(config); - const cachePath = resolveLocalAiCachePath(config); - const ttsModelPath = resolveLocalAiTtsModelPath(config); - const ttsVoicesPath = resolveLocalAiTtsVoicesPath(config); - const requirementsPath = path.resolve( - process.cwd(), - "python", - process.platform === "win32" ? "requirements-windows.txt" : "requirements.txt", - ); - const baseEnv = { - HF_HOME: cachePath, - TRANSFORMERS_CACHE: cachePath, - PYTHONIOENCODING: "utf-8", - HF_HUB_DISABLE_SYMLINKS_WARNING: "1", - }; - - await mkdir(cachePath, { recursive: true }); - - if (!existsSync(venvPython)) { - const launch = resolvePythonLaunch(config, { preferVenv: false }); - console.log(`기본 Python 확인: ${launch.command} ${launch.args.join(" ")}`.trim()); - console.log(`가상환경 생성: ${venvPath}`); - await run(launch.command, [...launch.args, "-m", "venv", venvPath], baseEnv); - } - - await ensurePip(venvPython, { - ...process.env, - ...baseEnv, - }); - - console.log("로컬 AI 의존성 설치를 시작합니다."); - if (process.platform === "win32") { - console.log("Windows GPU STT용 CUDA 런타임 패키지도 함께 확인합니다."); - } - await run(venvPython, ["-m", "pip", "install", "--upgrade", "pip", "setuptools", "wheel"], baseEnv); - await run(venvPython, ["-m", "pip", "install", "-r", requirementsPath], baseEnv); - if (process.platform !== "win32") { - console.log("로컬 TTS 모델 파일을 확인합니다."); - await ensureDownload(KOKORO_MODEL_URL, ttsModelPath); - await ensureDownload(KOKORO_VOICES_URL, ttsVoicesPath); - } - - console.log("설치가 끝났습니다."); - console.log("다음 순서:"); - console.log("1. bun run devices"); - console.log("2. bun run start:local"); -} - -void main().catch((error) => { - console.error(error instanceof Error ? error.message : String(error)); - process.exit(1); -}); diff --git a/tsconfig.json b/tsconfig.json deleted file mode 100644 index 7edb43f..0000000 --- a/tsconfig.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "compilerOptions": { - "target": "ES2022", - "module": "NodeNext", - "moduleResolution": "NodeNext", - "strict": true, - "noEmit": false, - "rootDir": "src", - "outDir": "dist", - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "skipLibCheck": true, - "resolveJsonModule": true, - "types": [ - "node" - ] - }, - "include": [ - "src/**/*.ts" - ] -}