Compare commits

...

57 Commits

Author SHA1 Message Date
4c5f8f9ceb Reduce default TTS playback rate 2026-05-03 22:47:37 +09:00
e41212e000 Silence MeloTTS stdout in worker 2026-05-03 22:29:43 +09:00
2ae52b5796 Add setup:all alias 2026-05-03 22:04:58 +09:00
99857cdaa8 Add full STT LLM TTS test mode 2026-05-03 21:54:51 +09:00
a5f47393ee Speed up TTS playback by default 2026-05-03 21:33:51 +09:00
f5194f55a1 Tune MeloTTS speed and prosody defaults 2026-05-03 18:24:39 +09:00
46a6b926df Fix TTS playback and preinstall mecab 2026-05-03 17:56:42 +09:00
caae552d47 Use official MeloTTS Docker install flow 2026-05-03 17:39:23 +09:00
b6284323be Fix Docker credential helper path for TTS 2026-05-03 17:35:20 +09:00
3abb6f81bb Add local docker cmd shim for Windows 2026-05-03 17:28:24 +09:00
74715c0546 Fix Docker path quoting and VSCode terminal env 2026-05-03 17:25:16 +09:00
aa14ebc447 Resolve Docker path for VSCode terminals 2026-05-03 17:22:52 +09:00
c328ef517e Fix Windows docker invocation for TTS 2026-05-03 01:57:59 +09:00
ad357a6ede Add local MeloTTS support 2026-05-03 01:56:09 +09:00
3360015179 Load LLM prompts from markdown files 2026-05-03 01:24:56 +09:00
28ffbf02e1 Repair non-Korean reply output 2026-05-03 01:14:16 +09:00
ec02943538 Split STT-only and STT+LLM test modes 2026-05-03 01:04:31 +09:00
c53dcc853d Integrate LLM into STT flow with reply gating 2026-05-03 01:00:44 +09:00
b28f163217 Add conditional web tools to LLM agent 2026-05-03 00:55:56 +09:00
82f98ceb07 Add tool-calling agent loop for LLM CLI 2026-05-03 00:50:53 +09:00
7e59013fa4 Add separate STT and LLM test commands 2026-05-03 00:44:26 +09:00
48937c684b Gate loopback logs behind DEBUG env 2026-05-02 21:54:49 +09:00
962ff7037b Tune realtime STT defaults 2026-05-02 21:24:41 +09:00
11cfd7cc04 Install and wire CUDA runtime for Windows STT 2026-05-02 21:08:16 +09:00
c4baca1739 Add loopback STT debug logging 2026-05-02 21:05:28 +09:00
f0f62c2307 Fix Windows cmd quoting for Python 2026-05-02 20:56:40 +09:00
39efd3aeed Default to cmd python on Windows 2026-05-02 20:54:45 +09:00
6a4fb067cd Handle pyenv Windows shims 2026-05-02 20:52:52 +09:00
dca5b2c9c4 Wrap Windows Python calls through cmd 2026-05-02 20:49:45 +09:00
2667fc2632 Resolve Python via where on Windows 2026-05-02 20:46:09 +09:00
4202911b3e Auto-detect Python launcher on Windows 2026-05-02 20:41:58 +09:00
3ccc10c706 Fix Python command resolution 2026-05-02 20:40:17 +09:00
d7d1d21240 Document Windows-first setup 2026-05-02 20:35:34 +09:00
a33167ff69 Harden loopback worker shutdown 2026-05-02 20:27:50 +09:00
5775c4809a Add realtime loopback STT prototype 2026-05-02 20:20:54 +09:00
10e0dd75db Reset project to README only 2026-05-01 23:14:23 +09:00
53777be675 Fix WinRT async interop in Windows TTS 2026-05-01 21:06:31 +09:00
96252528b4 Fix Windows Media TTS async wait 2026-05-01 21:04:05 +09:00
52d7f74049 Add Windows Media TTS engine selection 2026-05-01 04:01:33 +09:00
1a8e8d0a8f Simplify Windows TTS playback path 2026-05-01 03:38:30 +09:00
0a88e8dab1 Add Windows TTS wave dump mode 2026-05-01 03:34:43 +09:00
ac88b8c50a Add local TTS smoke test mode 2026-05-01 03:30:34 +09:00
03b06bcc6e Guide local LLM with short chat examples 2026-04-30 18:24:44 +09:00
10fa109084 Simplify chat prompts for local LLM 2026-04-30 18:21:41 +09:00
0005352be7 Fix repeated local replies 2026-04-30 18:18:16 +09:00
133118ca29 Log local assistant replies 2026-04-30 18:14:40 +09:00
88c18ee69e Default Ollama URL to 127.0.0.1 2026-04-30 18:11:31 +09:00
645a5109a2 Improve local startup checks and Korean STT defaults 2026-04-30 18:08:38 +09:00
4c7cef8c18 Install Windows CUDA runtime for STT 2026-04-30 17:59:29 +09:00
60dce65b0f Fallback to CPU when CUDA STT runtime is missing 2026-04-30 17:52:59 +09:00
ab4e0b38b0 Use simpler speech detection on Windows local mode 2026-04-30 17:50:03 +09:00
e74f71e45b Warn when Windows local input is silent 2026-04-30 06:13:49 +09:00
7ba392c0e7 Use Windows system TTS for local mode 2026-04-30 04:01:24 +09:00
dc39998241 Pin misaki for Python 3.13 2026-04-30 03:53:55 +09:00
18369ea7cb Switch local TTS to Kokoro ONNX 2026-04-30 03:51:08 +09:00
178283be61 Fix Windows Python spawning 2026-04-30 03:43:17 +09:00
bb965c061e Improve Windows Python setup guidance 2026-04-30 03:31:43 +09:00
48 changed files with 3157 additions and 2684 deletions

View File

@@ -1,28 +1,42 @@
DISCORD_BOT_TOKEN=
DISCORD_APPLICATION_ID=
DISCORD_COMMAND_GUILD_ID=
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_MODEL=qwen3:0.6b
OLLAMA_KEEP_ALIVE=5m
OLLAMA_NUM_CTX=4096
LOCAL_AI_VENV_PATH=.local-ai/.venv LOCAL_AI_VENV_PATH=.local-ai/.venv
LOCAL_AI_CACHE_DIR=.local-ai/cache # Windows면 보통 python 또는 py -3
LOCAL_AI_PYTHON= LOCAL_AI_PYTHON=python
LOCAL_STT_MODEL=tiny
LOCAL_STT_DEVICE=auto
LOCAL_STT_COMPUTE_TYPE=auto
LOCAL_STT_BEAM_SIZE=1
LOCAL_TTS_LANGUAGE=KR
LOCAL_TTS_SPEAKER=KR
LOCAL_TTS_DEVICE=auto
LOCAL_TTS_SPEED=1.12
BOT_DEFAULT_LANGUAGE=ko # Windows: ffmpeg dshow 장치 이름
MAX_CONVERSATION_TURNS=12 # Linux: pactl list sources short 에서 monitor/source 이름
LOCAL_AUDIO_SOURCE= AUDIO_SOURCE=
LOCAL_AUDIO_SINK= DOCKER_BIN=
LOCAL_SPEAKER_NAME=local-user
DEBUG_TEXT_EVENTS=false DEBUG=false
TTS_ENABLED=true
TTS_IMAGE=realtime-voice-bot-melotts:v0.1.2
TTS_LANGUAGE=KR
TTS_SPEAKER=KR
TTS_DEVICE=cpu
TTS_SPEED=1.18
TTS_PLAYBACK_RATE=2.2
TTS_SDP_RATIO=0.22
TTS_NOISE_SCALE=0.55
TTS_NOISE_SCALE_W=0.75
TTS_CACHE_DIR=.local-ai/tts-cache
TTS_OUTPUT_DIR=.local-ai/tts-output
OLLAMA_BASE_URL=http://127.0.0.1:11434
OLLAMA_MODEL=qwen3:8b
OLLAMA_KEEP_ALIVE=5m
MAX_CONVERSATION_TURNS=6
WHISPER_MODEL=large-v3-turbo
WHISPER_LANGUAGE=ko
WHISPER_DEVICE=auto
WHISPER_COMPUTE_TYPE=auto
WHISPER_BEAM_SIZE=2
SEGMENT_START_THRESHOLD=900
SEGMENT_CONTINUE_THRESHOLD=450
SEGMENT_START_FRAMES=2
SEGMENT_END_FRAMES=24
SEGMENT_PREROLL_SAMPLES=3200
SEGMENT_MIN_SPEECH_SAMPLES=7200
SEGMENT_MAX_SPEECH_SAMPLES=160000
DEBUG_TRANSCRIPTS=true
LOG_LEVEL=info LOG_LEVEL=info

2
.gitignore vendored
View File

@@ -2,5 +2,5 @@ node_modules
dist dist
.env .env
.local-ai .local-ai
__pycache__
*.pyc *.pyc
__pycache__

5
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,5 @@
{
"terminal.integrated.env.windows": {
"PATH": "${env:PATH};C:\\Program Files\\Docker\\Docker\\resources\\bin"
}
}

300
README.md
View File

@@ -1,143 +1,233 @@
# realtime_voice_bot # realtime_voice_bot
디스코드 음성 채널 또는 로컬 PC 마이크에서 한국어 음성을 인식하고, 완전 로컬 스택으로 답변을 생성한 뒤 다시 음성으로 읽어주는 최소 프로토타입입니다. 출력장치로 재생되는 소리를 파일 저장 없이 바로 받아서 `faster-whisper`로 STT 테스트를 하고, 필요하면 `Ollama` LLM과 연결된 통합 테스트와 LLM CLI 테스트를 할 수 있는 최소 프로토타입입니다.
## 현재 스택 현재 문서는 **Windows PC에서 실행하는 기준**으로 적었습니다.
- STT: `faster-whisper` + Whisper multilingual ## 현재 범위
- LLM: `Ollama` + `qwen3:0.6b`
- TTS: `MeloTTS` Korean
- VAD: `avr-vad`
외부 유료 API나 무료 한도형 API는 쓰지 않습니다. - Node.js + TypeScript 메인 프로세스
- 출력 오디오 실시간 캡처
## 현재 구현 범위 - 메모리 버퍼 기반 간단한 저지연 발화 분리
- 미리 로드한 `faster-whisper` 워커에 PCM 직접 전달
- Discord slash command 기반 제어: `/join`, `/leave`, `/status`, `/reset`, `/say` - 디스크에 WAV 저장 없이 바로 전사
- 로컬 테스트 모드: PC 마이크로 직접 말하고 바로 응답 확인 - STT 전용 테스트
- `@discordjs/voice` 기반 음성 채널 입장 및 유저별 오디오 수신 - STT 결과에 대해 답변 가치 판단 후 필요할 때만 LLM 답변하는 통합 테스트
- 48k stereo PCM을 16k mono로 내려서 유저별 VAD 처리 - 로컬 `Ollama` LLM 에이전트 CLI 테스트
- 화자 발화 시작 시 현재 재생과 대기열 즉시 중단 - 무료 로컬 `MeloTTS` 기반 음성 출력 테스트
- Python 로컬 워커를 한 번 띄워 STT/TTS 모델을 메모리에 유지
## 필수 준비물
- Bun `1.3+`
- Node.js `22.12+`
- Python `3.11+`
- `ffmpeg`
- Ollama
Discord 모드까지 쓸 거면 추가로:
- Discord bot token
- Discord application id
## 빠른 시작 ## 빠른 시작
```bash ```bat
bun install bun install
ollama pull qwen3:0.6b bun run setup
bun run setup:local-ai copy .env.example .env
``` ```
그다음 로컬 장치 확인: 또는 전체 준비를 명시적으로:
```bash ```bat
bun run setup:all
copy .env.example .env
```
장치 목록 확인:
```bat
bun run devices bun run devices
``` ```
실행: 실행:
```bash ```bat
bun run start:local bun run test:stt
``` ```
Discord 모드: STT + LLM 통합 테스트:
```bash ```bat
bun run start:discord bun run test:sttllm
```
STT + LLM + TTS 전체 연결 테스트:
```bat
bun run test:all
```
LLM 단독 테스트:
```bat
bun run test:llm
```
TTS 단독 테스트:
```bat
bun run test:tts -- "안녕하세요. 로컬 티티에스 테스트입니다."
``` ```
## 환경 변수 ## 환경 변수
`.env.example`를 복사해서 `.env`를 채우면 됩니다. - `AUDIO_SOURCE`
- `bun run devices` 에서 보이는 `ffmpeg dshow` 오디오 장치 이름
Discord 모드에서만 필수: - 보통 `Stereo Mix`, 오디오 인터페이스 loopback 채널, 가상 케이블 입력 같은 이름을 넣습니다
- `DOCKER_BIN`
- `DISCORD_BOT_TOKEN` - 비워두면 자동 탐색
- `DISCORD_APPLICATION_ID` - VSCode가 오래 떠 있어서 `docker` PATH를 못 잡을 때만 설정
- 예: `C:\Program Files\Docker\Docker\resources\bin\docker.exe`
기본값이 이미 들어있는 로컬 AI 설정: - `DEBUG`
- `true`면 상세 로그 출력
- `false`면 전사 결과만 출력
- `WHISPER_MODEL`
- 기본값 `large-v3-turbo`
- `OLLAMA_BASE_URL` - `OLLAMA_BASE_URL`
- 기본값 `http://127.0.0.1:11434`
- `OLLAMA_MODEL` - `OLLAMA_MODEL`
- 기본값 `qwen3:8b`
- `TTS_ENABLED`
- 기본값 `true`
- `TTS_IMAGE`
- 기본값 `realtime-voice-bot-melotts:v0.1.2`
- `TTS_LANGUAGE`
- 기본값 `KR`
- `TTS_SPEAKER`
- 기본값 `KR`
- `TTS_DEVICE`
- 기본값 `cpu`
- Docker GPU passthrough를 쓸 때만 `cuda`로 바꿉니다
- `TTS_SPEED`
- 기본값 `1.18`
- `TTS_PLAYBACK_RATE`
- 기본값 `2.2`
- 생성된 WAV를 `ffmpeg`로 더 빠르게 재생합니다
- `TTS_SDP_RATIO`
- 기본값 `0.22`
- `TTS_NOISE_SCALE`
- 기본값 `0.55`
- `TTS_NOISE_SCALE_W`
- 기본값 `0.75`
- `OLLAMA_KEEP_ALIVE` - `OLLAMA_KEEP_ALIVE`
- `OLLAMA_NUM_CTX` - 기본값 `5m`
- `LOCAL_AI_VENV_PATH` - `MAX_CONVERSATION_TURNS`
- `LOCAL_AI_CACHE_DIR` - 기본값 `6`
- `LOCAL_STT_MODEL` - 최근 대화 몇 턴까지 LLM 문맥으로 넘길지 정합니다
- `LOCAL_STT_DEVICE` - `WHISPER_LANGUAGE`
- `LOCAL_STT_COMPUTE_TYPE`
- `LOCAL_STT_BEAM_SIZE`
- `LOCAL_TTS_LANGUAGE`
- `LOCAL_TTS_SPEAKER`
- `LOCAL_TTS_DEVICE`
- `LOCAL_TTS_SPEED`
선택:
- `DISCORD_COMMAND_GUILD_ID`
- 테스트 서버에만 slash command를 즉시 반영하려면 설정
- `LOCAL_AI_PYTHON`
- Python 경로 자동 탐지가 안 되면 설정
- 예시: `python`
- Windows 예시: `py -3`
- `LOCAL_AUDIO_SOURCE`
- 로컬 입력 장치
- Linux는 `pw-record --target`, Windows는 `ffmpeg dshow` 장치 이름
- `LOCAL_AUDIO_SINK`
- Linux 로컬 출력 장치
- Windows는 현재 시스템 기본 출력 장치 사용
- `LOCAL_SPEAKER_NAME`
- 로컬 테스트에서 프롬프트에 넣을 화자 이름
- `BOT_DEFAULT_LANGUAGE`
- 기본값 `ko` - 기본값 `ko`
- `DEBUG_TEXT_EVENTS` - `WHISPER_DEVICE`
- `true`면 transcript/reply를 콘솔에 같이 출력 - `auto`, `cuda`, `cpu`
- `WHISPER_COMPUTE_TYPE`
- `auto`, `float16`, `int8_float16`, `int8`, `float32`
- `WHISPER_BEAM_SIZE`
- 기본값 `2`
- `SEGMENT_END_FRAMES`
- 기본값 `24`
- 끝을 조금 더 늦게 잘라서 문장이 잘게 끊기는 현상을 줄입니다
- `SEGMENT_MAX_SPEECH_SAMPLES`
- 기본값 `160000`
- 너무 긴 구간은 강제로 끊어서 지연이 과하게 커지는 걸 막습니다
## 속도 우선 기본값 ## 메모
- STT 기본 모델은 `tiny` - 이 버전은 `STT`, `STT+LLM`, `LLM` 테스트를 따로 제공합니다.
- LLM 기본 모델은 `qwen3:0.6b` - `test:sttllm`은 STT와 LLM만 연결합니다.
- TTS 기본 속도는 `1.12` - `test:all`은 STT, LLM, TTS를 모두 연결합니다.
- `test:all`에서는 자기 음성을 다시 전사하지 않도록 TTS 재생 중에는 캡처를 잠시 멈춥니다.
- LLM 프롬프트는 `prompts/*.md` 에 분리되어 있습니다.
- 최소 지연을 위해 파일 저장은 하지 않습니다.
- VAD는 현재 모델 기반이 아니라 진폭 기반 단순 분리입니다.
- Windows에서는 보통 출력 루프백이 가능한 장치나 `Stereo Mix`, 오디오 인터페이스 loopback 채널을 `AUDIO_SOURCE`로 잡아야 합니다.
- 단순히 스피커 이름을 넣는 구조가 아니라, **루프백/캡처 가능한 입력 장치 이름**을 넣어야 합니다.
- `ffmpeg`가 PATH에 잡혀 있어야 합니다.
- TTS는 Windows에서 Docker Desktop이 필요합니다. MeloTTS 공식 문서도 Windows/macOS에서는 Docker 실행을 권장합니다.
- `cmd` 기준으로 `.env`는 `copy .env.example .env`로 만들면 됩니다.
정확도가 아쉬우면: ## Windows 테스트 순서
```env
LOCAL_STT_MODEL=small
OLLAMA_MODEL=qwen3:1.7b
```
## 로컬 테스트 순서
1. `bun install` 1. `bun install`
2. `ollama pull qwen3:0.6b` 2. `bun run setup:stt`
3. `bun run setup:local-ai` 3. `copy .env.example .env`
4. `bun run devices` 4. `bun run devices`
5. 필요하면 `.env` `LOCAL_AUDIO_SOURCE` 설정 5. `.env` `AUDIO_SOURCE=`에 루프백 장치 이름 입력
6. `bun run start:local` 6. `bun run test:stt`
7. 유튜브, 디스코드 통화, 동영상 같은 소리를 재생해서 전사만 확인
## Windows 메모 ## Windows STT+LLM 통합 테스트 순서
- `bun run devices` 와 Windows 로컬 녹음은 `ffmpeg`가 필요합니다. 1. `bun run setup:llm`
- 출력 장치 직접 선택은 아직 미구현이라 시스템 기본 출력 장치로 재생됩니다. 2. `bun run setup:tts`
- Python 탐지가 안 되면 `.env``LOCAL_AI_PYTHON=py -3` 또는 `LOCAL_AI_PYTHON=python` 을 넣으면 됩니다. 3. `bun run test:sttllm`
4. 유튜브, 디스코드 통화, 동영상 같은 소리를 재생해서 전사와 답변 확인
## 설계 메모 ## Windows 전체 연결 테스트 순서
- 입력은 유저별 병렬 처리 1. `bun run setup:llm`
- 출력은 길드 세션당 단일 큐 2. `bun run setup:tts`
- 로컬 모드는 단일 화자 입력 기준 3. `bun run test:all`
- 화자 구분은 `speaker_id`, `speaker_name`을 LLM 프롬프트에 항상 포함 4. 유튜브, 디스코드 통화, 동영상 같은 소리를 재생해서 전사, 답변, 음성 출력 확인
- 모델 다운로드 캐시는 기본적으로 `.local-ai/cache` 아래에 저장
## Windows LLM 테스트 순서
1. `bun run setup:llm`
2. `bun run test:llm`
3. 콘솔에 직접 문장을 입력하고 답변 확인
4. `/reset` 으로 문맥 초기화, `/exit` 로 종료
## Windows TTS 테스트 순서
1. Docker Desktop 실행
2. `bun run setup:tts`
3. `bun run test:tts -- "안녕하세요. 로컬 티티에스 테스트입니다."`
현재 `test:llm` 에이전트 도구:
- 현재 시간 조회
- 현재 런타임 설정 조회
- 주요 bun 명령 목록 조회
- 간단한 산술식 계산
- 웹 검색
- URL 본문 읽기
동작 원칙:
- 일반 대화는 로컬 LLM만 답변
- 최신 정보, 뉴스, 사실 확인, 검색 요청일 때만 웹 도구 사용
- `test:sttllm` 에서는 먼저 "대답할 가치가 있는 텍스트인지" 판정한 뒤 필요할 때만 답변
- 웹 검색이 실제로 시작되면 결과 전에 `검색해볼게요.` 같은 진행 메시지를 먼저 출력
- 답변에 한글 외 다른 문자군이 섞이면 한국어만으로 한 번 더 교정
현재 프롬프트 파일:
- `prompts/assistant.md`
- `prompts/reply-gate.md`
- `prompts/rewrite-korean.md`
## Windows용 .env 예시
```env
LOCAL_AI_PYTHON=python
AUDIO_SOURCE=
DEBUG=false
TTS_ENABLED=true
TTS_IMAGE=realtime-voice-bot-melotts:v0.1.2
TTS_LANGUAGE=KR
TTS_SPEAKER=KR
TTS_DEVICE=cpu
TTS_SPEED=1.18
TTS_PLAYBACK_RATE=2.2
TTS_SDP_RATIO=0.22
TTS_NOISE_SCALE=0.55
TTS_NOISE_SCALE_W=0.75
TTS_CACHE_DIR=.local-ai/tts-cache
TTS_OUTPUT_DIR=.local-ai/tts-output
OLLAMA_BASE_URL=http://127.0.0.1:11434
OLLAMA_MODEL=qwen3:8b
OLLAMA_KEEP_ALIVE=5m
MAX_CONVERSATION_TURNS=6
WHISPER_MODEL=large-v3-turbo
WHISPER_LANGUAGE=ko
WHISPER_DEVICE=auto
WHISPER_COMPUTE_TYPE=auto
WHISPER_BEAM_SIZE=2
SEGMENT_END_FRAMES=24
SEGMENT_MAX_SPEECH_SAMPLES=160000
DEBUG_TRANSCRIPTS=true
LOG_LEVEL=info
```

196
bun.lock
View File

@@ -5,13 +5,7 @@
"": { "": {
"name": "realtime_voice_bot", "name": "realtime_voice_bot",
"dependencies": { "dependencies": {
"@discordjs/voice": "^0.19.2",
"avr-vad": "^1.0.10",
"discord.js": "^14.26.3",
"dotenv": "^17.4.2", "dotenv": "^17.4.2",
"ffmpeg-static": "^5.3.0",
"opusscript": "^0.1.1",
"prism-media": "^1.3.5",
"zod": "^4.3.6", "zod": "^4.3.6",
}, },
"devDependencies": { "devDependencies": {
@@ -20,203 +14,15 @@
}, },
}, },
}, },
"trustedDependencies": [
"ffmpeg-static",
"onnxruntime-node",
],
"packages": { "packages": {
"@derhuerst/http-basic": ["@derhuerst/http-basic@8.2.4", "", { "dependencies": { "caseless": "^0.12.0", "concat-stream": "^2.0.0", "http-response-object": "^3.0.1", "parse-cache-control": "^1.0.1" } }, "sha512-F9rL9k9Xjf5blCz8HsJRO4diy111cayL2vkY2XE4r4t3n0yPXVYy3KD3nJ1qbrSn9743UWSXH4IwuCa/HWlGFw=="],
"@discordjs/builders": ["@discordjs/builders@1.14.1", "", { "dependencies": { "@discordjs/formatters": "^0.6.2", "@discordjs/util": "^1.2.0", "@sapphire/shapeshift": "^4.0.0", "discord-api-types": "^0.38.40", "fast-deep-equal": "^3.1.3", "ts-mixer": "^6.0.4", "tslib": "^2.6.3" } }, "sha512-gSKkhXLqs96TCzk66VZuHHl8z2bQMJFGwrXC0f33ngK+FLNau4hU1PYny3DNJfNdSH+gVMzE85/d5FQ2BpcNwQ=="],
"@discordjs/collection": ["@discordjs/collection@1.5.3", "", {}, "sha512-SVb428OMd3WO1paV3rm6tSjM4wC+Kecaa1EUGX7vc6/fddvw/6lg90z4QtCqm21zvVe92vMMDt9+DkIvjXImQQ=="],
"@discordjs/formatters": ["@discordjs/formatters@0.6.2", "", { "dependencies": { "discord-api-types": "^0.38.33" } }, "sha512-y4UPwWhH6vChKRkGdMB4odasUbHOUwy7KL+OVwF86PvT6QVOwElx+TiI1/6kcmcEe+g5YRXJFiXSXUdabqZOvQ=="],
"@discordjs/rest": ["@discordjs/rest@2.6.1", "", { "dependencies": { "@discordjs/collection": "^2.1.1", "@discordjs/util": "^1.2.0", "@sapphire/async-queue": "^1.5.3", "@sapphire/snowflake": "^3.5.5", "@vladfrangu/async_event_emitter": "^2.4.6", "discord-api-types": "^0.38.40", "magic-bytes.js": "^1.13.0", "tslib": "^2.6.3", "undici": "6.24.1" } }, "sha512-wwQdgjeaoYFiaG+atbqx6aJDpqW7JHAo0HrQkBTbYzM3/PJ3GweQIpgElNcGZ26DCUOXMyawYd0YF7vtr+fZXg=="],
"@discordjs/util": ["@discordjs/util@1.2.0", "", { "dependencies": { "discord-api-types": "^0.38.33" } }, "sha512-3LKP7F2+atl9vJFhaBjn4nOaSWahZ/yWjOvA4e5pnXkt2qyXRCHLxoBQy81GFtLGCq7K9lPm9R517M1U+/90Qg=="],
"@discordjs/voice": ["@discordjs/voice@0.19.2", "", { "dependencies": { "@snazzah/davey": "^0.1.9", "@types/ws": "^8.18.1", "discord-api-types": "^0.38.41", "prism-media": "^1.3.5", "tslib": "^2.8.1", "ws": "^8.19.0" } }, "sha512-3yJ255e4ag3wfZu/DSxeOZK1UtnqNxnspmLaQetGT0pDkThNZoHs+Zg6dgZZ19JEVomXygvfHn9lNpICZuYtEA=="],
"@discordjs/ws": ["@discordjs/ws@1.2.3", "", { "dependencies": { "@discordjs/collection": "^2.1.0", "@discordjs/rest": "^2.5.1", "@discordjs/util": "^1.1.0", "@sapphire/async-queue": "^1.5.2", "@types/ws": "^8.5.10", "@vladfrangu/async_event_emitter": "^2.2.4", "discord-api-types": "^0.38.1", "tslib": "^2.6.2", "ws": "^8.17.0" } }, "sha512-wPlQDxEmlDg5IxhJPuxXr3Vy9AjYq5xCvFWGJyD7w7Np8ZGu+Mc+97LCoEc/+AYCo2IDpKioiH0/c/mj5ZR9Uw=="],
"@emnapi/core": ["@emnapi/core@1.10.0", "", { "dependencies": { "@emnapi/wasi-threads": "1.2.1", "tslib": "^2.4.0" } }, "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw=="],
"@emnapi/runtime": ["@emnapi/runtime@1.10.0", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA=="],
"@emnapi/wasi-threads": ["@emnapi/wasi-threads@1.2.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w=="],
"@napi-rs/wasm-runtime": ["@napi-rs/wasm-runtime@1.1.4", "", { "dependencies": { "@tybys/wasm-util": "^0.10.1" }, "peerDependencies": { "@emnapi/core": "^1.7.1", "@emnapi/runtime": "^1.7.1" } }, "sha512-3NQNNgA1YSlJb/kMH1ildASP9HW7/7kYnRI2szWJaofaS1hWmbGI4H+d3+22aGzXXN9IJ+n+GiFVcGipJP18ow=="],
"@sapphire/async-queue": ["@sapphire/async-queue@1.5.5", "", {}, "sha512-cvGzxbba6sav2zZkH8GPf2oGk9yYoD5qrNWdu9fRehifgnFZJMV+nuy2nON2roRO4yQQ+v7MK/Pktl/HgfsUXg=="],
"@sapphire/shapeshift": ["@sapphire/shapeshift@4.0.0", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "lodash": "^4.17.21" } }, "sha512-d9dUmWVA7MMiKobL3VpLF8P2aeanRTu6ypG2OIaEv/ZHH/SUQ2iHOVyi5wAPjQ+HmnMuL0whK9ez8I/raWbtIg=="],
"@sapphire/snowflake": ["@sapphire/snowflake@3.5.3", "", {}, "sha512-jjmJywLAFoWeBi1W7994zZyiNWPIiqRRNAmSERxyg93xRGzNYvGjlZ0gR6x0F4gPRi2+0O6S71kOZYyr3cxaIQ=="],
"@snazzah/davey": ["@snazzah/davey@0.1.11", "", { "optionalDependencies": { "@snazzah/davey-android-arm-eabi": "0.1.11", "@snazzah/davey-android-arm64": "0.1.11", "@snazzah/davey-darwin-arm64": "0.1.11", "@snazzah/davey-darwin-x64": "0.1.11", "@snazzah/davey-freebsd-x64": "0.1.11", "@snazzah/davey-linux-arm-gnueabihf": "0.1.11", "@snazzah/davey-linux-arm64-gnu": "0.1.11", "@snazzah/davey-linux-arm64-musl": "0.1.11", "@snazzah/davey-linux-x64-gnu": "0.1.11", "@snazzah/davey-linux-x64-musl": "0.1.11", "@snazzah/davey-wasm32-wasi": "0.1.11", "@snazzah/davey-win32-arm64-msvc": "0.1.11", "@snazzah/davey-win32-ia32-msvc": "0.1.11", "@snazzah/davey-win32-x64-msvc": "0.1.11" } }, "sha512-oBN+msHzPnm1M5DDx3wVD7iBwpNXFUtkh2MrAbUJu0OhKjliLChi28hq++mu1+qdMpAVQO5JKAvQQxYVbyneiw=="],
"@snazzah/davey-android-arm-eabi": ["@snazzah/davey-android-arm-eabi@0.1.11", "", { "os": "android", "cpu": "arm" }, "sha512-T1RYbNYKN6tLOcGIDKJd8OI6FBSEemwL7DOYdTMmhqfhhMr3YVN8WOhfoxGg63OcnpTN2e2c5tdY2bAx25RmQQ=="],
"@snazzah/davey-android-arm64": ["@snazzah/davey-android-arm64@0.1.11", "", { "os": "android", "cpu": "arm64" }, "sha512-ksJn/x2VU8h6w9eku1HT96ugSRZ7lKVkKNKbFleaFN+U99DJaPM+gMu2YvnFU4V54HR06ZBnRihnVG6VLXQpDw=="],
"@snazzah/davey-darwin-arm64": ["@snazzah/davey-darwin-arm64@0.1.11", "", { "os": "darwin", "cpu": "arm64" }, "sha512-E1d7PbaaVMO3Lj9EiAPqOVbuV0xg5+PsHzHH097DDXiD1+zUDXvJaTnUWsnm5z50pJniHpi4GtaYmk+ieB/guA=="],
"@snazzah/davey-darwin-x64": ["@snazzah/davey-darwin-x64@0.1.11", "", { "os": "darwin", "cpu": "x64" }, "sha512-Tl4TI/LTmgJZepgbgVMYDi8RqlAkPtPg1OEBPl7a9Tn3AwR36Vs6lyIT1cs/lGy/ds/+B+mKI4rPObN1cyILTw=="],
"@snazzah/davey-freebsd-x64": ["@snazzah/davey-freebsd-x64@0.1.11", "", { "os": "freebsd", "cpu": "x64" }, "sha512-T8Iw9FXkuI1T+YBAFzh9v/TXf9IOTOSqnd/BFpTRTrlW72PR2lhIidzSmg027VxO7r5pX47iFwiOkb9I/NU/EA=="],
"@snazzah/davey-linux-arm-gnueabihf": ["@snazzah/davey-linux-arm-gnueabihf@0.1.11", "", { "os": "linux", "cpu": "arm" }, "sha512-1Txj+8pqA8uq/OGtaUaBFWAPnNMQzFgIywj0iA7EI4xZl+mab48/pv+YZ1pNb/suC6ynsW44oB9efiXSdcUAgA=="],
"@snazzah/davey-linux-arm64-gnu": ["@snazzah/davey-linux-arm64-gnu@0.1.11", "", { "os": "linux", "cpu": "arm64" }, "sha512-ERzF5nM/IYW1BcN3wLXpEwBCGLFf0kGJUVhaV6yfiInz0tkU8UmvrrgpaMaACfMjIhfWdq5CcX+aTkXo/saNcg=="],
"@snazzah/davey-linux-arm64-musl": ["@snazzah/davey-linux-arm64-musl@0.1.11", "", { "os": "linux", "cpu": "arm64" }, "sha512-e6pX6Hiabtz99q+H/YHNkm9JVlpqN8HGh0qPib8G2+UY4/SSH8WvqWipk3v581dMy2oyCHt7MOoY1aU1P1N/xA=="],
"@snazzah/davey-linux-x64-gnu": ["@snazzah/davey-linux-x64-gnu@0.1.11", "", { "os": "linux", "cpu": "x64" }, "sha512-TW5bSoqChOJMbvsDb4wAATYrxmAXuNnse7wFNVSAJUaZKSeRfZbu3UAiPWSNn7GwLwSfU6hg322KZUn8IWCuvg=="],
"@snazzah/davey-linux-x64-musl": ["@snazzah/davey-linux-x64-musl@0.1.11", "", { "os": "linux", "cpu": "x64" }, "sha512-5j6Pmc+Wzv5lSxVP6quA7teYRJXibkZqQyYGfTDnTsUOO5dPpcojpqlXlkhyvsA1OAQTj4uxbOCciN3cVWwzug=="],
"@snazzah/davey-wasm32-wasi": ["@snazzah/davey-wasm32-wasi@0.1.11", "", { "dependencies": { "@napi-rs/wasm-runtime": "^1.1.2" }, "cpu": "none" }, "sha512-rKOwZ/0J8lp+4VEyOdMDBRP9KR+PksZpa9V1Qn0veMzy4FqTVKthkxwGqewheFe0SFg9fdvt798l/PBFrfDeZw=="],
"@snazzah/davey-win32-arm64-msvc": ["@snazzah/davey-win32-arm64-msvc@0.1.11", "", { "os": "win32", "cpu": "arm64" }, "sha512-5fptJU4tX901m3mj0SHiBljMrPT4ZEsynbBhR7bK1yn9TY1jjyhN8EFi7QF5IWtUEni+0mia2BCMHZ5ZkmFZqQ=="],
"@snazzah/davey-win32-ia32-msvc": ["@snazzah/davey-win32-ia32-msvc@0.1.11", "", { "os": "win32", "cpu": "ia32" }, "sha512-ualexn8SeLsiMHhWfzVrzRcjHgcBapg++FPaVgJJxoh2S/jCRiklXOu3luqIZdJdNKvhe2V9SwO/cImPeIIBKw=="],
"@snazzah/davey-win32-x64-msvc": ["@snazzah/davey-win32-x64-msvc@0.1.11", "", { "os": "win32", "cpu": "x64" }, "sha512-muNhc8UKXtknzsH/w4AIkbPR2I8BuvApn0pDXar0IEvY8PCjqU/M8MPbOOEYwQVvQRMwVTgExtxzrkBPSXB4nA=="],
"@tybys/wasm-util": ["@tybys/wasm-util@0.10.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg=="],
"@types/node": ["@types/node@25.6.0", "", { "dependencies": { "undici-types": "~7.19.0" } }, "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ=="], "@types/node": ["@types/node@25.6.0", "", { "dependencies": { "undici-types": "~7.19.0" } }, "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ=="],
"@types/ws": ["@types/ws@8.18.1", "", { "dependencies": { "@types/node": "*" } }, "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg=="],
"@vladfrangu/async_event_emitter": ["@vladfrangu/async_event_emitter@2.4.7", "", {}, "sha512-Xfe6rpCTxSxfbswi/W/Pz7zp1WWSNn4A0eW4mLkQUewCrXXtMj31lCg+iQyTkh/CkusZSq9eDflu7tjEDXUY6g=="],
"adm-zip": ["adm-zip@0.5.17", "", {}, "sha512-+Ut8d9LLqwEvHHJl1+PIHqoyDxFgVN847JTVM3Izi3xHDWPE4UtzzXysMZQs64DMcrJfBeS/uoEP4AD3HQHnQQ=="],
"agent-base": ["agent-base@6.0.2", "", { "dependencies": { "debug": "4" } }, "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ=="],
"avr-vad": ["avr-vad@1.0.10", "", { "dependencies": { "onnxruntime-node": "^1.22.0-rev" } }, "sha512-gM8SiQIebujfKMfy5w74tRPH+Fg78CMrBoDkMhCN3TmYVmmD8fmuVag7Q7ZCBITpFvYkOZnWEdGWuCb3YukBJw=="],
"boolean": ["boolean@3.2.0", "", {}, "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw=="],
"buffer-from": ["buffer-from@1.1.2", "", {}, "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ=="],
"caseless": ["caseless@0.12.0", "", {}, "sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw=="],
"concat-stream": ["concat-stream@2.0.0", "", { "dependencies": { "buffer-from": "^1.0.0", "inherits": "^2.0.3", "readable-stream": "^3.0.2", "typedarray": "^0.0.6" } }, "sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A=="],
"debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="],
"define-data-property": ["define-data-property@1.1.4", "", { "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", "gopd": "^1.0.1" } }, "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A=="],
"define-properties": ["define-properties@1.2.1", "", { "dependencies": { "define-data-property": "^1.0.1", "has-property-descriptors": "^1.0.0", "object-keys": "^1.1.1" } }, "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg=="],
"detect-node": ["detect-node@2.1.0", "", {}, "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g=="],
"discord-api-types": ["discord-api-types@0.38.47", "", {}, "sha512-XgXQodHQBAE6kfD7kMvVo30863iHX1LHSqNq6MGUTDwIFCCvHva13+rwxyxVXDqudyApMNAd32PGjgVETi5rjA=="],
"discord.js": ["discord.js@14.26.3", "", { "dependencies": { "@discordjs/builders": "^1.14.1", "@discordjs/collection": "1.5.3", "@discordjs/formatters": "^0.6.2", "@discordjs/rest": "^2.6.1", "@discordjs/util": "^1.2.0", "@discordjs/ws": "^1.2.3", "@sapphire/snowflake": "3.5.3", "discord-api-types": "^0.38.40", "fast-deep-equal": "3.1.3", "lodash.snakecase": "4.1.1", "magic-bytes.js": "^1.13.0", "tslib": "^2.6.3", "undici": "6.24.1" } }, "sha512-XEKtYn28YFsiJ5l4fLRyikdbo6RD5oFyqfVHQlvXz2104JhH/E8slN28dbky05w3DCrJcNVWvhVvcJCTSl/KIg=="],
"dotenv": ["dotenv@17.4.2", "", {}, "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw=="], "dotenv": ["dotenv@17.4.2", "", {}, "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw=="],
"env-paths": ["env-paths@2.2.1", "", {}, "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A=="],
"es-define-property": ["es-define-property@1.0.1", "", {}, "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g=="],
"es-errors": ["es-errors@1.3.0", "", {}, "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw=="],
"es6-error": ["es6-error@4.1.1", "", {}, "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg=="],
"escape-string-regexp": ["escape-string-regexp@4.0.0", "", {}, "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA=="],
"fast-deep-equal": ["fast-deep-equal@3.1.3", "", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="],
"ffmpeg-static": ["ffmpeg-static@5.3.0", "", { "dependencies": { "@derhuerst/http-basic": "^8.2.0", "env-paths": "^2.2.0", "https-proxy-agent": "^5.0.0", "progress": "^2.0.3" } }, "sha512-H+K6sW6TiIX6VGend0KQwthe+kaceeH/luE8dIZyOP35ik7ahYojDuqlTV1bOrtEwl01sy2HFNGQfi5IDJvotg=="],
"global-agent": ["global-agent@3.0.0", "", { "dependencies": { "boolean": "^3.0.1", "es6-error": "^4.1.1", "matcher": "^3.0.0", "roarr": "^2.15.3", "semver": "^7.3.2", "serialize-error": "^7.0.1" } }, "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q=="],
"globalthis": ["globalthis@1.0.4", "", { "dependencies": { "define-properties": "^1.2.1", "gopd": "^1.0.1" } }, "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ=="],
"gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="],
"has-property-descriptors": ["has-property-descriptors@1.0.2", "", { "dependencies": { "es-define-property": "^1.0.0" } }, "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg=="],
"http-response-object": ["http-response-object@3.0.2", "", { "dependencies": { "@types/node": "^10.0.3" } }, "sha512-bqX0XTF6fnXSQcEJ2Iuyr75yVakyjIDCqroJQ/aHfSdlM743Cwqoi2nDYMzLGWUcuTWGWy8AAvOKXTfiv6q9RA=="],
"https-proxy-agent": ["https-proxy-agent@5.0.1", "", { "dependencies": { "agent-base": "6", "debug": "4" } }, "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA=="],
"inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="],
"json-stringify-safe": ["json-stringify-safe@5.0.1", "", {}, "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="],
"lodash": ["lodash@4.18.1", "", {}, "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q=="],
"lodash.snakecase": ["lodash.snakecase@4.1.1", "", {}, "sha512-QZ1d4xoBHYUeuouhEq3lk3Uq7ldgyFXGBhg04+oRLnIz8o9T65Eh+8YdroUwn846zchkA9yDsDl5CVVaV2nqYw=="],
"magic-bytes.js": ["magic-bytes.js@1.13.0", "", {}, "sha512-afO2mnxW7GDTXMm5/AoN1WuOcdoKhtgXjIvHmobqTD1grNplhGdv3PFOyjCVmrnOZBIT/gD/koDKpYG+0mvHcg=="],
"matcher": ["matcher@3.0.0", "", { "dependencies": { "escape-string-regexp": "^4.0.0" } }, "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng=="],
"ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="],
"object-keys": ["object-keys@1.1.1", "", {}, "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA=="],
"onnxruntime-common": ["onnxruntime-common@1.24.3", "", {}, "sha512-GeuPZO6U/LBJXvwdaqHbuUmoXiEdeCjWi/EG7Y1HNnDwJYuk6WUbNXpF6luSUY8yASul3cmUlLGrCCL1ZgVXqA=="],
"onnxruntime-node": ["onnxruntime-node@1.24.3", "", { "dependencies": { "adm-zip": "^0.5.16", "global-agent": "^3.0.0", "onnxruntime-common": "1.24.3" }, "os": [ "linux", "win32", "darwin", ] }, "sha512-JH7+czbc8ALA819vlTgcV+Q214/+VjGeBHDjX81+ZCD0PCVCIFGFNtT0V4sXG/1JXypKPgScQcB3ij/hk3YnTg=="],
"opusscript": ["opusscript@0.1.1", "", {}, "sha512-mL0fZZOUnXdZ78woRXp18lApwpp0lF5tozJOD1Wut0dgrA9WuQTgSels/CSmFleaAZrJi/nci5KOVtbuxeWoQA=="],
"parse-cache-control": ["parse-cache-control@1.0.1", "", {}, "sha512-60zvsJReQPX5/QP0Kzfd/VrpjScIQ7SHBW6bFCYfEP+fp0Eppr1SHhIO5nd1PjZtvclzSzES9D/p5nFJurwfWg=="],
"prism-media": ["prism-media@1.3.5", "", { "peerDependencies": { "@discordjs/opus": ">=0.8.0 <1.0.0", "ffmpeg-static": "^5.0.2 || ^4.2.7 || ^3.0.0 || ^2.4.0", "node-opus": "^0.3.3", "opusscript": "^0.0.8" }, "optionalPeers": ["@discordjs/opus", "ffmpeg-static", "node-opus", "opusscript"] }, "sha512-IQdl0Q01m4LrkN1EGIE9lphov5Hy7WWlH6ulf5QdGePLlPas9p2mhgddTEHrlaXYjjFToM1/rWuwF37VF4taaA=="],
"progress": ["progress@2.0.3", "", {}, "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA=="],
"readable-stream": ["readable-stream@3.6.2", "", { "dependencies": { "inherits": "^2.0.3", "string_decoder": "^1.1.1", "util-deprecate": "^1.0.1" } }, "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA=="],
"roarr": ["roarr@2.15.4", "", { "dependencies": { "boolean": "^3.0.1", "detect-node": "^2.0.4", "globalthis": "^1.0.1", "json-stringify-safe": "^5.0.1", "semver-compare": "^1.0.0", "sprintf-js": "^1.1.2" } }, "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A=="],
"safe-buffer": ["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="],
"semver": ["semver@7.7.4", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA=="],
"semver-compare": ["semver-compare@1.0.0", "", {}, "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow=="],
"serialize-error": ["serialize-error@7.0.1", "", { "dependencies": { "type-fest": "^0.13.1" } }, "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw=="],
"sprintf-js": ["sprintf-js@1.1.3", "", {}, "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA=="],
"string_decoder": ["string_decoder@1.3.0", "", { "dependencies": { "safe-buffer": "~5.2.0" } }, "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA=="],
"ts-mixer": ["ts-mixer@6.0.4", "", {}, "sha512-ufKpbmrugz5Aou4wcr5Wc1UUFWOLhq+Fm6qa6P0w0K5Qw2yhaUoiWszhCVuNQyNwrlGiscHOmqYoAox1PtvgjA=="],
"tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="],
"type-fest": ["type-fest@0.13.1", "", {}, "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg=="],
"typedarray": ["typedarray@0.0.6", "", {}, "sha512-/aCDEGatGvZ2BIk+HmLf4ifCJFwvKFNb9/JeZPMulfgFracn9QFcAf5GO8B/mweUjSoblS5In0cWhqpfs/5PQA=="],
"typescript": ["typescript@6.0.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw=="], "typescript": ["typescript@6.0.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw=="],
"undici": ["undici@6.24.1", "", {}, "sha512-sC+b0tB1whOCzbtlx20fx3WgCXwkW627p4EA9uM+/tNNPkSS+eSEld6pAs9nDv7WbY1UUljBMYPtu9BCOrCWKA=="],
"undici-types": ["undici-types@7.19.2", "", {}, "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg=="], "undici-types": ["undici-types@7.19.2", "", {}, "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg=="],
"util-deprecate": ["util-deprecate@1.0.2", "", {}, "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="], "zod": ["zod@4.4.2", "", {}, "sha512-IynmDyxsEsb9RKzO3J9+4SxXnl2FTFSzNBaKKaMV6tsSk0rw9gYw9gs+JFCq/qk2LCZ78KDwyj+Z289TijSkUw=="],
"ws": ["ws@8.20.0", "", { "peerDependencies": { "bufferutil": "^4.0.1", "utf-8-validate": ">=5.0.2" }, "optionalPeers": ["bufferutil", "utf-8-validate"] }, "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA=="],
"zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
"@discordjs/rest/@discordjs/collection": ["@discordjs/collection@2.1.1", "", {}, "sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg=="],
"@discordjs/rest/@sapphire/snowflake": ["@sapphire/snowflake@3.5.5", "", {}, "sha512-xzvBr1Q1c4lCe7i6sRnrofxeO1QTP/LKQ6A6qy0iB4x5yfiSfARMEQEghojzTNALDTcv8En04qYNIco9/K9eZQ=="],
"@discordjs/ws/@discordjs/collection": ["@discordjs/collection@2.1.1", "", {}, "sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg=="],
"http-response-object/@types/node": ["@types/node@10.17.60", "", {}, "sha512-F0KIgDJfy2nA3zMLmWGKxcH2ZVEtCZXHHdOQs2gSaQ27+lNeEfGxzkIw90aXswATX7AZ33tahPbzy6KAfUreVw=="],
} }
} }

13
docker.cmd Normal file
View File

@@ -0,0 +1,13 @@
@echo off
rem Shim that forwards every argument to Docker Desktop's docker.exe at its
rem default install location, for shells where "docker" is not resolvable.
setlocal
set "DOCKER_EXE=C:\Program Files\Docker\Docker\resources\bin\docker.exe"

rem Bail out early when the executable is missing.
if not exist "%DOCKER_EXE%" (
    echo Docker executable not found: "%DOCKER_EXE%"
    echo Install Docker Desktop or update this shim path.
    exit /b 1
)

rem Run docker OUTSIDE of any parenthesised block. Inside a block, %ERRORLEVEL%
rem is expanded when the whole block is parsed (before docker.exe has run), so
rem the shim would return a stale exit code instead of docker's real one.
"%DOCKER_EXE%" %*
exit /b %ERRORLEVEL%

22
docker/melotts/Dockerfile Normal file
View File

@@ -0,0 +1,22 @@
# Image that runs MeloTTS from a source checkout and exposes the project's
# melo_tts_cli.py as the container entrypoint.
FROM python:3.11-slim
# Do not buffer Python stdout/stderr inside the container.
ENV PYTHONUNBUFFERED=1
WORKDIR /opt/realtime-voice-bot
# System packages needed by the steps below; the apt lists are removed in the
# same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    git \
    libsndfile1 \
    && rm -rf /var/lib/apt/lists/*
# Install MeloTTS in editable mode from a fresh clone of the upstream repo.
# NOTE(review): the clone is not pinned to a tag/commit, so rebuilds may pick
# up different upstream code.
RUN git clone https://github.com/myshell-ai/MeloTTS.git /opt/MeloTTS
RUN pip install --no-cache-dir -e /opt/MeloTTS
# Korean MeCab bindings and dictionary, installed at build time.
RUN pip install --no-cache-dir python-mecab-ko python-mecab-ko-dic
# Download the unidic dictionary and run MeloTTS's own download script at
# build time (presumably so models are baked into the image — confirm).
RUN python -m unidic download
RUN python /opt/MeloTTS/melo/init_downloads.py
# Project scripts: the one-shot CLI (entrypoint) and the JSON-lines worker.
COPY melo_tts_cli.py /opt/realtime-voice-bot/melo_tts_cli.py
COPY melo_tts_worker.py /opt/realtime-voice-bot/melo_tts_worker.py
ENTRYPOINT ["python", "/opt/realtime-voice-bot/melo_tts_cli.py"]

View File

@@ -0,0 +1,42 @@
import argparse
from pathlib import Path
from melo.api import TTS
def main() -> None:
    """Synthesize ``--text`` into the WAV file at ``--output`` with MeloTTS.

    Parses the command line, creates the output directory if needed, loads the
    model for the requested language/device and writes the audio file.

    Raises:
        SystemExit: if the requested speaker is not offered by the model
            (argparse also exits on invalid arguments).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--text", required=True)
    cli.add_argument("--output", required=True)
    for flag in ("--language", "--speaker"):
        cli.add_argument(flag, default="KR")
    float_options = (
        ("--speed", 1.0),
        ("--sdp-ratio", 0.2),
        ("--noise-scale", 0.6),
        ("--noise-scale-w", 0.8),
    )
    for flag, fallback in float_options:
        cli.add_argument(flag, type=float, default=fallback)
    cli.add_argument("--device", default="cpu")
    args = cli.parse_args()

    destination = Path(args.output)
    destination.parent.mkdir(parents=True, exist_ok=True)

    engine = TTS(language=args.language, device=args.device)
    known_speakers = engine.hps.data.spk2id
    if args.speaker not in known_speakers:
        supported = ", ".join(sorted(known_speakers.keys()))
        raise SystemExit(f"지원하지 않는 speaker 입니다: {args.speaker}. 사용 가능: {supported}")

    synthesis_options = {
        "speed": args.speed,
        "sdp_ratio": args.sdp_ratio,
        "noise_scale": args.noise_scale,
        "noise_scale_w": args.noise_scale_w,
    }
    engine.tts_to_file(
        args.text,
        known_speakers[args.speaker],
        str(destination),
        **synthesis_options,
    )


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,122 @@
import contextlib
import io
import json
import os
import sys
from pathlib import Path
from melo.api import TTS
# Worker configuration, read once from the environment at import time.
# Each value falls back to the default shown when the variable is unset.
LANGUAGE = os.getenv("TTS_LANGUAGE", "KR")
SPEAKER = os.getenv("TTS_SPEAKER", "KR")
DEVICE = os.getenv("TTS_DEVICE", "cpu")
# Numeric synthesis parameters; float() raises ValueError at import time if
# the environment holds a non-numeric value.
SPEED = float(os.getenv("TTS_SPEED", "1.18"))
SDP_RATIO = float(os.getenv("TTS_SDP_RATIO", "0.22"))
NOISE_SCALE = float(os.getenv("TTS_NOISE_SCALE", "0.55"))
NOISE_SCALE_W = float(os.getenv("TTS_NOISE_SCALE_W", "0.75"))
# Lazily populated cache for the loaded model and the resolved speaker id
# (filled by load_model()).
_MODEL = None
_SPEAKER_ID = None
def silence_stdout(func):
    """Decorator that discards everything ``func`` writes to ``sys.stdout``.

    The worker uses stdout as its JSON-lines response channel, so any stray
    output from the wrapped call must not reach it.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same signature, return value and metadata
        (``__name__``, ``__doc__``, ...) as ``func``.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # stdout is restored when the context exits, even if func raises.
        with contextlib.redirect_stdout(io.StringIO()):
            return func(*args, **kwargs)

    return wrapper
@silence_stdout
def load_model():
    """Return ``(model, speaker_id)``, loading and caching them on first use.

    Raises:
        RuntimeError: if the configured ``SPEAKER`` is not offered by the model.
    """
    global _MODEL, _SPEAKER_ID

    cached = _MODEL is not None and _SPEAKER_ID is not None
    if not cached:
        engine = TTS(language=LANGUAGE, device=DEVICE)
        available = engine.hps.data.spk2id
        if SPEAKER not in available:
            supported = ", ".join(sorted(available.keys()))
            raise RuntimeError(f"지원하지 않는 speaker 입니다: {SPEAKER}. 사용 가능: {supported}")
        # Only publish to the cache once the speaker has been validated.
        _MODEL = engine
        _SPEAKER_ID = available[SPEAKER]

    return _MODEL, _SPEAKER_ID
def handle_ping():
    """Load the model (if needed) and report the worker's effective settings."""
    engine, voice_id = load_model()
    report = dict(
        language=LANGUAGE,
        speaker=SPEAKER,
        speaker_id=voice_id,
        device=DEVICE,
        speed=SPEED,
        sdp_ratio=SDP_RATIO,
        noise_scale=NOISE_SCALE,
        noise_scale_w=NOISE_SCALE_W,
    )
    report["speaker_count"] = len(engine.hps.data.spk2id)
    return report
@silence_stdout
def handle_synthesize(params):
    """Synthesize ``params["text"]`` into the file at ``params["output_path"]``.

    Args:
        params: Mapping with the keys ``text`` and ``output_path``.

    Returns:
        dict with the written ``output_path`` and the stripped ``text_length``.

    Raises:
        KeyError: if a required key is missing from ``params``.
    """
    spoken = str(params["text"]).strip()
    target = Path(str(params["output_path"]))
    target.parent.mkdir(parents=True, exist_ok=True)

    engine, voice_id = load_model()
    tuning = {
        "speed": SPEED,
        "sdp_ratio": SDP_RATIO,
        "noise_scale": NOISE_SCALE,
        "noise_scale_w": NOISE_SCALE_W,
    }
    engine.tts_to_file(spoken, voice_id, str(target), **tuning)

    return {"output_path": str(target), "text_length": len(spoken)}
def main():
    """Serve JSON-lines requests from stdin, one JSON reply per line on stdout.

    Each request is ``{"id": ..., "method": "ping" | "synthesize", "params": {...}}``.
    A success reply is ``{"id": <str>, "result": ...}``; a failure reply is
    ``{"id": <str>, "error": <message>}``. Blank lines are ignored. The id in a
    failure reply is ``"unknown"`` whenever the id could not be read from the
    current line.
    """
    for raw_line in sys.stdin:
        line = raw_line.strip()
        if not line:
            continue

        # Reset per request: otherwise a line that fails to parse would be
        # answered with the id of the previously handled request.
        request_id = "unknown"
        try:
            payload = json.loads(line)
            request_id = str(payload["id"])
            method = payload["method"]
            params = payload.get("params", {})

            if method == "ping":
                result = handle_ping()
            elif method == "synthesize":
                result = handle_synthesize(params)
            else:
                raise RuntimeError(f"알 수 없는 method 입니다: {method}")

            reply = {"id": request_id, "result": result}
            sys.stdout.write(json.dumps(reply, ensure_ascii=False) + "\n")
            sys.stdout.flush()
        except Exception as error:
            # Report the failure to the caller and keep serving requests.
            reply = {"id": request_id, "error": str(error)}
            sys.stdout.write(json.dumps(reply, ensure_ascii=False) + "\n")
            sys.stdout.flush()


if __name__ == "__main__":
    main()

View File

@@ -4,13 +4,18 @@
"private": true, "private": true,
"type": "module", "type": "module",
"scripts": { "scripts": {
"dev": "bun --watch src/index.ts", "setup": "bun src/setup.ts",
"start": "bun src/index.ts discord", "setup:all": "bun run setup",
"start:discord": "bun src/index.ts discord", "setup:stt": "bun src/setup-python.ts",
"start:local": "bun src/index.ts local", "setup:llm": "bun src/setup-llm.ts",
"setup:local-ai": "bun src/setup-local-ai.ts", "setup:tts": "bun src/setup-tts.ts",
"devices": "bun src/index.ts local-devices", "setup:python": "bun run setup:stt",
"audio:devices": "bun src/index.ts local-devices", "test:stt": "bun src/index.ts test-stt",
"test:sttllm": "bun src/index.ts test-sttllm",
"test:all": "bun src/index.ts test-all",
"test:llm": "bun src/index.ts test-llm",
"test:tts": "bun src/index.ts test-tts",
"devices": "bun src/index.ts devices",
"check": "tsc --noEmit", "check": "tsc --noEmit",
"build": "tsc -p tsconfig.json" "build": "tsc -p tsconfig.json"
}, },
@@ -19,21 +24,11 @@
"node": ">=22.12.0" "node": ">=22.12.0"
}, },
"dependencies": { "dependencies": {
"@discordjs/voice": "^0.19.2",
"avr-vad": "^1.0.10",
"discord.js": "^14.26.3",
"dotenv": "^17.4.2", "dotenv": "^17.4.2",
"ffmpeg-static": "^5.3.0",
"opusscript": "^0.1.1",
"prism-media": "^1.3.5",
"zod": "^4.3.6" "zod": "^4.3.6"
}, },
"devDependencies": { "devDependencies": {
"@types/node": "^25.6.0", "@types/node": "^25.6.0",
"typescript": "^6.0.3" "typescript": "^6.0.3"
}, }
"trustedDependencies": [
"onnxruntime-node",
"ffmpeg-static"
]
} }

14
prompts/assistant.md Normal file
View File

@@ -0,0 +1,14 @@
너는 한국어로 짧고 자연스럽게 답하는 로컬 음성 비서다.
규칙:
- 반드시 한국어로만 답한다.
- 한자, 중국어, 일본어, 아랍어, 키릴 문자, 기타 외국 문자, 이모지 사용 금지.
- 영어 단어는 꼭 필요한 기술명 외에는 피하고 자연스러운 한국어 표현으로 바꾼다.
- 답변은 TTS가 읽기 쉽도록 짧고 단순한 문장으로 만든다.
- 기본적으로 1~3문장으로 답한다.
- 불필요한 장식, 불릿, 번호 목록, 괄호 남용, 과한 감탄 표현은 피한다.
- 사용자의 말에 바로 답하고, 군더더기 없이 핵심만 말한다.
- 정확한 시간, 설정 확인, 계산이 필요하면 도구를 우선 사용한다.
- 최신 정보, 오늘/최근 정보, 뉴스, 검색 요청, 사실 확인, 외부 웹페이지 내용이 필요한 경우에만 `web_search`, `fetch_url` 을 사용한다.
- 내부 지식만으로 충분한 일반 대화에는 웹 도구를 쓰지 않는다.
- 도구가 필요한 작업이 시작되면 결과 전에 짧은 진행 메시지를 출력할 수 있다.

10
prompts/reply-gate.md Normal file
View File

@@ -0,0 +1,10 @@
다음 텍스트에 로컬 비서가 실제로 대답해야 하는지 판정한다.
판정 기준:
- 의미 없는 감탄사, 중얼거림, 문맥 없는 짧은 파편, 노래 가사 조각, 잡음성 문장은 `should_reply=false`
- 질문, 요청, 확인, 명령, 대화 시도는 `should_reply=true`
- 최신 정보나 사실 확인, 검색이 필요해 보이면 `likely_needs_lookup=true`
- reason 은 아주 짧게 쓴다
반드시 JSON만 출력:
{"should_reply":true,"likely_needs_lookup":false,"reason":"짧게"}

View File

@@ -0,0 +1,8 @@
다음 답변을 의미를 유지한 채 자연스러운 한국어로만 다시 쓴다.
규칙:
- 한글, 숫자, 기본 문장부호 외 다른 문자 사용 금지
- 이모지 사용 금지
- 짧고 읽기 쉬운 문장으로 만든다
- TTS가 읽기 쉽도록 불필요한 기호와 장식을 줄인다
- 설명하지 말고 최종 답변 문장만 출력한다

View File

@@ -1,145 +0,0 @@
import base64
import json
import os
import sys
import tempfile
import traceback
import wave
os.environ.setdefault("PYTHONIOENCODING", "utf-8")
def log(message: str) -> None:
    """Write one diagnostic line to stderr and flush it immediately."""
    sys.stderr.write(f"{message}\n")
    sys.stderr.flush()
def write_response(request_id: int, ok: bool, result=None, error: str | None = None) -> None:
payload = {
"id": request_id,
"ok": ok,
}
if ok:
payload["result"] = result
else:
payload["error"] = error or "unknown error"
sys.stdout.write(json.dumps(payload, ensure_ascii=False) + "\n")
sys.stdout.flush()
def resolve_device() -> str:
    """Pick the STT device.

    An explicit ``LOCAL_STT_DEVICE`` (anything other than empty/``auto``) wins
    and is returned lower-cased. Otherwise ``"cuda"`` is returned when
    ctranslate2 reports at least one CUDA device, else ``"cpu"``.
    """
    requested = os.environ.get("LOCAL_STT_DEVICE", "auto").strip().lower()
    if requested not in ("", "auto"):
        return requested

    try:
        import ctranslate2

        cuda_available = ctranslate2.get_cuda_device_count() > 0
    except Exception:
        # Probing is best effort: any failure means "no usable CUDA".
        cuda_available = False
    return "cuda" if cuda_available else "cpu"
def resolve_compute_type(device: str) -> str:
    """Pick the compute type for ``device``.

    An explicit ``LOCAL_STT_COMPUTE_TYPE`` (anything other than empty/``auto``)
    wins and is returned lower-cased; otherwise ``int8_float16`` is used for
    ``cuda`` and ``int8`` for everything else.
    """
    override = os.environ.get("LOCAL_STT_COMPUTE_TYPE", "auto").strip().lower()
    if override in ("", "auto"):
        return "int8_float16" if device == "cuda" else "int8"
    return override
class SttWorker:
    """Holds a loaded faster-whisper model and transcribes PCM audio with it."""

    def __init__(self) -> None:
        """Load the model described by the ``LOCAL_STT_*`` environment variables."""
        from faster_whisper import WhisperModel

        env = os.environ
        self.model_name = env.get("LOCAL_STT_MODEL", "tiny").strip() or "tiny"
        self.device = resolve_device()
        self.compute_type = resolve_compute_type(self.device)
        self.beam_size = int(env.get("LOCAL_STT_BEAM_SIZE", "1"))
        self.model = WhisperModel(
            self.model_name,
            device=self.device,
            compute_type=self.compute_type,
        )
        log(
            f"local-stt ready model={self.model_name} device={self.device} compute={self.compute_type} beam={self.beam_size}"
        )

    def transcribe(self, audio_base64: str, language: str | None) -> str:
        """Transcribe base64-encoded PCM and return the joined segment text.

        The decoded bytes are written as a mono, 16-bit, 16 kHz WAV to a
        temporary file, which is removed again afterwards.
        """
        pcm_bytes = base64.b64decode(audio_base64)
        wav_path = ""
        try:
            # Reserve a path only; the file is reopened by the wave module.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as reserved:
                wav_path = reserved.name

            with wave.open(wav_path, "wb") as wav_out:
                wav_out.setnchannels(1)
                wav_out.setsampwidth(2)
                wav_out.setframerate(16000)
                wav_out.writeframes(pcm_bytes)

            decode_options = dict(
                language=language,
                beam_size=self.beam_size,
                best_of=1,
                condition_on_previous_text=False,
                vad_filter=False,
                without_timestamps=True,
                temperature=0.0,
            )
            segments, _info = self.model.transcribe(wav_path, **decode_options)

            pieces = []
            for segment in segments:
                cleaned = segment.text.strip()
                if cleaned:
                    pieces.append(cleaned)
            return " ".join(pieces).strip()
        finally:
            if wav_path:
                try:
                    os.unlink(wav_path)
                except OSError:
                    # Best-effort cleanup; a leftover temp file is not fatal.
                    pass
def main() -> int:
    """Run the STT worker loop: JSON-lines requests on stdin, replies on stdout.

    Supported methods are ``ping`` and ``transcribe``. Every failure while
    handling a line is reported as an error reply and the loop continues.

    Returns:
        1 if the model could not be initialised, otherwise 0 once stdin closes.
    """
    try:
        worker = SttWorker()
    except Exception as exc:
        log("failed to initialize local STT worker")
        log("run `bun run setup:local-ai` first if dependencies are missing")
        log("".join(traceback.format_exception(exc)))
        return 1

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        # Reset per line. Without this, a line that fails before its id is
        # parsed either crashes the handler (first line: unbound name) or is
        # answered with the previous request's id. -1 marks "id unknown".
        request_id = -1
        try:
            request = json.loads(line)
            request_id = int(request["id"])
            method = request["method"]
            params = request.get("params", {})

            if method == "ping":
                write_response(request_id, True, {"ready": True})
                continue

            if method != "transcribe":
                raise ValueError(f"unsupported method: {method}")

            text = worker.transcribe(
                audio_base64=str(params.get("audio_base64", "")),
                language=str(params.get("language") or "").strip() or None,
            )
            write_response(request_id, True, {"text": text})
        except Exception as exc:
            error_text = "".join(traceback.format_exception_only(type(exc), exc)).strip()
            write_response(request_id, False, error=error_text)

    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -1,125 +0,0 @@
import base64
import json
import os
import sys
import tempfile
import traceback
os.environ.setdefault("PYTHONIOENCODING", "utf-8")
def log(message: str) -> None:
    """Send a single diagnostic line to stderr, flushing right away."""
    stream = sys.stderr
    stream.write(f"{message}\n")
    stream.flush()
def write_response(request_id: int, ok: bool, result=None, error: str | None = None) -> None:
payload = {
"id": request_id,
"ok": ok,
}
if ok:
payload["result"] = result
else:
payload["error"] = error or "unknown error"
sys.stdout.write(json.dumps(payload, ensure_ascii=False) + "\n")
sys.stdout.flush()
class TtsWorker:
    """Holds a loaded MeloTTS model and renders text to WAV bytes."""

    def __init__(self) -> None:
        """Load the model described by the ``LOCAL_TTS_*`` environment variables.

        The speaker is looked up by the configured key, then by its upper-case
        form, and finally falls back to the first speaker the model offers.
        """
        from melo.api import TTS

        env = os.environ
        self.language = env.get("LOCAL_TTS_LANGUAGE", "KR").strip() or "KR"
        self.speaker_key = env.get("LOCAL_TTS_SPEAKER", "KR").strip() or "KR"
        self.device = env.get("LOCAL_TTS_DEVICE", "auto").strip() or "auto"
        self.speed = float(env.get("LOCAL_TTS_SPEED", "1.12"))
        self.model = TTS(language=self.language, device=self.device)

        speaker_ids = self.model.hps.data.spk2id
        resolved = speaker_ids.get(self.speaker_key)
        if resolved is None:
            resolved = speaker_ids.get(self.speaker_key.upper())
        if resolved is None:
            resolved = next(iter(speaker_ids.values()))
        self.speaker_id = resolved

        log(
            f"local-tts ready language={self.language} speaker={self.speaker_key} device={self.device} speed={self.speed}"
        )

    def synthesize(self, text: str) -> bytes:
        """Render ``text`` through a temporary WAV file and return its bytes."""
        wav_path = ""
        try:
            # Reserve a path only; the model writes the file itself.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as reserved:
                wav_path = reserved.name

            self.model.tts_to_file(
                text,
                self.speaker_id,
                wav_path,
                speed=self.speed,
                quiet=True,
            )

            with open(wav_path, "rb") as rendered:
                return rendered.read()
        finally:
            if wav_path:
                try:
                    os.unlink(wav_path)
                except OSError:
                    # Best-effort cleanup; a leftover temp file is not fatal.
                    pass
def main() -> int:
    """Run the TTS worker loop: JSON-lines requests on stdin, replies on stdout.

    Supported methods are ``ping`` and ``synthesize``; the synthesized audio is
    returned base64-encoded under ``wav_base64``. Every failure while handling
    a line is reported as an error reply and the loop continues.

    Returns:
        1 if the model could not be initialised, otherwise 0 once stdin closes.
    """
    try:
        worker = TtsWorker()
    except Exception as exc:
        log("failed to initialize local TTS worker")
        log("run `bun run setup:local-ai` first if dependencies are missing")
        log("".join(traceback.format_exception(exc)))
        return 1

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        # Reset per line. Without this, a line that fails before its id is
        # parsed either crashes the handler (first line: unbound name) or is
        # answered with the previous request's id. -1 marks "id unknown".
        request_id = -1
        try:
            request = json.loads(line)
            request_id = int(request["id"])
            method = request["method"]
            params = request.get("params", {})

            if method == "ping":
                write_response(request_id, True, {"ready": True})
                continue

            if method != "synthesize":
                raise ValueError(f"unsupported method: {method}")

            text = str(params.get("text", "")).strip()
            if not text:
                raise ValueError("text is empty")

            audio = worker.synthesize(text)
            write_response(
                request_id,
                True,
                {
                    "wav_base64": base64.b64encode(audio).decode("ascii"),
                },
            )
        except Exception as exc:
            error_text = "".join(traceback.format_exception_only(type(exc), exc)).strip()
            write_response(request_id, False, error=error_text)

    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,186 @@
import base64
import json
import os
import sys
import site
import traceback
from typing import Any
from pathlib import Path
import numpy as np
from faster_whisper import WhisperModel
def configure_windows_dll_search_paths() -> list[str]:
    """Register pip-installed NVIDIA DLL directories on Windows.

    Looks for ``nvidia/cublas/bin`` and ``nvidia/cudnn/bin`` under the
    interpreter's ``Lib/site-packages`` and under every ``site.getsitepackages()``
    entry. Each existing directory is passed to ``os.add_dll_directory`` and
    prepended to ``PATH`` unless it is already one of the ``PATH`` entries.

    Returns:
        The resolved directories that were registered, in search order.
        Always ``[]`` on non-Windows platforms.
    """
    if sys.platform != "win32":
        return []

    dll_subdirs = (("nvidia", "cublas", "bin"), ("nvidia", "cudnn", "bin"))
    venv_root = Path(sys.executable).resolve().parent.parent
    roots = [venv_root / "Lib" / "site-packages"]
    roots.extend(Path(package_path) for package_path in site.getsitepackages())
    candidates = [root.joinpath(*parts) for root in roots for parts in dll_subdirs]

    def _path_key(entry: str) -> str:
        # Compare PATH entries the way the OS treats them (case, separators).
        return os.path.normcase(os.path.normpath(entry))

    added: list[str] = []
    seen: set[str] = set()
    for candidate in candidates:
        normalized = str(candidate.resolve())
        if normalized in seen:
            continue
        seen.add(normalized)
        if not candidate.exists():
            continue

        os.add_dll_directory(normalized)

        # Compare against whole PATH entries. A substring test would wrongly
        # treat the directory as present when it is only a prefix of another
        # entry (e.g. "...\bin" vs "...\bin_backup").
        current_path = os.environ.get("PATH", "")
        existing = {_path_key(entry) for entry in current_path.split(os.pathsep) if entry}
        if _path_key(normalized) not in existing:
            os.environ["PATH"] = normalized + os.pathsep + current_path
        added.append(normalized)

    return added
# Set HF_HUB_DISABLE_SYMLINKS_WARNING unless the caller already chose a value.
os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1")
# Register CUDA DLL directories at import time (no-op outside Windows) and
# report what was added on stderr; stdout is used for JSON replies.
CONFIGURED_DLL_PATHS = configure_windows_dll_search_paths()
if CONFIGURED_DLL_PATHS:
    print(
        f"configured CUDA DLL search paths: {', '.join(CONFIGURED_DLL_PATHS)}",
        file=sys.stderr,
        flush=True,
    )
def resolve_model() -> WhisperModel:
    """Load a Whisper model, trying device/compute combinations in order.

    ``WHISPER_MODEL``, ``WHISPER_DEVICE`` and ``WHISPER_COMPUTE_TYPE`` select
    the model and narrow the candidates; ``auto`` expands to a CUDA-first
    fallback list. The first combination that loads is returned, tagged with
    ``_resolved_device`` and ``_resolved_compute_type``.

    Raises:
        Exception: the error of the last failed attempt when none succeed.
    """
    model_name = os.environ.get("WHISPER_MODEL", "large-v3-turbo")
    device_pref = os.environ.get("WHISPER_DEVICE", "auto")
    compute_pref = os.environ.get("WHISPER_COMPUTE_TYPE", "auto")
    auto_device = device_pref == "auto"
    auto_compute = compute_pref == "auto"

    plan: list[tuple[str, str]]
    if auto_device and auto_compute:
        plan = [
            ("cuda", "float16"),
            ("cuda", "int8_float16"),
            ("cpu", "int8"),
            ("cpu", "float32"),
        ]
    elif auto_device:
        plan = [(device, compute_pref) for device in ("cuda", "cpu")]
    elif auto_compute:
        plan = [(device_pref, "float16" if device_pref == "cuda" else "int8")]
    else:
        plan = [(device_pref, compute_pref)]

    failure: Exception | None = None
    for device, compute_type in plan:
        try:
            loaded = WhisperModel(model_name, device=device, compute_type=compute_type)
            # Remember what actually worked so "ping" can report it.
            setattr(loaded, "_resolved_device", device)
            setattr(loaded, "_resolved_compute_type", compute_type)
            return loaded
        except Exception as error:  # noqa: BLE001
            failure = error

    assert failure is not None
    raise failure
# The model is loaded once at import time; a load failure aborts the worker.
MODEL = resolve_model()
# Transcription language and beam size, read once from the environment.
LANGUAGE = os.environ.get("WHISPER_LANGUAGE", "ko")
BEAM_SIZE = int(os.environ.get("WHISPER_BEAM_SIZE", "1"))
def write(payload: dict[str, Any]) -> None:
    """Emit ``payload`` as one JSON line on stdout and flush immediately."""
    encoded = json.dumps(payload, ensure_ascii=False)
    sys.stdout.write(f"{encoded}\n")
    sys.stdout.flush()
def transcribe_pcm16_base64(pcm16_base64: str) -> str:
    """Transcribe base64-encoded 16-bit PCM and return the joined text.

    The samples are decoded as int16 and scaled by 1/32768 to float32 before
    being handed to the model. Empty segments are dropped and the remaining
    segment texts are joined with single spaces.
    """
    raw = base64.b64decode(pcm16_base64)
    samples = np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0

    decode_options = dict(
        language=LANGUAGE,
        task="transcribe",
        beam_size=BEAM_SIZE,
        condition_on_previous_text=False,
        vad_filter=False,
        without_timestamps=True,
        word_timestamps=False,
        temperature=0.0,
    )
    segments, _info = MODEL.transcribe(samples, **decode_options)

    stripped = [segment.text.strip() for segment in segments if segment.text]
    return " ".join(piece for piece in stripped if piece).strip()
# Request loop: one JSON request per stdin line, one JSON reply per stdout line.
# Supported methods are "ping" (reports the resolved model settings) and
# "transcribe" (expects params["pcm16_base64"]).
for raw_line in sys.stdin:
    line = raw_line.strip()
    if not line:
        continue

    # Parsing happens inside the try block so that one malformed line yields
    # an error reply instead of terminating the worker. The id stays None
    # when it could not be read from the request.
    request_id = None
    try:
        request = json.loads(line)
        request_id = request["id"]
        method = request["method"]
        params = request.get("params", {})

        if method == "ping":
            write(
                {
                    "id": request_id,
                    "result": {
                        "model": os.environ.get("WHISPER_MODEL", "large-v3-turbo"),
                        "device": getattr(MODEL, "_resolved_device", "unknown"),
                        "compute_type": getattr(MODEL, "_resolved_compute_type", "unknown"),
                    },
                }
            )
            continue

        if method == "transcribe":
            text = transcribe_pcm16_base64(params["pcm16_base64"])
            write(
                {
                    "id": request_id,
                    "result": {
                        "text": text,
                    },
                }
            )
            continue

        raise RuntimeError(f"unknown method: {method}")
    except Exception as error:  # noqa: BLE001
        # Full traceback goes to stderr; the caller gets a short message.
        traceback.print_exc(file=sys.stderr)
        write(
            {
                "id": request_id,
                "error": f"{type(error).__name__}: {error}",
            }
        )

View File

@@ -0,0 +1,3 @@
faster-whisper==1.2.1
nvidia-cublas-cu12
nvidia-cudnn-cu12>=9,<10

View File

@@ -1,2 +1 @@
faster-whisper==1.2.1 faster-whisper==1.2.1
git+https://github.com/myshell-ai/MeloTTS.git@v0.1.2

138
src/audio/capture.ts Normal file
View File

@@ -0,0 +1,138 @@
import { spawn, type ChildProcessByStdio } from "node:child_process";
import process from "node:process";
import type { Readable } from "node:stream";
import type { AppConfig } from "../config.js";
import type { Logger } from "../logger.js";
/**
 * Prints the available audio capture devices to the terminal.
 *
 * On Windows this runs ffmpeg's DirectShow device listing (written to stderr).
 * Elsewhere it runs `pactl list sources short` and falls back to
 * `wpctl status -n` when pactl exits non-zero or cannot be started.
 *
 * @returns A promise that resolves once a listing command succeeded and
 *   rejects when none of the commands could produce a listing.
 */
export function printAudioDevices(): Promise<void> {
  if (process.platform === "win32") {
    return new Promise<void>((resolve, reject) => {
      const child = spawn("ffmpeg", ["-hide_banner", "-list_devices", "true", "-f", "dshow", "-i", "dummy"], {
        stdio: ["ignore", "ignore", "inherit"],
      });
      child.on("exit", (code) => {
        // Exit code 1 is accepted too — presumably because ffmpeg fails on the
        // placeholder input "dummy" after printing the list (TODO confirm).
        if (code === 0 || code === 1) {
          resolve();
          return;
        }
        reject(new Error(`ffmpeg exited with code ${code ?? "null"}`));
      });
      child.on("error", reject);
    });
  }

  return new Promise<void>((resolve, reject) => {
    // Node may emit both "error" and "exit" for the same child, so make sure
    // the wpctl fallback is started at most once.
    let fallbackStarted = false;
    const fallBackToWpctl = (describeFailure: (wpctlCode: number | null) => string): void => {
      if (fallbackStarted) {
        return;
      }
      fallbackStarted = true;
      const wpctl = spawn("wpctl", ["status", "-n"], {
        stdio: ["ignore", "inherit", "inherit"],
      });
      wpctl.on("exit", (wpctlCode) => {
        if (wpctlCode === 0) {
          resolve();
          return;
        }
        reject(new Error(describeFailure(wpctlCode)));
      });
      wpctl.on("error", reject);
    };

    const pactl = spawn("pactl", ["list", "sources", "short"], {
      stdio: ["ignore", "inherit", "inherit"],
    });
    pactl.on("exit", (code) => {
      if (code === 0) {
        resolve();
        return;
      }
      fallBackToWpctl(
        (wpctlCode) =>
          `pactl exited with code ${code ?? "null"} and wpctl exited with code ${wpctlCode ?? "null"}`,
      );
    });
    pactl.on("error", () => {
      fallBackToWpctl((wpctlCode) => `pactl, wpctl 둘 다 실행할 수 없습니다. code=${wpctlCode ?? "null"}`);
    });
  });
}
/**
 * Starts an ffmpeg process that captures `config.AUDIO_SOURCE` and writes
 * mono, 16 kHz, signed 16-bit little-endian PCM to its stdout.
 *
 * Uses the DirectShow input on Windows and the PulseAudio input on Linux.
 *
 * @param config Application configuration; `AUDIO_SOURCE` must be set.
 * @param logger Logger used to announce the capture backend.
 * @returns The spawned ffmpeg child process (stdout and stderr are piped).
 * @throws Error when `AUDIO_SOURCE` is missing or the platform is unsupported.
 */
export function spawnLoopbackCapture(
  config: AppConfig,
  logger: Logger,
): ChildProcessByStdio<null, Readable, Readable> {
  const source = config.AUDIO_SOURCE;
  if (!source) {
    throw new Error("AUDIO_SOURCE 설정이 필요합니다. 먼저 `bun run devices` 로 장치 이름을 확인하세요.");
  }

  // Only the input format, the input specifier and the log text differ
  // between the supported platforms.
  let profile: { message: string; backend: string; format: string; input: string };
  switch (process.platform) {
    case "win32":
      profile = {
        message: "Starting Windows loopback capture",
        backend: "ffmpeg-dshow",
        format: "dshow",
        input: `audio=${source}`,
      };
      break;
    case "linux":
      profile = {
        message: "Starting Linux loopback capture",
        backend: "ffmpeg-pulse",
        format: "pulse",
        input: source,
      };
      break;
    default:
      throw new Error(`지원하지 않는 플랫폼입니다: ${process.platform}`);
  }

  const ffmpegArgs = [
    "-hide_banner",
    "-loglevel",
    "warning",
    "-fflags",
    "nobuffer",
    "-flags",
    "low_delay",
    "-f",
    profile.format,
    "-i",
    profile.input,
    "-ac",
    "1",
    "-ar",
    "16000",
    "-f",
    "s16le",
    "pipe:1",
  ];

  logger.info(profile.message, {
    source,
    backend: profile.backend,
  });

  return spawn("ffmpeg", ffmpegArgs, {
    stdio: ["ignore", "pipe", "pipe"],
  });
}

View File

@@ -1,59 +0,0 @@
import { existsSync } from "node:fs";
import { spawnSync } from "node:child_process";
import process from "node:process";
import ffmpegStatic from "ffmpeg-static";
/**
 * Returns the first non-empty candidate that exists on disk.
 *
 * @param paths Candidate paths in priority order; nullish and empty entries
 *   are skipped.
 * @returns The first existing path, or `null` when none exists.
 */
function firstExisting(paths: Array<string | null | undefined>): string | null {
  const hit = paths.find((candidate) => Boolean(candidate) && existsSync(candidate as string));
  return hit ?? null;
}
/**
 * Locates ffmpeg through the platform's lookup command (`where` on Windows,
 * `which` elsewhere).
 *
 * @returns The first reported path that exists on disk, or `null` when the
 *   lookup fails or reports nothing usable.
 */
function findOnPath(): string | null {
  const onWindows = process.platform === "win32";
  const lookup = spawnSync(onWindows ? "where" : "which", [onWindows ? "ffmpeg.exe" : "ffmpeg"], {
    encoding: "utf8",
  });
  if (lookup.status !== 0) {
    return null;
  }

  // The lookup may print several hits, one per line; take the first real one.
  for (const rawLine of lookup.stdout.split(/\r?\n/)) {
    const candidate = rawLine.trim();
    if (candidate.length > 0 && existsSync(candidate)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Resolves the ffmpeg binary to use.
 *
 * Candidates are checked in priority order: `FFMPEG_PATH`, `FFMPEG_BIN`, the
 * path exported by `ffmpeg-static`, and finally a lookup on `PATH`.
 *
 * @returns The first candidate that exists on disk, or `null` when none does.
 */
export function resolveFfmpegPath(): string | null {
  const staticPath = ffmpegStatic as unknown as string | null;
  const configured = firstExisting([
    process.env.FFMPEG_PATH,
    process.env.FFMPEG_BIN,
    staticPath,
  ]);
  // The PATH lookup spawns a subprocess, so only do it when the cheaper
  // candidates did not resolve. findOnPath() already returns existing paths only.
  return configured ?? findOnPath();
}
/**
 * Like `resolveFfmpegPath`, but fails instead of returning `null`.
 *
 * @returns The resolved ffmpeg path.
 * @throws Error with installation hints when ffmpeg cannot be found.
 */
export function requireFfmpegPath(): string {
  const located = resolveFfmpegPath();
  if (!located) {
    const hints = [
      "ffmpeg를 찾지 못했습니다.",
      "1. `bun install` 재실행",
      "2. 안 되면 `bun pm trust ffmpeg-static` 후 다시 `bun install`",
      "3. 또는 시스템 ffmpeg를 설치해서 PATH에 추가",
    ];
    throw new Error(hints.join("\n"));
  }
  return located;
}

View File

@@ -1,456 +0,0 @@
import { EventEmitter } from "node:events";
import prism from "prism-media";
import { RealTimeVAD } from "avr-vad";
import {
AudioPlayerStatus,
EndBehaviorType,
NoSubscriberBehavior,
VoiceConnectionStatus,
createAudioPlayer,
createAudioResource,
entersState,
joinVoiceChannel,
StreamType,
type AudioPlayer,
type AudioReceiveStream,
type VoiceConnection,
} from "@discordjs/voice";
import type { Client, Guild, VoiceBasedChannel } from "discord.js";
import type { AppConfig } from "../config.js";
import { Logger } from "../logger.js";
import { float32ToPcm16Buffer, int16ArrayToFloat32, Stereo48kToMono16kDownsampler, takeFrame } from "./pcm.js";
import { ConversationMemory, type UserUtterance } from "../services/conversation.js";
import type { LlmService } from "../services/llm.js";
import type { SttService } from "../services/stt.js";
import type { PreparedSpeechAudio, TtsService } from "../services/tts.js";
interface GuildVoiceSessionOptions {
client: Client;
config: AppConfig;
logger: Logger;
guild: Guild;
voiceChannel: VoiceBasedChannel;
textChannelId?: string;
stt: SttService;
tts: TtsService;
llm: LlmService;
}
interface SpeechJob {
text: string;
source: "assistant" | "manual";
}
/**
 * Per-speaker audio pipeline: takes decoded PCM chunks for one user, runs them
 * through the downsampler, cuts the result into fixed-size frames and feeds
 * those frames to a voice activity detector (VAD).
 *
 * Instances are created through the async `create` factory because the VAD
 * has to be constructed asynchronously.
 */
class UserAudioSession {
  // Presumably converts 48 kHz stereo PCM to 16 kHz mono (per the helper's
  // name) — confirm against ./pcm.js.
  private readonly downsampler = new Stereo48kToMono16kDownsampler();
  // Downsampled samples that do not yet fill a complete VAD frame.
  private readonly pendingSamples: number[] = [];
  private readonly vad: RealTimeVAD;
  // Promise chain that serialises VAD calls so frames are processed in order.
  private processing = Promise.resolve();

  private constructor(
    private readonly logger: Logger,
    private readonly speakerId: string,
    private readonly speakerName: string,
    private readonly receiveStream: AudioReceiveStream,
    private readonly decoder: NodeJS.ReadWriteStream & { destroy: () => void },
    vad: RealTimeVAD,
    private readonly onSpeechEnd: (utterance: UserUtterance, audio: Float32Array) => void,
  ) {
    this.vad = vad;
  }

  /**
   * Builds the VAD, wires its callbacks to the given handlers and attaches
   * the data/error listeners to the decoder and the receive stream.
   *
   * `onSpeechEnd` is called with an utterance whose `text` is still empty;
   * the audio of the detected speech segment is passed alongside it.
   */
  static async create(options: {
    logger: Logger;
    speakerId: string;
    speakerName: string;
    receiveStream: AudioReceiveStream;
    decoder: NodeJS.ReadWriteStream & { destroy: () => void };
    onSpeechStart: () => void;
    onSpeechEnd: (utterance: UserUtterance, audio: Float32Array) => void;
  }): Promise<UserAudioSession> {
    const vadInstance = await RealTimeVAD.new({
      model: "v5",
      sampleRate: 16000,
      // Must match the frame size used in pushPcmChunk().
      frameSamples: 1536,
      positiveSpeechThreshold: 0.55,
      negativeSpeechThreshold: 0.35,
      redemptionFrames: 8,
      preSpeechPadFrames: 2,
      minSpeechFrames: 3,
      onFrameProcessed: () => undefined,
      onVADMisfire: () => undefined,
      onSpeechStart: () => {
        options.onSpeechStart();
      },
      onSpeechRealStart: () => undefined,
      onSpeechEnd: (audio: Float32Array) => {
        options.onSpeechEnd(
          {
            speakerId: options.speakerId,
            speakerName: options.speakerName,
            text: "",
          },
          audio,
        );
      },
    });
    const session = new UserAudioSession(
      options.logger,
      options.speakerId,
      options.speakerName,
      options.receiveStream,
      options.decoder,
      vadInstance,
      options.onSpeechEnd,
    );
    session.decoder.on("data", (chunk: Buffer) => {
      session.pushPcmChunk(chunk);
    });
    // Stream errors are logged only; they do not tear the session down.
    session.decoder.on("error", (error) => {
      options.logger.warn("PCM decoder error", options.speakerId, error);
    });
    session.receiveStream.on("error", (error) => {
      options.logger.warn("Audio receive stream error", options.speakerId, error);
    });
    return session;
  }

  /**
   * Downsamples one decoded chunk, buffers the samples and queues every
   * complete 1536-sample frame for VAD processing.
   */
  private pushPcmChunk(chunk: Buffer): void {
    const mono16k = this.downsampler.pushStereo48kChunk(chunk);
    if (mono16k.length === 0) {
      return;
    }
    for (const sample of mono16k) {
      this.pendingSamples.push(sample);
    }
    while (true) {
      // takeFrame yields a falsy value once fewer than 1536 samples remain.
      const frame = takeFrame(this.pendingSamples, 1536);
      if (!frame) {
        return;
      }
      const floatFrame = int16ArrayToFloat32(frame);
      // Append to the chain; a failing frame is logged and does not block
      // the frames queued after it.
      this.processing = this.processing
        .then(() => this.vad.processAudio(floatFrame))
        .catch((error) => {
          this.logger.warn("VAD frame processing failed", this.speakerId, this.speakerName, error);
        });
    }
  }

  /** Destroys the receive stream, the decoder and the VAD (failures are logged). */
  destroy(): void {
    this.receiveStream.destroy();
    this.decoder.destroy();
    void this.vad.destroy().catch((error) => {
      this.logger.warn("VAD destroy failed", this.speakerId, this.speakerName, error);
    });
  }
}
export class GuildVoiceSession extends EventEmitter {
readonly guildId: string;
readonly voiceChannelId: string;
private readonly connection: VoiceConnection;
private readonly player: AudioPlayer;
private readonly memory: ConversationMemory;
private readonly trackedUsers = new Map<string, UserAudioSession>();
private readonly pendingUsers = new Map<string, Promise<void>>();
private readonly queue: SpeechJob[] = [];
private draining = false;
private currentAbortController: AbortController | null = null;
private currentPlayback: PreparedSpeechAudio | null = null;
private textChannelId?: string;
private constructor(private readonly options: GuildVoiceSessionOptions) {
super();
this.guildId = options.guild.id;
this.voiceChannelId = options.voiceChannel.id;
this.textChannelId = options.textChannelId;
this.memory = new ConversationMemory(options.config.MAX_CONVERSATION_TURNS);
this.player = createAudioPlayer({
behaviors: {
noSubscriber: NoSubscriberBehavior.Pause,
},
});
this.connection = joinVoiceChannel({
guildId: options.guild.id,
channelId: options.voiceChannel.id,
adapterCreator: options.guild.voiceAdapterCreator,
selfDeaf: false,
selfMute: false,
});
}
static async create(options: GuildVoiceSessionOptions): Promise<GuildVoiceSession> {
const session = new GuildVoiceSession(options);
await session.initialize();
return session;
}
private async initialize(): Promise<void> {
this.player.on("error", (error) => {
this.options.logger.warn("Audio player error", this.guildId, error);
});
this.connection.on("stateChange", (_oldState, newState) => {
if (newState.status === VoiceConnectionStatus.Destroyed) {
this.options.logger.info("Voice connection destroyed", this.guildId);
}
});
this.connection.subscribe(this.player);
await entersState(this.connection, VoiceConnectionStatus.Ready, 30_000);
this.connection.receiver.speaking.on("start", (userId: string) => {
if (userId === this.options.client.user?.id) {
return;
}
void this.ensureTrackedUser(userId);
});
}
setTextChannel(textChannelId?: string): void {
this.textChannelId = textChannelId;
}
clearConversation(): void {
this.memory.clear();
this.interruptPlayback("conversation-reset");
}
statusSummary(): string {
const playbackState = this.player.state.status;
return [
`세션 활성: 예`,
`음성 채널: ${this.options.voiceChannel.name}`,
`추적 유저 수: ${this.trackedUsers.size}`,
`재생 상태: ${playbackState}`,
`대기열: ${this.queue.length}`,
`최근 대화 턴: ${this.memory.recentTurns().length}`,
].join("\n");
}
async speakText(text: string): Promise<void> {
this.queue.push({
text,
source: "manual",
});
await this.drainQueue();
}
interruptPlayback(reason: string): void {
if (this.queue.length > 0 || this.player.state.status !== AudioPlayerStatus.Idle) {
this.options.logger.info("Interrupting playback", this.guildId, reason);
}
this.queue.splice(0, this.queue.length);
this.currentAbortController?.abort();
this.currentAbortController = null;
this.currentPlayback?.dispose();
this.currentPlayback = null;
this.player.stop(true);
}
async destroy(): Promise<void> {
this.interruptPlayback("session-destroy");
for (const session of this.trackedUsers.values()) {
session.destroy();
}
this.trackedUsers.clear();
this.pendingUsers.clear();
this.connection.destroy();
}
private async ensureTrackedUser(userId: string): Promise<void> {
if (this.trackedUsers.has(userId)) {
return;
}
const existing = this.pendingUsers.get(userId);
if (existing) {
await existing;
return;
}
const pending = this.createTrackedUser(userId).finally(() => {
this.pendingUsers.delete(userId);
});
this.pendingUsers.set(userId, pending);
await pending;
}
private async createTrackedUser(userId: string): Promise<void> {
const speakerName = await this.resolveSpeakerName(userId);
const receiveStream = this.connection.receiver.subscribe(userId, {
end: {
behavior: EndBehaviorType.Manual,
},
});
const decoder = new prism.opus.Decoder({
rate: 48000,
channels: 2,
frameSize: 960,
}) as NodeJS.ReadWriteStream & { destroy: () => void };
receiveStream.pipe(decoder);
const session = await UserAudioSession.create({
logger: this.options.logger,
speakerId: userId,
speakerName,
receiveStream,
decoder,
onSpeechStart: () => {
this.interruptPlayback(`barge-in:${speakerName}`);
},
onSpeechEnd: (utterance, audio) => {
void this.handleSpeechEnd(utterance, audio);
},
});
this.trackedUsers.set(userId, session);
this.options.logger.info("Tracking speaker", this.guildId, userId, speakerName);
}
private async resolveSpeakerName(userId: string): Promise<string> {
try {
const user = await this.options.client.users.fetch(userId);
return user.globalName ?? user.username;
} catch {
return `user-${userId.slice(-6)}`;
}
}
private async handleSpeechEnd(utterance: UserUtterance, audio: Float32Array): Promise<void> {
if (audio.length < 16000 * 0.25) {
return;
}
const pcmBuffer = float32ToPcm16Buffer(audio);
let transcript: string | null = null;
try {
transcript = await this.options.stt.transcribePcm16(pcmBuffer);
} catch (error) {
this.options.logger.warn("STT failed", this.guildId, utterance.speakerId, error);
await this.announce(`음성 인식 실패: ${utterance.speakerName}`);
return;
}
if (!transcript || transcript.trim().length === 0) {
return;
}
const hydratedUtterance: UserUtterance = {
...utterance,
text: transcript.trim(),
};
this.options.logger.info("Transcript committed", this.guildId, hydratedUtterance.speakerName, hydratedUtterance.text);
this.memory.addUserTurn(hydratedUtterance);
if (this.options.config.DEBUG_TEXT_EVENTS) {
await this.announce(`🗣️ ${hydratedUtterance.speakerName}: ${hydratedUtterance.text}`);
}
let reply: string;
try {
reply = await this.options.llm.generateReply(this.memory, hydratedUtterance);
} catch (error) {
this.options.logger.warn("LLM failed", this.guildId, utterance.speakerId, error);
reply = "지금은 답변 생성에 실패했습니다. 잠시 후 다시 말씀해 주세요.";
}
this.memory.addAssistantTurn(reply);
if (this.options.config.DEBUG_TEXT_EVENTS) {
await this.announce(`🤖 ${reply}`);
}
this.queue.push({
text: reply,
source: "assistant",
});
await this.drainQueue();
}
/**
 * Plays queued speech jobs one after another until the queue is empty.
 *
 * Only one drain loop runs at a time; a call made while another loop is
 * active returns immediately and relies on that loop to pick up new jobs.
 * Each job gets its own AbortController so it can be cancelled while it is
 * being synthesised or played.
 */
private async drainQueue(): Promise<void> {
  if (this.draining) {
    return;
  }
  this.draining = true;
  try {
    while (this.queue.length > 0) {
      const job = this.queue.shift();
      if (!job) {
        continue;
      }
      const abortController = new AbortController();
      this.currentAbortController = abortController;
      try {
        this.currentPlayback = await this.options.tts.preparePlayback(job.text, abortController.signal);
      } catch (error) {
        // Do not leave a controller for a job that no longer exists.
        if (this.currentAbortController === abortController) {
          this.currentAbortController = null;
        }
        if (abortController.signal.aborted) {
          continue;
        }
        this.options.logger.warn("TTS synthesis failed", this.guildId, job.source, error);
        await this.announce("음성 출력 생성에 실패했습니다.");
        continue;
      }
      if (abortController.signal.aborted) {
        // The job was cancelled while synthesis was still running but the
        // synthesis resolved anyway: drop the audio instead of playing it.
        this.currentPlayback?.dispose();
        this.currentPlayback = null;
        if (this.currentAbortController === abortController) {
          this.currentAbortController = null;
        }
        continue;
      }
      try {
        const resource = createAudioResource(this.currentPlayback.stream, {
          inputType: StreamType.Raw,
        });
        this.player.play(resource);
        // A failure to reach "Playing" is tolerated; the Idle wait below decides.
        await entersState(this.player, AudioPlayerStatus.Playing, 20_000).catch(() => null);
        await entersState(this.player, AudioPlayerStatus.Idle, 300_000);
      } catch (error) {
        if (!abortController.signal.aborted) {
          this.options.logger.warn("Audio playback failed", this.guildId, error);
        }
      } finally {
        this.currentPlayback?.dispose();
        this.currentPlayback = null;
        if (this.currentAbortController === abortController) {
          this.currentAbortController = null;
        }
      }
    }
  } finally {
    this.draining = false;
  }
}
/**
 * Posts a message to the session's text channel on a best-effort basis.
 *
 * Does nothing when no text channel id is set, when the channel cannot be
 * fetched, or when the channel has no callable `send`. Send failures are
 * swallowed.
 */
private async announce(message: string): Promise<void> {
  const channelId = this.textChannelId;
  if (!channelId) {
    return;
  }
  const channel = await this.options.client.channels.fetch(channelId).catch(() => null);
  if (channel?.isTextBased() && "send" in channel && typeof channel.send === "function") {
    await channel.send(message).catch(() => null);
  }
}
}

View File

@@ -1,499 +0,0 @@
import { spawn, type ChildProcess, type ChildProcessByStdio } from "node:child_process";
import { once } from "node:events";
import { promises as fs } from "node:fs";
import os from "node:os";
import path from "node:path";
import type { Readable, Writable } from "node:stream";
import { RealTimeVAD } from "avr-vad";
import type { AssistantRuntimeConfig } from "../config.js";
import { Logger } from "../logger.js";
import { requireFfmpegPath } from "./ffmpeg-path.js";
import { takeFrame, int16ArrayToFloat32, float32ToPcm16Buffer } from "./pcm.js";
import { ConversationMemory, type UserUtterance } from "../services/conversation.js";
import type { LlmService } from "../services/llm.js";
import type { SttService } from "../services/stt.js";
import type { PreparedSpeechAudio, TtsService } from "../services/tts.js";
/** Dependencies handed to a LocalVoiceSession at construction time. */
interface LocalVoiceSessionOptions {
// Runtime settings; the session reads MAX_CONVERSATION_TURNS, LOCAL_AUDIO_SOURCE,
// LOCAL_AUDIO_SINK, LOCAL_SPEAKER_NAME and DEBUG_TEXT_EVENTS from it.
config: AssistantRuntimeConfig;
// Destination for the session's info/warn/debug log lines.
logger: Logger;
// Speech-to-text backend; called through transcribePcm16().
stt: SttService;
// Text-to-speech backend; called through preparePlayback().
tts: TtsService;
// Reply generator; called through generateReply().
llm: LlmService;
}
/** One entry of the playback queue. */
interface SpeechJob {
// Text that is handed to the TTS service for synthesis.
text: string;
// "assistant" for replies generated after an utterance, "manual" for speakText() requests.
source: "assistant" | "manual";
}
/**
 * Voice assistant session that uses the local machine's audio devices.
 *
 * Flow: a recorder child process (`pw-record`, or ffmpeg with dshow on
 * Windows) streams raw PCM16 → 1536-sample frames are fed to the VAD → a
 * finished utterance goes through STT and the LLM → the reply is synthesised
 * and played by a player child process (`pw-play`, or PowerShell's
 * SoundPlayer on Windows). Speech detected by the VAD interrupts whatever is
 * queued or playing.
 */
export class LocalVoiceSession {
  private readonly memory: ConversationMemory;
  private readonly queue: SpeechJob[] = [];
  private readonly pendingSamples: number[] = [];
  private vad: RealTimeVAD | null = null;
  private recorder: ChildProcessByStdio<null, Readable, Readable> | null = null;
  private currentPlayer: ChildProcess | null = null;
  private currentAbortController: AbortController | null = null;
  private currentPlayback: PreparedSpeechAudio | null = null;
  // Serialises VAD calls so frames are processed in arrival order.
  private processing = Promise.resolve();
  private draining = false;
  private destroyed = false;
  // Trailing byte of an odd-length recorder chunk, kept until its other half arrives.
  private carryByte: number | null = null;

  constructor(private readonly options: LocalVoiceSessionOptions) {
    this.memory = new ConversationMemory(options.config.MAX_CONVERSATION_TURNS);
  }

  /**
   * Creates the VAD and starts the recorder process.
   *
   * @throws when the recorder cannot be configured (see spawnWindowsRecorder).
   */
  async start(): Promise<void> {
    this.vad = await RealTimeVAD.new({
      model: "v5",
      sampleRate: 16000,
      frameSamples: 1536,
      positiveSpeechThreshold: 0.55,
      negativeSpeechThreshold: 0.35,
      redemptionFrames: 8,
      preSpeechPadFrames: 2,
      minSpeechFrames: 3,
      onFrameProcessed: () => undefined,
      onVADMisfire: () => undefined,
      onSpeechStart: () => {
        this.interruptPlayback("local-barge-in");
      },
      onSpeechRealStart: () => undefined,
      onSpeechEnd: (audio: Float32Array) => {
        void this.handleSpeechEnd(audio);
      },
    });

    const recorder = this.spawnRecorder();
    this.recorder = recorder;
    recorder.stdout.on("data", (chunk: Buffer) => {
      this.pushPcm16Chunk(chunk);
    });
    recorder.stderr.on("data", (chunk: Buffer) => {
      const text = chunk.toString().trim();
      if (text.length > 0) {
        this.options.logger.debug("[pw-record]", text);
      }
    });
    // Without a listener a failed spawn (e.g. binary not installed) would be
    // thrown as an unhandled 'error' event and take the process down.
    recorder.on("error", (error) => {
      this.options.logger.warn("Local recorder failed", error);
    });
    recorder.on("exit", (code, signal) => {
      if (!this.destroyed) {
        this.options.logger.warn("pw-record exited unexpectedly", { code, signal });
      }
    });
  }

  /** Stops playback, terminates the recorder and releases the VAD. */
  async destroy(): Promise<void> {
    this.destroyed = true;
    this.interruptPlayback("local-shutdown");

    const recorder = this.recorder;
    // Only wait for 'exit' when the process was really started and has not
    // exited yet; otherwise the event never fires and the wait would hang.
    const recorderRunning =
      recorder !== null &&
      recorder.pid !== undefined &&
      recorder.exitCode === null &&
      recorder.signalCode === null;
    if (recorder && recorderRunning) {
      const exited = once(recorder, "exit").catch(() => null);
      if (recorder.kill("SIGTERM")) {
        await exited;
      }
    }

    if (this.vad) {
      await this.vad.destroy().catch((error) => {
        this.options.logger.warn("Local VAD destroy failed", error);
      });
      this.vad = null;
    }
  }

  /** Forgets the conversation so far and cancels queued/playing speech. */
  clearConversation(): void {
    this.memory.clear();
    this.interruptPlayback("local-reset");
  }

  /** Queues `text` for speech output and resolves once the queue has drained. */
  async speakText(text: string): Promise<void> {
    this.queue.push({
      text,
      source: "manual",
    });
    await this.drainQueue();
  }

  /** Returns a multi-line, human-readable status report. */
  statusSummary(): string {
    return [
      "모드: local",
      `플랫폼: ${process.platform}`,
      `입력 source: ${this.options.config.LOCAL_AUDIO_SOURCE ?? "default"}`,
      `출력 sink: ${this.describeSink()}`,
      `대기열: ${this.queue.length}`,
      `최근 대화 턴: ${this.memory.recentTurns().length}`,
    ].join("\n");
  }

  /** Starts the platform-specific recorder that emits raw 16 kHz mono s16 PCM on stdout. */
  private spawnRecorder(): ChildProcessByStdio<null, Readable, Readable> {
    if (process.platform === "win32") {
      return this.spawnWindowsRecorder();
    }
    const args = [
      "--rate",
      "16000",
      "--channels",
      "1",
      "--format",
      "s16",
      "--raw",
    ];
    if (this.options.config.LOCAL_AUDIO_SOURCE) {
      args.push("--target", this.options.config.LOCAL_AUDIO_SOURCE);
    }
    args.push("-");
    this.options.logger.info("Starting local recorder", {
      source: this.options.config.LOCAL_AUDIO_SOURCE ?? "default",
    });
    return spawn("pw-record", args, {
      stdio: ["ignore", "pipe", "pipe"],
    });
  }

  /**
   * Starts ffmpeg with the DirectShow input named by LOCAL_AUDIO_SOURCE.
   *
   * @throws Error when LOCAL_AUDIO_SOURCE is not configured.
   */
  private spawnWindowsRecorder(): ChildProcessByStdio<null, Readable, Readable> {
    const ffmpegPath = this.getFfmpegPath();
    const sourceName = this.options.config.LOCAL_AUDIO_SOURCE;
    if (!sourceName) {
      throw new Error("Windows 로컬 모드는 LOCAL_AUDIO_SOURCE 설정이 필요합니다. `bun run audio:devices` 로 이름을 확인해 주세요.");
    }
    const args = [
      "-hide_banner",
      "-loglevel",
      "warning",
      "-f",
      "dshow",
      "-i",
      `audio=${sourceName}`,
      "-ac",
      "1",
      "-ar",
      "16000",
      "-f",
      "s16le",
      "pipe:1",
    ];
    this.options.logger.info("Starting local recorder", {
      source: sourceName,
      backend: "ffmpeg-dshow",
    });
    return spawn(ffmpegPath, args, {
      stdio: ["ignore", "pipe", "pipe"],
    });
  }

  /**
   * Converts a recorder chunk into samples and feeds complete frames to the VAD.
   *
   * Pipe chunks can end in the middle of a 16-bit sample; the dangling byte is
   * carried over to the next chunk so the sample stream never loses alignment.
   */
  private pushPcm16Chunk(chunk: Buffer): void {
    if (this.destroyed || !this.vad) {
      return;
    }

    let data = chunk;
    if (this.carryByte !== null) {
      data = Buffer.concat([Buffer.from([this.carryByte]), chunk]);
      this.carryByte = null;
    }
    const evenLength = data.length - (data.length % 2);
    for (let offset = 0; offset < evenLength; offset += 2) {
      this.pendingSamples.push(data.readInt16LE(offset));
    }
    if (evenLength < data.length) {
      this.carryByte = data.readUInt8(evenLength);
    }

    while (true) {
      const frame = takeFrame(this.pendingSamples, 1536);
      if (!frame) {
        return;
      }
      const floatFrame = int16ArrayToFloat32(frame);
      this.processing = this.processing
        .then(() => this.vad?.processAudio(floatFrame))
        .catch((error) => {
          this.options.logger.warn("Local VAD processing failed", error);
        });
    }
  }

  /**
   * Runs one finished utterance through STT → memory → LLM → playback queue.
   *
   * Audio with fewer than 16000 * 0.25 samples is ignored. STT failures are
   * logged and end processing; LLM failures are replaced by a fixed apology.
   */
  private async handleSpeechEnd(audio: Float32Array): Promise<void> {
    if (audio.length < 16000 * 0.25) {
      return;
    }
    const utterance: UserUtterance = {
      speakerId: "local-user",
      speakerName: this.options.config.LOCAL_SPEAKER_NAME,
      text: "",
    };
    let transcript: string | null = null;
    try {
      transcript = await this.options.stt.transcribePcm16(float32ToPcm16Buffer(audio));
    } catch (error) {
      this.options.logger.warn("Local STT failed", error);
      return;
    }
    if (!transcript || transcript.trim().length === 0) {
      return;
    }
    utterance.text = transcript.trim();
    this.memory.addUserTurn(utterance);
    this.options.logger.info("Local transcript", utterance.text);
    if (this.options.config.DEBUG_TEXT_EVENTS) {
      console.log(`\n[you] ${utterance.text}`);
    }
    let reply: string;
    try {
      reply = await this.options.llm.generateReply(this.memory, utterance);
    } catch (error) {
      this.options.logger.warn("Local LLM failed", error);
      reply = "지금은 답변 생성에 실패했습니다. 잠시 후 다시 말씀해 주세요.";
    }
    this.memory.addAssistantTurn(reply);
    if (this.options.config.DEBUG_TEXT_EVENTS) {
      console.log(`[bot] ${reply}\n`);
    }
    this.queue.push({
      text: reply,
      source: "assistant",
    });
    await this.drainQueue();
  }

  /** Empties the queue, aborts the current job and kills the player process. */
  private interruptPlayback(reason: string): void {
    if (this.queue.length > 0 || this.currentPlayer) {
      this.options.logger.info("Interrupting local playback", reason);
    }
    this.queue.splice(0, this.queue.length);
    this.currentAbortController?.abort();
    this.currentAbortController = null;
    this.currentPlayback?.dispose();
    this.currentPlayback = null;
    if (this.currentPlayer && !this.currentPlayer.killed) {
      this.currentPlayer.kill("SIGKILL");
    }
    this.currentPlayer = null;
  }

  /**
   * Synthesises and plays queued jobs sequentially.
   *
   * Only one drain loop runs at a time; it stops once the queue is empty or
   * the session has been destroyed.
   */
  private async drainQueue(): Promise<void> {
    if (this.draining || this.destroyed) {
      return;
    }
    this.draining = true;
    try {
      while (this.queue.length > 0 && !this.destroyed) {
        const job = this.queue.shift();
        if (!job) {
          continue;
        }
        const abortController = new AbortController();
        this.currentAbortController = abortController;
        try {
          this.currentPlayback = await this.options.tts.preparePlayback(job.text, abortController.signal);
        } catch (error) {
          if (this.currentAbortController === abortController) {
            this.currentAbortController = null;
          }
          if (!abortController.signal.aborted) {
            this.options.logger.warn("Local TTS synthesis failed", error);
          }
          continue;
        }
        if (abortController.signal.aborted) {
          // Interrupted while synthesis was running: never play the stale reply.
          this.currentPlayback?.dispose();
          this.currentPlayback = null;
          continue;
        }
        try {
          await this.playToSink(this.currentPlayback, abortController.signal);
        } catch (error) {
          if (!abortController.signal.aborted) {
            this.options.logger.warn("Local playback failed", error);
          }
        } finally {
          this.currentPlayback?.dispose();
          this.currentPlayback = null;
          if (this.currentAbortController === abortController) {
            this.currentAbortController = null;
          }
        }
      }
    } finally {
      this.draining = false;
    }
  }

  /**
   * Plays prepared audio and resolves when the player process has exited.
   *
   * The stream is piped to `pw-play` as raw 48 kHz stereo s16 (on Windows the
   * work is delegated to playToWindowsDefaultSink). An abort resolves quietly.
   *
   * @throws Error when the player exits with a non-zero code without an abort.
   */
  private async playToSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise<void> {
    // An 'abort' listener added after the fact would never fire.
    if (signal.aborted) {
      return;
    }
    if (process.platform === "win32") {
      await this.playToWindowsDefaultSink(playback, signal);
      return;
    }
    const args = [
      "--rate",
      "48000",
      "--channels",
      "2",
      "--format",
      "s16",
      "--raw",
    ];
    if (this.options.config.LOCAL_AUDIO_SINK) {
      args.push("--target", this.options.config.LOCAL_AUDIO_SINK);
    }
    args.push("-");
    const player = spawn("pw-play", args, {
      stdio: ["pipe", "ignore", "pipe"],
    });
    this.currentPlayer = player;
    player.stderr.on("data", (chunk: Buffer) => {
      const text = chunk.toString().trim();
      if (text.length > 0) {
        this.options.logger.debug("[pw-play]", text);
      }
    });
    // Killing the player mid-stream makes the pipe fail with EPIPE; without a
    // listener that stream error would be unhandled.
    player.stdin.on("error", (error) => {
      this.options.logger.debug("[pw-play] stdin error", error);
    });
    const onAbort = (): void => {
      playback.stream.destroy();
      if (!player.killed) {
        player.kill("SIGKILL");
      }
    };
    signal.addEventListener("abort", onAbort, { once: true });
    try {
      playback.stream.pipe(player.stdin);
      // Rejects if the child emits 'error' (e.g. pw-play is not installed).
      const [code, playSignal] = (await once(player, "exit")) as [number | null, NodeJS.Signals | null];
      if (signal.aborted) {
        return;
      }
      if (code !== 0) {
        throw new Error(`pw-play exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`);
      }
    } finally {
      signal.removeEventListener("abort", onAbort);
      if (this.currentPlayer === player) {
        this.currentPlayer = null;
      }
    }
  }

  /**
   * Windows playback: buffers the whole stream, writes it to a temporary WAV
   * file (48 kHz, 2 channels, 16 bit) and plays it synchronously through
   * PowerShell's System.Media.SoundPlayer. The temp file is always removed.
   *
   * @throws Error when PowerShell exits with a non-zero code without an abort.
   */
  private async playToWindowsDefaultSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise<void> {
    if (signal.aborted) {
      return;
    }
    const chunks: Buffer[] = [];
    try {
      await new Promise<void>((resolve, reject) => {
        const onCollectAbort = (): void => {
          playback.stream.destroy();
          reject(new Error("playback aborted"));
        };
        signal.addEventListener("abort", onCollectAbort, { once: true });
        playback.stream.on("data", (chunk: Buffer) => {
          chunks.push(Buffer.from(chunk));
        });
        playback.stream.once("end", () => {
          signal.removeEventListener("abort", onCollectAbort);
          resolve();
        });
        playback.stream.once("error", (error) => {
          signal.removeEventListener("abort", onCollectAbort);
          reject(error);
        });
      });
    } catch (error) {
      if (!signal.aborted) {
        throw error;
      }
    }
    if (signal.aborted) {
      return;
    }

    const pcm = Buffer.concat(chunks);
    const wav = createWaveFileBuffer(pcm, 48000, 2, 16);
    const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-${Date.now()}.wav`);
    try {
      await fs.writeFile(tempPath, wav);
      if (signal.aborted) {
        return;
      }
      const psScript = [
        "Add-Type -AssemblyName System;",
        // Single quotes are doubled to stay inside the PowerShell string literal.
        `$player = New-Object System.Media.SoundPlayer('${tempPath.replace(/'/g, "''")}');`,
        "$player.PlaySync();",
      ].join(" ");
      const player = spawn("powershell", ["-NoProfile", "-Command", psScript], {
        stdio: ["ignore", "ignore", "pipe"],
      });
      this.currentPlayer = player;
      player.stderr.on("data", (chunk: Buffer) => {
        const text = chunk.toString().trim();
        if (text.length > 0) {
          this.options.logger.debug("[powershell-player]", text);
        }
      });
      const onAbort = (): void => {
        if (!player.killed) {
          player.kill("SIGKILL");
        }
      };
      signal.addEventListener("abort", onAbort, { once: true });
      try {
        const [code, playSignal] = (await once(player, "exit")) as [number | null, NodeJS.Signals | null];
        if (signal.aborted) {
          return;
        }
        if (code !== 0) {
          throw new Error(`powershell playback exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`);
        }
      } finally {
        signal.removeEventListener("abort", onAbort);
        if (this.currentPlayer === player) {
          this.currentPlayer = null;
        }
      }
    } finally {
      // Runs on success, abort and failure alike so no WAV files pile up.
      await fs.unlink(tempPath).catch(() => null);
    }
  }

  /** Returns the ffmpeg executable path from requireFfmpegPath(). */
  private getFfmpegPath(): string {
    return requireFfmpegPath();
  }

  /**
   * Label of the output sink for the status report.
   *
   * NOTE(review): Windows playback above never reads LOCAL_AUDIO_SINK, yet a
   * configured value is still reported here — confirm this is intended.
   */
  private describeSink(): string {
    if (process.platform === "win32") {
      return this.options.config.LOCAL_AUDIO_SINK ?? "system-default";
    }
    return this.options.config.LOCAL_AUDIO_SINK ?? "default";
  }
}
/**
 * Wraps raw PCM data in a 44-byte RIFF/WAVE header (format tag 1, PCM).
 *
 * @param pcm - Raw sample bytes placed after the header.
 * @param sampleRate - Samples per second written to the header.
 * @param channels - Channel count written to the header.
 * @param bitsPerSample - Bit depth written to the header.
 * @returns A new buffer holding the header followed by `pcm`.
 */
function createWaveFileBuffer(
  pcm: Buffer,
  sampleRate: number,
  channels: number,
  bitsPerSample: number,
): Buffer {
  const bytesPerSample = bitsPerSample / 8;
  const wavHeader = Buffer.alloc(44);
  let cursor = 0;
  const tag = (text: string): void => {
    wavHeader.write(text, cursor, 4, "ascii");
    cursor += 4;
  };
  const u32 = (value: number): void => {
    cursor = wavHeader.writeUInt32LE(value, cursor);
  };
  const u16 = (value: number): void => {
    cursor = wavHeader.writeUInt16LE(value, cursor);
  };

  tag("RIFF");
  u32(36 + pcm.length); // RIFF chunk size: everything after this field
  tag("WAVE");
  tag("fmt ");
  u32(16); // size of the fmt chunk body
  u16(1); // audio format 1 = PCM
  u16(channels);
  u32(sampleRate);
  u32(sampleRate * channels * bytesPerSample); // byte rate
  u16(channels * bytesPerSample); // block align
  u16(bitsPerSample);
  tag("data");
  u32(pcm.length);

  return Buffer.concat([wavHeader, pcm]);
}

View File

@@ -1,60 +0,0 @@
/**
 * Streaming converter from interleaved stereo PCM16 to mono at one third of
 * the input rate.
 *
 * Each stereo frame (4 bytes) is averaged to one mono sample and every three
 * mono samples are averaged into one output sample. Mono samples that do not
 * fill a group of three, and bytes that do not fill a stereo frame, are kept
 * for the next call so chunk boundaries never shift the stream.
 */
export class Stereo48kToMono16kDownsampler {
  private readonly pendingMono48k: number[] = [];
  // Bytes of an incomplete stereo frame left over from the previous chunk.
  private pendingBytes: Buffer = Buffer.alloc(0);

  /**
   * Feeds one chunk of interleaved little-endian stereo PCM16.
   *
   * @param chunk - Raw bytes; the length does not have to be a multiple of 4.
   * @returns The output samples completed by this chunk (possibly empty).
   */
  pushStereo48kChunk(chunk: Buffer): Int16Array {
    const data = this.pendingBytes.length > 0 ? Buffer.concat([this.pendingBytes, chunk]) : chunk;
    const frameBytes = data.length - (data.length % 4);
    // Copy the remainder: the caller may reuse the memory behind `chunk`.
    this.pendingBytes = Buffer.from(data.subarray(frameBytes));

    for (let offset = 0; offset < frameBytes; offset += 4) {
      const left = data.readInt16LE(offset);
      const right = data.readInt16LE(offset + 2);
      this.pendingMono48k.push(Math.round((left + right) / 2));
    }

    const outputLength = Math.floor(this.pendingMono48k.length / 3);
    const output = new Int16Array(outputLength);
    let readIndex = 0;
    for (let index = 0; index < outputLength; index += 1) {
      const a = this.pendingMono48k[readIndex];
      const b = this.pendingMono48k[readIndex + 1];
      const c = this.pendingMono48k[readIndex + 2];
      output[index] = Math.round((a + b + c) / 3);
      readIndex += 3;
    }
    this.pendingMono48k.splice(0, readIndex);
    return output;
  }
}
/**
 * Scales signed 16-bit samples to floats by dividing each one by 32768.
 *
 * @param input - PCM16 samples.
 * @returns A new Float32Array of the same length.
 */
export function int16ArrayToFloat32(input: Int16Array): Float32Array {
  return Float32Array.from(input, (sample) => sample / 32768);
}
/**
 * Encodes float samples as little-endian signed 16-bit PCM.
 *
 * Values are clamped to [-1, 1]; negatives are scaled by 32768 and
 * non-negatives by 32767 before rounding.
 *
 * @param input - Float samples.
 * @returns A buffer with two bytes per input sample.
 */
export function float32ToPcm16Buffer(input: Float32Array): Buffer {
  const pcm = Buffer.allocUnsafe(input.length * 2);
  input.forEach((sample, position) => {
    const clamped = Math.max(-1, Math.min(1, sample));
    const fullScale = clamped < 0 ? 32768 : 32767;
    pcm.writeInt16LE(Math.round(clamped * fullScale), position * 2);
  });
  return pcm;
}
/**
 * Removes the first `frameSize` samples from `source` and returns them.
 *
 * @param source - Sample queue; mutated when a frame is taken.
 * @param frameSize - Number of samples per frame.
 * @returns The frame, or null (leaving `source` untouched) when fewer than
 *   `frameSize` samples are queued.
 */
export function takeFrame(source: number[], frameSize: number): Int16Array | null {
  const hasFullFrame = source.length >= frameSize;
  return hasFullFrame ? Int16Array.from(source.splice(0, frameSize)) : null;
}

View File

@@ -0,0 +1,148 @@
/** Callbacks and tuning values accepted by RealtimeSegmenter. */
interface RealtimeSegmenterOptions {
// Receives each finished speech segment as PCM16 bytes.
onSegment: (pcm16: Buffer) => void;
// Called for every processed frame with that frame's peak absolute sample value.
onLevel?: (peak: number) => void;
// Called when a segment starts, with the peak of the frame that triggered it.
onSpeechStart?: (peak: number) => void;
// Called with the sample count of a segment dropped for being shorter than minSpeechSamples.
onSpeechDiscarded?: (samples: number) => void;
// Called with the sample count of a segment right before onSegment is invoked.
onSpeechReady?: (samples: number) => void;
// Cap on samples buffered while no speech is active (default 3200).
preRollSamples?: number;
// Peak value a frame must reach to count toward starting speech (default 900).
speechStartThreshold?: number;
// Peak value below which a frame counts as silence during speech (default 450).
speechContinueThreshold?: number;
// Consecutive frames at or above the start threshold needed to start speech (default 2).
speechStartFrames?: number;
// Consecutive silent frames that end a segment (default 24).
speechEndFrames?: number;
// Segments with fewer samples than this are discarded (default 7200).
minSpeechSamples?: number;
// A segment is cut once it holds at least this many samples (default 160000).
maxSpeechSamples?: number;
}
/**
 * Peak-threshold speech segmenter for a stream of little-endian PCM16.
 *
 * Input is cut into frames of 320 samples. Speech starts after
 * `speechStartFrames` consecutive frames whose peak reaches
 * `speechStartThreshold`, and ends after `speechEndFrames` consecutive frames
 * below `speechContinueThreshold` or once `maxSpeechSamples` is reached.
 * A finished segment consists of the buffered pre-roll, the triggering frame
 * and everything up to the end; segments shorter than `minSpeechSamples` are
 * discarded.
 */
export class RealtimeSegmenter {
  private readonly pendingSamples: number[] = [];
  private readonly preRoll: number[] = [];
  private readonly speech: number[] = [];
  private readonly frameSamples = 320;
  private readonly preRollSamples: number;
  private readonly speechStartThreshold: number;
  private readonly speechContinueThreshold: number;
  private readonly speechStartFrames: number;
  private readonly speechEndFrames: number;
  private readonly minSpeechSamples: number;
  private readonly maxSpeechSamples: number;
  private speechActive = false;
  private speechCandidateFrames = 0;
  private silenceFrames = 0;
  // Trailing byte of an odd-length chunk, kept until its other half arrives.
  private carryByte: number | null = null;

  constructor(private readonly options: RealtimeSegmenterOptions) {
    this.preRollSamples = options.preRollSamples ?? 3200;
    this.speechStartThreshold = options.speechStartThreshold ?? 900;
    this.speechContinueThreshold = options.speechContinueThreshold ?? 450;
    this.speechStartFrames = options.speechStartFrames ?? 2;
    this.speechEndFrames = options.speechEndFrames ?? 24;
    this.minSpeechSamples = options.minSpeechSamples ?? 7200;
    this.maxSpeechSamples = options.maxSpeechSamples ?? 160000;
  }

  /**
   * Feeds raw PCM16 bytes and processes every complete frame they produce.
   *
   * A chunk may end in the middle of a sample; the dangling byte is carried
   * into the next call so later samples stay correctly aligned.
   */
  pushChunk(chunk: Buffer): void {
    let data = chunk;
    if (this.carryByte !== null) {
      data = Buffer.concat([Buffer.from([this.carryByte]), chunk]);
      this.carryByte = null;
    }
    const evenLength = data.length - (data.length % 2);
    for (let offset = 0; offset < evenLength; offset += 2) {
      this.pendingSamples.push(data.readInt16LE(offset));
    }
    if (evenLength < data.length) {
      this.carryByte = data.readUInt8(evenLength);
    }

    while (true) {
      const frame = takeFrame(this.pendingSamples, this.frameSamples);
      if (!frame) {
        return;
      }
      this.processFrame(frame);
    }
  }

  /** Drops all buffered audio and returns to the idle (no speech) state. */
  reset(): void {
    this.pendingSamples.splice(0, this.pendingSamples.length);
    this.preRoll.splice(0, this.preRoll.length);
    this.speech.splice(0, this.speech.length);
    this.carryByte = null;
    this.speechActive = false;
    this.speechCandidateFrames = 0;
    this.silenceFrames = 0;
  }

  /** Advances the start/continue/end state machine by one frame. */
  private processFrame(frame: Int16Array): void {
    let peak = 0;
    for (const sample of frame) {
      const abs = Math.abs(sample);
      if (abs > peak) {
        peak = abs;
      }
    }
    this.options.onLevel?.(peak);

    if (!this.speechActive) {
      if (peak >= this.speechStartThreshold) {
        this.speechCandidateFrames += 1;
      } else {
        this.speechCandidateFrames = 0;
      }
      if (this.speechCandidateFrames < this.speechStartFrames) {
        // Not speech yet: remember the frame as context for a later start.
        appendWithCap(this.preRoll, frame, this.preRollSamples);
        return;
      }
      this.speechActive = true;
      this.silenceFrames = 0;
      // Seed the segment with the audio that preceded the triggering frame.
      // The frame itself is appended exactly once below; storing it in the
      // pre-roll first would duplicate it at the start of the segment.
      this.speech.splice(0, this.speech.length);
      for (const sample of this.preRoll) {
        this.speech.push(sample);
      }
      this.preRoll.splice(0, this.preRoll.length);
      this.options.onSpeechStart?.(peak);
    }

    this.speech.push(...frame);
    if (peak >= this.speechContinueThreshold) {
      this.silenceFrames = 0;
    } else {
      this.silenceFrames += 1;
    }

    const endedBySilence = this.silenceFrames >= this.speechEndFrames;
    const reachedMaxLength = this.speech.length >= this.maxSpeechSamples;
    if (!endedBySilence && !reachedMaxLength) {
      return;
    }

    const speechPcm = int16ArrayToBuffer(Int16Array.from(this.speech));
    this.speechActive = false;
    this.speech.splice(0, this.speech.length);
    this.silenceFrames = 0;
    this.speechCandidateFrames = 0;

    // Two bytes per sample.
    if (speechPcm.length < this.minSpeechSamples * 2) {
      this.options.onSpeechDiscarded?.(speechPcm.length / 2);
      return;
    }
    this.options.onSpeechReady?.(speechPcm.length / 2);
    this.options.onSegment(speechPcm);
  }
}
/**
 * Dequeues one frame of `size` samples from the front of `source`.
 *
 * @returns The removed samples, or null when `source` holds fewer than `size`
 *   samples (in which case it is left unchanged).
 */
function takeFrame(source: number[], size: number): Int16Array | null {
  const enoughQueued = source.length >= size;
  return enoughQueued ? Int16Array.from(source.splice(0, size)) : null;
}
/**
 * Appends `samples` to `target`, then trims from the front so that at most
 * `cap` entries (the newest ones) remain.
 */
function appendWithCap(target: number[], samples: Int16Array, cap: number): void {
  for (const sample of samples) {
    target.push(sample);
  }
  const overflow = target.length - cap;
  if (overflow > 0) {
    target.splice(0, overflow);
  }
}
/**
 * Serialises samples as little-endian signed 16-bit values.
 *
 * @returns A new buffer with two bytes per sample.
 */
function int16ArrayToBuffer(input: Int16Array): Buffer {
  const bytes = Buffer.allocUnsafe(input.length * 2);
  input.forEach((sample, position) => {
    bytes.writeInt16LE(sample, position * 2);
  });
  return bytes;
}

View File

@@ -12,30 +12,46 @@ const emptyToUndefined = z.preprocess((value) => {
}, z.string().min(1).optional()); }, z.string().min(1).optional());
const envSchema = z.object({ const envSchema = z.object({
DISCORD_BOT_TOKEN: emptyToUndefined,
DISCORD_APPLICATION_ID: emptyToUndefined,
DISCORD_COMMAND_GUILD_ID: emptyToUndefined,
OLLAMA_BASE_URL: z.string().min(1).default("http://localhost:11434"),
OLLAMA_MODEL: z.string().min(1).default("qwen3:0.6b"),
OLLAMA_KEEP_ALIVE: z.string().min(1).default("5m"),
OLLAMA_NUM_CTX: z.coerce.number().int().min(512).max(32768).default(4096),
LOCAL_AI_VENV_PATH: z.string().min(1).default(".local-ai/.venv"), LOCAL_AI_VENV_PATH: z.string().min(1).default(".local-ai/.venv"),
LOCAL_AI_CACHE_DIR: z.string().min(1).default(".local-ai/cache"),
LOCAL_AI_PYTHON: emptyToUndefined, LOCAL_AI_PYTHON: emptyToUndefined,
LOCAL_STT_MODEL: z.string().min(1).default("tiny"), AUDIO_SOURCE: emptyToUndefined,
LOCAL_STT_DEVICE: z.string().min(1).default("auto"), DOCKER_BIN: emptyToUndefined,
LOCAL_STT_COMPUTE_TYPE: z.string().min(1).default("auto"), TTS_ENABLED: z
LOCAL_STT_BEAM_SIZE: z.coerce.number().int().min(1).max(8).default(1), .string()
LOCAL_TTS_LANGUAGE: z.string().min(1).default("KR"), .optional()
LOCAL_TTS_SPEAKER: z.string().min(1).default("KR"), .transform((value) => value?.trim().toLowerCase() !== "false"),
LOCAL_TTS_DEVICE: z.string().min(1).default("auto"), TTS_IMAGE: z.string().min(1).default("realtime-voice-bot-melotts:v0.1.2"),
LOCAL_TTS_SPEED: z.coerce.number().min(0.8).max(1.6).default(1.12), TTS_LANGUAGE: z.string().min(1).default("KR"),
BOT_DEFAULT_LANGUAGE: z.string().min(2).default("ko"), TTS_SPEAKER: z.string().min(1).default("KR"),
MAX_CONVERSATION_TURNS: z.coerce.number().int().min(4).max(30).default(12), TTS_DEVICE: z.string().min(1).default("cpu"),
LOCAL_AUDIO_SOURCE: emptyToUndefined, TTS_SPEED: z.coerce.number().min(0.5).max(2).default(1.18),
LOCAL_AUDIO_SINK: emptyToUndefined, TTS_PLAYBACK_RATE: z.coerce.number().min(0.5).max(4).default(2.2),
LOCAL_SPEAKER_NAME: z.string().min(1).default("local-user"), TTS_SDP_RATIO: z.coerce.number().min(0).max(1).default(0.22),
DEBUG_TEXT_EVENTS: z TTS_NOISE_SCALE: z.coerce.number().min(0).max(2).default(0.55),
TTS_NOISE_SCALE_W: z.coerce.number().min(0).max(2).default(0.75),
TTS_CACHE_DIR: z.string().min(1).default(".local-ai/tts-cache"),
TTS_OUTPUT_DIR: z.string().min(1).default(".local-ai/tts-output"),
DEBUG: z
.string()
.optional()
.transform((value) => value?.trim().toLowerCase() === "true"),
OLLAMA_BASE_URL: z.string().min(1).default("http://127.0.0.1:11434"),
OLLAMA_MODEL: z.string().min(1).default("qwen3:8b"),
OLLAMA_KEEP_ALIVE: z.string().min(1).default("5m"),
MAX_CONVERSATION_TURNS: z.coerce.number().int().min(1).max(20).default(6),
WHISPER_MODEL: z.string().min(1).default("large-v3-turbo"),
WHISPER_LANGUAGE: z.string().min(1).default("ko"),
WHISPER_DEVICE: z.enum(["auto", "cuda", "cpu"]).default("auto"),
WHISPER_COMPUTE_TYPE: z.string().min(1).default("auto"),
WHISPER_BEAM_SIZE: z.coerce.number().int().min(1).max(8).default(2),
SEGMENT_START_THRESHOLD: z.coerce.number().int().min(100).max(10000).default(900),
SEGMENT_CONTINUE_THRESHOLD: z.coerce.number().int().min(50).max(10000).default(450),
SEGMENT_START_FRAMES: z.coerce.number().int().min(1).max(10).default(2),
SEGMENT_END_FRAMES: z.coerce.number().int().min(4).max(60).default(24),
SEGMENT_PREROLL_SAMPLES: z.coerce.number().int().min(320).max(16000).default(3200),
SEGMENT_MIN_SPEECH_SAMPLES: z.coerce.number().int().min(1600).max(64000).default(7200),
SEGMENT_MAX_SPEECH_SAMPLES: z.coerce.number().int().min(16000).max(320000).default(160000),
DEBUG_TRANSCRIPTS: z
.string() .string()
.optional() .optional()
.transform((value) => value === "true"), .transform((value) => value === "true"),
@@ -43,32 +59,7 @@ const envSchema = z.object({
}); });
export type AppConfig = z.infer<typeof envSchema>; export type AppConfig = z.infer<typeof envSchema>;
export type AssistantRuntimeConfig = AppConfig;
export type DiscordRuntimeConfig = AssistantRuntimeConfig & {
DISCORD_BOT_TOKEN: string;
DISCORD_APPLICATION_ID: string;
};
export function loadConfig(): AppConfig { export function loadConfig(): AppConfig {
return envSchema.parse(process.env); return envSchema.parse(process.env);
} }
function requirePresent(value: string | undefined, name: string): string {
if (!value) {
throw new Error(`${name} 환경변수가 필요합니다.`);
}
return value;
}
export function requireAssistantRuntimeConfig(config: AppConfig): AssistantRuntimeConfig {
return config;
}
export function requireDiscordRuntimeConfig(config: AppConfig): DiscordRuntimeConfig {
const assistant = requireAssistantRuntimeConfig(config);
return {
...assistant,
DISCORD_BOT_TOKEN: requirePresent(config.DISCORD_BOT_TOKEN, "DISCORD_BOT_TOKEN"),
DISCORD_APPLICATION_ID: requirePresent(config.DISCORD_APPLICATION_ID, "DISCORD_APPLICATION_ID"),
};
}

View File

@@ -1,238 +0,0 @@
import process from "node:process";
import {
GatewayIntentBits,
REST,
Routes,
SlashCommandBuilder,
type ChatInputCommandInteraction,
type Client,
type GuildMember,
type VoiceBasedChannel,
} from "discord.js";
import { Client as DiscordClient } from "discord.js";
import { GuildVoiceSession } from "./audio/guild-voice-session.js";
import { type DiscordRuntimeConfig } from "./config.js";
import { Logger } from "./logger.js";
import { LocalFasterWhisperSttService } from "./services/local-stt.js";
import { LocalMeloTtsService } from "./services/local-tts.js";
import { OllamaLlmService } from "./services/ollama-llm.js";
/**
 * Starts the Discord voice assistant and keeps it running.
 *
 * Warms up the STT and TTS services, registers the slash commands (`/join`,
 * `/leave`, `/status`, `/reset`, `/say`) once the client is ready, keeps at
 * most one voice session per guild and tears everything down on SIGINT or
 * SIGTERM.
 *
 * @param config - Runtime configuration including the bot token and application id.
 * @param logger - Logger used by this function and handed to services/sessions.
 * @returns Resolves once the client login call has completed.
 */
export async function runDiscordBot(config: DiscordRuntimeConfig, logger: Logger): Promise<void> {
  const commands = [
    new SlashCommandBuilder().setName("join").setDescription("현재 들어가 있는 음성 채널에 봇을 입장시킵니다."),
    new SlashCommandBuilder().setName("leave").setDescription("현재 음성 세션을 종료합니다."),
    new SlashCommandBuilder().setName("status").setDescription("현재 음성 세션 상태를 확인합니다."),
    new SlashCommandBuilder().setName("reset").setDescription("대화 문맥과 재생 큐를 초기화합니다."),
    new SlashCommandBuilder()
      .setName("say")
      .setDescription("텍스트를 바로 음성으로 읽습니다.")
      .addStringOption((option) =>
        option.setName("text").setDescription("읽을 문장").setRequired(true).setMaxLength(400),
      ),
  ].map((command) => command.toJSON());
  const client = new DiscordClient({
    intents: [GatewayIntentBits.Guilds, GatewayIntentBits.GuildVoiceStates],
  });
  const stt = new LocalFasterWhisperSttService(config, logger);
  const tts = new LocalMeloTtsService(config, logger);
  const llm = new OllamaLlmService(config);
  // One active session per guild, keyed by guild id.
  const sessions = new Map<string, GuildVoiceSession>();
  // Set by the first shutdown() call so repeated signals do not tear down twice.
  let shuttingDown = false;
  await stt.warmup();
  await tts.warmup();

  /** Voice channel the invoking member is currently in, if any. */
  function getVoiceChannel(interaction: ChatInputCommandInteraction): VoiceBasedChannel | null {
    const member = interaction.member as GuildMember | null;
    return member?.voice.channel ?? null;
  }

  /** Registers the commands for one guild when DISCORD_COMMAND_GUILD_ID is set, globally otherwise. */
  async function registerCommands(_appClient: Client): Promise<void> {
    const rest = new REST({ version: "10" }).setToken(config.DISCORD_BOT_TOKEN);
    if (config.DISCORD_COMMAND_GUILD_ID) {
      await rest.put(
        Routes.applicationGuildCommands(config.DISCORD_APPLICATION_ID, config.DISCORD_COMMAND_GUILD_ID),
        {
          body: commands,
        },
      );
      logger.info("Registered guild commands", config.DISCORD_COMMAND_GUILD_ID);
      return;
    }
    await rest.put(Routes.applicationCommands(config.DISCORD_APPLICATION_ID), {
      body: commands,
    });
    logger.info("Registered global commands");
  }

  /**
   * Returns the guild's session for the caller's voice channel, creating it
   * if needed. A session bound to a different channel is destroyed first.
   */
  async function createSession(interaction: ChatInputCommandInteraction): Promise<GuildVoiceSession> {
    if (!interaction.guild) {
      throw new Error("Guild interaction required");
    }
    const voiceChannel = getVoiceChannel(interaction);
    if (!voiceChannel) {
      throw new Error("먼저 음성 채널에 들어가 주세요.");
    }
    const existing = sessions.get(interaction.guild.id);
    if (existing && existing.voiceChannelId === voiceChannel.id) {
      existing.setTextChannel(interaction.channelId);
      return existing;
    }
    if (existing) {
      await existing.destroy();
      sessions.delete(interaction.guild.id);
    }
    const session = await GuildVoiceSession.create({
      client,
      config,
      logger,
      guild: interaction.guild,
      voiceChannel,
      textChannelId: interaction.channelId,
      stt,
      tts,
      llm,
    });
    sessions.set(interaction.guild.id, session);
    return session;
  }

  async function handleJoin(interaction: ChatInputCommandInteraction): Promise<void> {
    await interaction.deferReply({ ephemeral: true });
    try {
      const session = await createSession(interaction);
      // The channel name is taken from the second line of the status summary.
      await interaction.editReply(
        `음성 비서를 시작했습니다. 채널: ${session.statusSummary().split("\n")[1]?.replace("음성 채널: ", "") ?? "알 수 없음"}`,
      );
    } catch (error) {
      const message = error instanceof Error ? error.message : "세션 생성에 실패했습니다.";
      await interaction.editReply(message);
    }
  }

  async function handleLeave(interaction: ChatInputCommandInteraction): Promise<void> {
    const session = interaction.guild ? sessions.get(interaction.guild.id) : undefined;
    if (!session) {
      await interaction.reply({ content: "현재 활성화된 음성 세션이 없습니다.", ephemeral: true });
      return;
    }
    await session.destroy();
    sessions.delete(interaction.guildId!);
    await interaction.reply({ content: "음성 세션을 종료했습니다.", ephemeral: true });
  }

  async function handleStatus(interaction: ChatInputCommandInteraction): Promise<void> {
    const session = interaction.guild ? sessions.get(interaction.guild.id) : undefined;
    if (!session) {
      await interaction.reply({ content: "현재 활성화된 음성 세션이 없습니다.", ephemeral: true });
      return;
    }
    await interaction.reply({
      content: session.statusSummary(),
      ephemeral: true,
    });
  }

  async function handleReset(interaction: ChatInputCommandInteraction): Promise<void> {
    const session = interaction.guild ? sessions.get(interaction.guild.id) : undefined;
    if (!session) {
      await interaction.reply({ content: "현재 활성화된 음성 세션이 없습니다.", ephemeral: true });
      return;
    }
    session.clearConversation();
    await interaction.reply({ content: "대화 문맥과 재생 큐를 초기화했습니다.", ephemeral: true });
  }

  async function handleSay(interaction: ChatInputCommandInteraction): Promise<void> {
    await interaction.deferReply({ ephemeral: true });
    const session = interaction.guild ? sessions.get(interaction.guild.id) : undefined;
    if (!session) {
      await interaction.editReply("먼저 `/join` 으로 음성 세션을 시작해 주세요.");
      return;
    }
    const text = interaction.options.getString("text", true).trim();
    // "Required" only guarantees presence; whitespace-only input trims to "".
    if (text.length === 0) {
      await interaction.editReply("읽을 문장이 비어 있습니다.");
      return;
    }
    await session.speakText(text);
    await interaction.editReply("읽기 요청을 대기열에 추가했습니다.");
  }

  /**
   * Destroys all sessions, services and the client, then exits the process.
   * Calls made while a shutdown is already running are ignored, and the
   * process exits even when a teardown step throws.
   */
  async function shutdown(exitCode = 0): Promise<void> {
    if (shuttingDown) {
      return;
    }
    shuttingDown = true;
    logger.info("Shutting down");
    let finalCode = exitCode;
    try {
      for (const session of sessions.values()) {
        await session.destroy().catch((error) => {
          logger.warn("Session shutdown failed", error);
        });
      }
      sessions.clear();
      await Promise.allSettled([stt.destroy?.(), tts.destroy?.()]);
      await client.destroy();
    } catch (error) {
      logger.error("Shutdown failed", error);
      if (finalCode === 0) {
        finalCode = 1;
      }
    } finally {
      process.exit(finalCode);
    }
  }

  client.once("ready", async () => {
    logger.info("Discord client ready", client.user?.tag ?? "unknown");
    try {
      await registerCommands(client);
    } catch (error) {
      logger.error("Command registration failed", error);
    }
  });

  client.on("interactionCreate", async (interaction) => {
    if (!interaction.isChatInputCommand()) {
      return;
    }
    try {
      switch (interaction.commandName) {
        case "join":
          await handleJoin(interaction);
          return;
        case "leave":
          await handleLeave(interaction);
          return;
        case "status":
          await handleStatus(interaction);
          return;
        case "reset":
          await handleReset(interaction);
          return;
        case "say":
          await handleSay(interaction);
          return;
        default:
          await interaction.reply({ content: "알 수 없는 명령입니다.", ephemeral: true });
      }
    } catch (error) {
      logger.error("Interaction handler failed", error);
      // An interaction that was already acknowledged can only be edited.
      if (interaction.deferred || interaction.replied) {
        await interaction.editReply("명령 처리 중 오류가 발생했습니다.").catch(() => null);
        return;
      }
      await interaction.reply({ content: "명령 처리 중 오류가 발생했습니다.", ephemeral: true }).catch(() => null);
    }
  });

  process.on("SIGINT", () => {
    void shutdown(0);
  });
  process.on("SIGTERM", () => {
    void shutdown(0);
  });
  await client.login(config.DISCORD_BOT_TOKEN);
}

93
src/docker-runtime.ts Normal file
View File

@@ -0,0 +1,93 @@
import { spawn } from "node:child_process";
import { constants as fsConstants } from "node:fs";
import { access } from "node:fs/promises";
import path from "node:path";
import process from "node:process";
import type { AppConfig } from "./config.js";
/**
 * Reports whether `target` passes an `access` check with `F_OK`.
 *
 * @returns true when the check succeeds, false when it fails for any reason.
 */
async function fileExists(target: string): Promise<boolean> {
  return access(target, fsConstants.F_OK).then(
    () => true,
    () => false,
  );
}
/**
 * Runs a command and collects everything it writes to stdout.
 *
 * @param command - Executable to spawn.
 * @param args - Arguments passed to the executable.
 * @returns The captured stdout when the process exits with code 0; null when
 *   it cannot be spawned, exits with another code or is killed by a signal.
 */
async function captureStdout(command: string, args: string[]): Promise<string | null> {
  return await new Promise<string | null>((resolve) => {
    const child = spawn(command, args, {
      stdio: ["ignore", "pipe", "ignore"],
      windowsHide: true,
    });
    let stdout = "";
    // Decode in the stream so a multi-byte character split across two chunks
    // is not corrupted by per-chunk toString().
    child.stdout.setEncoding("utf8");
    child.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });
    child.on("error", () => resolve(null));
    // 'exit' can fire while stdout still holds unread data; 'close' is
    // emitted only after the stdio streams have been closed.
    child.on("close", (code) => {
      resolve(code === 0 ? stdout : null);
    });
  });
}
/**
 * Asks `where docker` (through cmd.exe) for the Docker executable.
 *
 * @returns The first non-empty output line that names an existing file, or
 *   null when the command fails or no line qualifies.
 */
async function resolveWithWhere(): Promise<string | null> {
  const output = await captureStdout("cmd.exe", ["/d", "/s", "/c", "where docker"]);
  if (!output) {
    return null;
  }
  for (const rawLine of output.split(/\r?\n/)) {
    const candidate = rawLine.trim();
    if (candidate.length === 0) {
      continue;
    }
    if (await fileExists(candidate)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Picks the Docker CLI command to invoke.
 *
 * Resolution order:
 *  1. `config.DOCKER_BIN`, when set and present on disk;
 *  2. the bare name `docker` on non-Windows platforms (no probing);
 *  3. the hard-coded Docker Desktop install paths;
 *  4. the first existing hit reported by `where docker`.
 *
 * @param config - Application config; only `DOCKER_BIN` is read here.
 * @returns The command or absolute path to spawn.
 * @throws Error when running on Windows and no candidate exists.
 */
export async function resolveDockerCommand(config: AppConfig): Promise<string> {
  const configured = config.DOCKER_BIN;
  if (configured && (await fileExists(configured))) {
    return configured;
  }

  if (process.platform !== "win32") {
    return "docker";
  }

  const wellKnownLocations = [
    "C:\\Program Files\\Docker\\Docker\\resources\\bin\\docker.exe",
    "C:\\Program Files\\Docker\\Docker\\resources\\bin\\docker-cli.exe",
  ];
  for (const location of wellKnownLocations) {
    if (await fileExists(location)) {
      return location;
    }
  }

  const fromWhere = await resolveWithWhere();
  if (fromWhere) {
    return fromWhere;
  }

  const messageParts = [
    "Docker 실행 파일을 찾지 못했습니다.",
    "VSCode를 완전히 다시 열어 PATH를 새로 고치거나,",
    ".env에 DOCKER_BIN=C:\\Program Files\\Docker\\Docker\\resources\\bin\\docker.exe 를 넣어주세요.",
  ];
  throw new Error(messageParts.join(" "));
}

View File

@@ -1,31 +1,427 @@
import process from "node:process"; import process from "node:process";
import { createInterface } from "node:readline";
import { loadConfig, requireAssistantRuntimeConfig, requireDiscordRuntimeConfig } from "./config.js"; import { loadConfig } from "./config.js";
import { runDiscordBot } from "./discord-main.js";
import { Logger } from "./logger.js"; import { Logger } from "./logger.js";
import { printLocalAudioDevices, runLocalAssistant } from "./local-main.js"; import { printAudioDevices, spawnLoopbackCapture } from "./audio/capture.js";
import { RealtimeSegmenter } from "./audio/realtime-segmenter.js";
import { FasterWhisperSttService } from "./services/faster-whisper-stt.js";
import { MeloTtsService } from "./services/melo-tts.js";
import { OllamaLlmService } from "./services/ollama-llm.js";
const mode = process.argv[2] ?? "discord"; const mode = process.argv[2] ?? "test-stt";
const config = loadConfig();
const logger = new Logger(config.LOG_LEVEL); async function runSttTest(options: { enableLlm: boolean; enableTts: boolean }): Promise<void> {
const config = loadConfig();
const logger = new Logger(config.DEBUG ? config.LOG_LEVEL : "error");
const stt = new FasterWhisperSttService(config, logger);
const llm = options.enableLlm ? new OllamaLlmService(config, logger) : null;
let tts = options.enableTts && config.TTS_ENABLED ? new MeloTtsService(config, logger) : null;
let capture = null as ReturnType<typeof spawnLoopbackCapture> | null;
let shuttingDown: Promise<void> | null = null;
let suppressCapture = false;
let receivedChunks = 0;
let receivedBytes = 0;
let maxPeak = 0;
let lastChunkAt = 0;
let lastLevelLogAt = 0;
let sawSpeechStart = false;
let emittedSegmentCount = 0;
const shutdown = async (exitCode: number, reason: string, error?: unknown): Promise<void> => {
if (shuttingDown) {
return await shuttingDown;
}
shuttingDown = (async () => {
if (error) {
logger.error(`Shutting down: ${reason}`, error);
} else {
logger.info("Shutting down", reason);
}
if (capture && !capture.killed && capture.exitCode === null) {
capture.kill("SIGTERM");
}
await stt.destroy().catch((destroyError) => {
logger.warn("STT destroy failed", destroyError);
});
if (tts) {
await tts.destroy().catch((destroyError) => {
logger.warn("TTS destroy failed", destroyError);
});
}
})();
await shuttingDown;
process.exit(exitCode);
};
process.once("SIGINT", () => {
void shutdown(0, "SIGINT");
});
process.once("SIGTERM", () => {
void shutdown(0, "SIGTERM");
});
process.once("uncaughtException", (error) => {
void shutdown(1, "uncaughtException", error);
});
process.once("unhandledRejection", (reason) => {
void shutdown(1, "unhandledRejection", reason);
});
process.once("exit", () => {
if (capture && !capture.killed && capture.exitCode === null) {
capture.kill("SIGKILL");
}
void stt.destroy();
if (tts) {
void tts.destroy();
}
});
console.log("STT 준비중...");
await stt.warmup();
logger.info("STT warmup finished");
console.log("STT 준비 완료");
if (llm) {
console.log("LLM 준비중...");
await llm.warmup();
logger.info("LLM warmup finished");
console.log("LLM 준비 완료");
}
if (tts) {
console.log("TTS 준비중...");
try {
await tts.warmup();
logger.info("TTS warmup finished", {
image: config.TTS_IMAGE,
language: config.TTS_LANGUAGE,
speaker: config.TTS_SPEAKER,
});
console.log("TTS 준비 완료");
} catch (error) {
logger.warn("TTS warmup failed", error);
console.log("TTS 비활성화: bun run setup:tts 를 먼저 실행하세요.");
tts = null;
}
}
const transcriptionQueue: Array<{ pcm16: Buffer; queuedAt: number; index: number }> = [];
let transcribing = false;
let nextSegmentIndex = 1;
const runNext = async (): Promise<void> => {
if (transcribing) {
return;
}
const next = transcriptionQueue.shift();
if (!next) {
return;
}
transcribing = true;
try {
const startedAt = Date.now();
const text = await stt.transcribePcm16(next.pcm16);
logger.info("STT latency", {
index: next.index,
wait_ms: startedAt - next.queuedAt,
transcribe_ms: Date.now() - startedAt,
});
if (!text) {
logger.info("빈 전사 결과");
} else {
logger.info("Transcript", { index: next.index, text });
if (config.DEBUG) {
if (config.DEBUG_TRANSCRIPTS) {
console.log(`\n[text] ${text}\n`);
}
} else {
console.log(`사용자> ${text}`);
}
if (llm) {
const assessmentStartedAt = Date.now();
const assessment = await llm.assessReplyNeed(text);
logger.info("Reply assessment", {
index: next.index,
should_reply: assessment.shouldReply,
likely_needs_lookup: assessment.likelyNeedsLookup,
reason: assessment.reason,
assessment_ms: Date.now() - assessmentStartedAt,
});
if (!assessment.shouldReply) {
if (config.DEBUG) {
console.log(`[skip] ${assessment.reason}\n`);
}
return;
}
const llmStartedAt = Date.now();
const reply = await llm.generateReply(text, {
onProgress: (message) => {
if (config.DEBUG) {
console.log(`[assistant] ${message}`);
return;
}
console.log(`답변> ${message}`);
},
});
logger.info("LLM latency", {
index: next.index,
llm_ms: Date.now() - llmStartedAt,
});
logger.info("LLM reply", { index: next.index, text: reply });
if (config.DEBUG) {
if (config.DEBUG_TRANSCRIPTS) {
console.log(`[assistant] ${reply}\n`);
}
} else {
console.log(`답변> ${reply}`);
}
if (tts) {
suppressCapture = true;
segmenter.reset();
try {
await tts.speak(reply);
} catch (error) {
logger.warn("TTS playback failed", error);
} finally {
suppressCapture = false;
sawSpeechStart = false;
maxPeak = 0;
}
}
}
}
} catch (error) {
logger.error("STT/LLM failed", error);
} finally {
transcribing = false;
void runNext();
}
};
const segmenter = new RealtimeSegmenter({
preRollSamples: config.SEGMENT_PREROLL_SAMPLES,
speechStartThreshold: config.SEGMENT_START_THRESHOLD,
speechContinueThreshold: config.SEGMENT_CONTINUE_THRESHOLD,
speechStartFrames: config.SEGMENT_START_FRAMES,
speechEndFrames: config.SEGMENT_END_FRAMES,
minSpeechSamples: config.SEGMENT_MIN_SPEECH_SAMPLES,
maxSpeechSamples: config.SEGMENT_MAX_SPEECH_SAMPLES,
onLevel: (peak) => {
if (peak > maxPeak) {
maxPeak = peak;
}
const now = Date.now();
if (now - lastLevelLogAt >= 3000) {
lastLevelLogAt = now;
logger.info("Audio input heartbeat", {
chunks: receivedChunks,
bytes: receivedBytes,
peak: maxPeak,
speech_started: sawSpeechStart,
emitted_segments: emittedSegmentCount,
});
maxPeak = 0;
}
},
onSpeechStart: (peak) => {
sawSpeechStart = true;
logger.info("Speech start detected", { peak });
},
onSpeechDiscarded: (samples) => {
logger.info("Discarded short speech segment", { samples });
},
onSpeechReady: (samples) => {
emittedSegmentCount += 1;
logger.info("Speech segment ready", {
index: emittedSegmentCount,
samples,
ms: Math.round((samples / 16000) * 1000),
});
},
onSegment: (pcm16) => {
const index = nextSegmentIndex++;
transcriptionQueue.push({
pcm16,
queuedAt: Date.now(),
index,
});
logger.info("Queued segment for STT", {
index,
queue: transcriptionQueue.length,
bytes: pcm16.length,
});
void runNext();
},
});
capture = spawnLoopbackCapture(config, logger);
capture.stdout.on("data", (chunk: Buffer) => {
receivedChunks += 1;
receivedBytes += chunk.length;
lastChunkAt = Date.now();
if (suppressCapture) {
return;
}
segmenter.pushChunk(chunk);
});
capture.stderr.on("data", (chunk: Buffer) => {
const text = chunk.toString().trim();
if (text) {
logger.debug("[capture]", text);
}
});
capture.on("error", (error) => {
void shutdown(1, "capture-error", error);
});
capture.on("exit", (code, signal) => {
logger.warn("capture exited", { code, signal });
if (!shuttingDown) {
void shutdown(1, "capture-exit");
}
});
if (config.DEBUG) {
if (options.enableLlm && options.enableTts) {
console.log("실시간 출력장치 STT+LLM+TTS 테스트를 시작합니다. Ctrl+C 로 종료합니다.");
} else if (options.enableLlm) {
console.log("실시간 출력장치 STT+LLM 테스트를 시작합니다. Ctrl+C 로 종료합니다.");
} else {
console.log("실시간 출력장치 STT 테스트를 시작합니다. Ctrl+C 로 종료합니다.");
}
console.log(`source: ${config.AUDIO_SOURCE ?? "unset"}`);
console.log(`model: ${config.WHISPER_MODEL}`);
console.log(`language: ${config.WHISPER_LANGUAGE}`);
console.log(`beam: ${config.WHISPER_BEAM_SIZE}`);
if (options.enableLlm) {
console.log(`llm: ${config.OLLAMA_MODEL}`);
}
if (options.enableTts) {
console.log(`tts: ${config.TTS_IMAGE}`);
}
}
setInterval(() => {
const now = Date.now();
if (lastChunkAt === 0 && !shuttingDown) {
logger.warn("아직 캡처 PCM 데이터가 들어오지 않았습니다. AUDIO_SOURCE 가 잘못됐거나 loopback 입력이 아닌 장치일 수 있습니다.");
return;
}
if (lastChunkAt > 0 && now - lastChunkAt >= 5000 && !shuttingDown) {
logger.warn("최근 5초 동안 새 PCM chunk 가 들어오지 않았습니다.");
}
}, 5000).unref();
}
/**
 * Interactive console loop for exercising the LLM on its own.
 *
 * Warms the model up, then reads stdin line by line:
 *  - blank line  -> re-prompt;
 *  - `/exit`     -> close the readline interface (which exits the process);
 *  - `/reset`    -> clear the conversation held by the LLM service;
 *  - otherwise   -> send the text to the model and print its reply.
 */
async function runLlmCli(): Promise<void> {
  const config = loadConfig();
  const logger = new Logger(config.DEBUG ? config.LOG_LEVEL : "error");
  const llm = new OllamaLlmService(config, logger);
  await llm.warmup();
  console.log(`LLM CLI 테스트를 시작합니다. model=${config.OLLAMA_MODEL}`);
  console.log("/exit 로 종료, /reset 으로 대화 초기화");

  const rl = createInterface({
    input: process.stdin,
    output: process.stdout,
    prompt: "you> ",
  });

  // Sends one message to the model and prints the outcome. Failures are
  // printed rather than thrown so the loop keeps running.
  const answer = async (text: string): Promise<void> => {
    try {
      const startedAt = Date.now();
      const reply = await llm.generateReply(text, {
        onProgress: (message) => {
          console.log(`assistant> ${message}`);
        },
      });
      logger.info("LLM latency", {
        llm_ms: Date.now() - startedAt,
      });
      console.log(`assistant> ${reply}`);
    } catch (error) {
      console.error(error instanceof Error ? error.message : String(error));
    }
  };

  rl.prompt();
  rl.on("line", async (line) => {
    const text = line.trim();
    switch (text) {
      case "/exit":
        rl.close();
        return;
      case "/reset":
        llm.resetConversation();
        console.log("assistant> 대화 문맥을 초기화했습니다.");
        break;
      case "":
        break;
      default:
        await answer(text);
    }
    rl.prompt();
  });
  rl.on("close", () => {
    process.exit(0);
  });
}
/**
 * One-shot TTS smoke test: warms the TTS service up and speaks a sentence.
 *
 * The sentence is taken from the CLI arguments after the mode name, joined
 * with spaces; a built-in Korean sentence is used when none is given.
 */
async function runTtsTest(): Promise<void> {
  const cliSentence = process.argv.slice(3).join(" ").trim();
  const text = cliSentence || "안녕하세요. 로컬 티티에스 테스트입니다.";

  const config = loadConfig();
  const logLevel = config.DEBUG ? config.LOG_LEVEL : "error";
  const tts = new MeloTtsService(config, new Logger(logLevel));

  console.log("TTS 준비중...");
  await tts.warmup();
  console.log("TTS 준비 완료");

  console.log(`재생 문장: ${text}`);
  await tts.speak(text);
}
async function main(): Promise<void> { async function main(): Promise<void> {
switch (mode) { switch (mode) {
case "discord": case "devices":
await runDiscordBot(requireDiscordRuntimeConfig(config), logger); await printAudioDevices();
return; return;
case "local": case "test-stt":
await runLocalAssistant(requireAssistantRuntimeConfig(config), logger); await runSttTest({ enableLlm: false, enableTts: false });
return; return;
case "local-devices": case "test-sttllm":
await printLocalAudioDevices(); await runSttTest({ enableLlm: true, enableTts: false });
return;
case "test-all":
await runSttTest({ enableLlm: true, enableTts: true });
return;
case "test-llm":
await runLlmCli();
return;
case "test-tts":
await runTtsTest();
return; return;
default: default:
throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices`); throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: test-stt, test-sttllm, test-all, test-llm, test-tts, devices`);
} }
} }
void main().catch((error) => { void main().catch((error) => {
logger.error("Fatal startup error", error); console.error(error instanceof Error ? error.message : String(error));
process.exit(1); process.exit(1);
}); });

View File

@@ -1,111 +0,0 @@
import { spawn } from "node:child_process";
import process from "node:process";
import type { AssistantRuntimeConfig } from "./config.js";
import { Logger } from "./logger.js";
import { LocalVoiceSession } from "./audio/local-voice-session.js";
import { requireFfmpegPath } from "./audio/ffmpeg-path.js";
import { LocalFasterWhisperSttService } from "./services/local-stt.js";
import { LocalMeloTtsService } from "./services/local-tts.js";
import { OllamaLlmService } from "./services/ollama-llm.js";
/**
 * Prints the audio devices known to the platform's tooling.
 *
 * On Windows it runs ffmpeg's dshow device listing (exit codes 0 and 1 are
 * both accepted) followed by usage hints. On other platforms it runs
 * `wpctl status` and `wpctl status -n` in sequence.
 *
 * @throws Error when a tool cannot be spawned or exits with an unaccepted code.
 */
export async function printLocalAudioDevices(): Promise<void> {
  // Settles when `child` exits: resolves for an accepted exit code, rejects
  // for any other code or for a spawn error.
  const waitForExit = (
    child: ReturnType<typeof spawn>,
    tool: string,
    acceptedCodes: readonly number[],
  ): Promise<void> =>
    new Promise<void>((resolve, reject) => {
      child.on("exit", (code) => {
        if (code !== null && acceptedCodes.includes(code)) {
          resolve();
          return;
        }
        reject(new Error(`${tool} exited with code ${code ?? "null"}`));
      });
      child.on("error", reject);
    });

  if (process.platform === "win32") {
    const ffmpegPath = requireFfmpegPath();
    console.log("\n=== ffmpeg dshow audio devices ===");
    const listing = spawn(
      ffmpegPath,
      ["-hide_banner", "-list_devices", "true", "-f", "dshow", "-i", "dummy"],
      {
        stdio: ["ignore", "ignore", "inherit"],
      },
    );
    await waitForExit(listing, "ffmpeg", [0, 1]);
    console.log("\n위 목록의 오디오 장치 이름을 `LOCAL_AUDIO_SOURCE` 에 그대로 넣으면 됩니다.");
    console.log("Windows 로컬 모드는 현재 출력 장치 직접 선택 대신 시스템 기본 출력 장치를 사용합니다.");
    return;
  }

  const runs = [
    {
      label: "wpctl status",
      args: ["status"],
    },
    {
      label: "wpctl status -n",
      args: ["status", "-n"],
    },
  ] as const;

  for (const { label, args } of runs) {
    console.log(`\n=== ${label} ===`);
    const status = spawn("wpctl", args, {
      stdio: ["ignore", "inherit", "inherit"],
    });
    await waitForExit(status, "wpctl", [0]);
  }
}
/**
 * Runs the local voice assistant: builds the STT, TTS and LLM services, warms
 * up STT and TTS, wires them into a `LocalVoiceSession` and starts it.
 *
 * SIGINT and SIGTERM tear the session and workers down and exit with code 0.
 *
 * @param config - Runtime configuration handed to every service and the session.
 * @param logger - Logger shared by the services and the session.
 */
export async function runLocalAssistant(config: AssistantRuntimeConfig, logger: Logger): Promise<void> {
  const stt = new LocalFasterWhisperSttService(config, logger);
  const tts = new LocalMeloTtsService(config, logger);
  const llm = new OllamaLlmService(config);

  await stt.warmup();
  await tts.warmup();

  const session = new LocalVoiceSession({ config, logger, stt, tts, llm });

  console.log(session.statusSummary());
  console.log("로컬 음성 테스트를 시작합니다. Ctrl+C 로 종료합니다.");
  if (process.platform === "win32") {
    console.log("Windows 로컬 모드는 현재 시스템 기본 출력 장치로 재생됩니다.");
  }
  if (config.DEBUG_TEXT_EVENTS) {
    console.log("텍스트 로그 출력이 켜져 있습니다.");
  }

  // Best-effort teardown: a failing session shutdown is only logged, and
  // worker teardown results are collected without failing the exit.
  const shutdown = async (exitCode = 0) => {
    await session.destroy().catch((error) => {
      logger.warn("Local session shutdown failed", error);
    });
    const workerTeardowns = [stt.destroy?.(), tts.destroy?.()];
    await Promise.allSettled(workerTeardowns);
    process.exit(exitCode);
  };

  for (const signal of ["SIGINT", "SIGTERM"] as const) {
    process.on(signal, () => {
      void shutdown(0);
    });
  }

  await session.start();
}

20
src/prompt-loader.ts Normal file
View File

@@ -0,0 +1,20 @@
import { readFileSync } from "node:fs";
import path from "node:path";
// Prompt text that has already been read, keyed by the file name it was requested under.
const cache = new Map<string, string>();

/**
 * Returns the trimmed contents of `prompts/<name>` under the current working
 * directory, reading the file only on the first request for that name.
 *
 * @param name - File name inside the `prompts` directory.
 * @returns The trimmed prompt text.
 * @throws Error when the file is empty after trimming; read errors from
 *         `readFileSync` propagate unchanged.
 */
export function loadPrompt(name: string): string {
  const remembered = cache.get(name);
  if (remembered) {
    return remembered;
  }

  const promptPath = path.resolve(process.cwd(), "prompts", name);
  const text = readFileSync(promptPath, "utf8").trim();
  if (text.length === 0) {
    throw new Error(`프롬프트 파일이 비어 있습니다: ${promptPath}`);
  }

  cache.set(name, text);
  return text;
}

View File

@@ -1,90 +1,203 @@
import { existsSync } from "node:fs"; import { spawn } from "node:child_process";
import { spawnSync } from "node:child_process"; import { constants as fsConstants } from "node:fs";
import { access } from "node:fs/promises";
import path from "node:path"; import path from "node:path";
import process from "node:process";
import type { AppConfig } from "./config.js"; import type { AppConfig } from "./config.js";
export interface PythonLaunch { export interface PythonCommandSpec {
command: string; command: string;
args: string[]; args: string[];
source: "venv" | "configured" | "system"; viaCmdShell?: boolean;
} }
function splitCommandSpec(spec: string): string[] { function shouldUseCmdShell(command: string): boolean {
return spec.match(/(?:[^\s"]+|"[^"]*")+/g)?.map((part) => part.replace(/^"|"$/g, "")) ?? []; if (process.platform !== "win32") {
return false;
}
const lower = command.toLowerCase();
return !path.isAbsolute(command) || lower.endsWith(".bat") || lower.endsWith(".cmd");
} }
function canRun(command: string, args: string[]): boolean { function splitCommand(command: string): string[] {
const result = spawnSync(command, [...args, "--version"], { const parts = command.match(/(?:[^\s"]+|"[^"]*")+/g) ?? [];
encoding: "utf8", return parts.map((part) => part.replace(/^"(.*)"$/, "$1"));
}
/**
 * Wraps `value` in double quotes when it contains a space, tab, double quote
 * or one of `& ( ) < > ^ |`; embedded double quotes are doubled.
 *
 * @param value - A single command-line token.
 * @returns The token unchanged when no listed character is present, otherwise
 *          the quoted form.
 */
function quoteWindowsCmdArg(value: string): string {
  const needsQuoting = /[ \t"&()<>^|]/.test(value);
  return needsQuoting ? `"${value.replace(/"/g, '""')}"` : value;
}
/**
 * Joins command-line tokens into one space-separated string, passing each
 * token through `quoteWindowsCmdArg` first.
 *
 * @param parts - Command followed by its arguments.
 * @returns The assembled command line.
 */
function buildWindowsCommandLine(parts: string[]): string {
  const quoted: string[] = [];
  for (const part of parts) {
    quoted.push(quoteWindowsCmdArg(part));
  }
  return quoted.join(" ");
}
/**
 * Turns a Python command spec plus extra arguments into the command/args pair
 * to spawn.
 *
 * On Windows, when the spec is flagged `viaCmdShell`, the whole invocation is
 * collapsed into one command line run through `cmd.exe /d /s /c`. Otherwise
 * the spec's command is spawned directly with its own args followed by
 * `extraArgs`.
 *
 * @param spec - Base Python command.
 * @param extraArgs - Arguments appended after `spec.args`.
 * @returns The invocation to spawn.
 */
export function buildPythonInvocation(spec: PythonCommandSpec, extraArgs: string[]): PythonCommandSpec {
  const argv = [...spec.args, ...extraArgs];
  const throughCmd = process.platform === "win32" && spec.viaCmdShell;
  if (!throughCmd) {
    return { command: spec.command, args: argv };
  }
  const commandLine = buildWindowsCommandLine([spec.command, ...argv]);
  return {
    command: "cmd.exe",
    args: ["/d", "/s", "/c", commandLine],
  };
}
async function canRun(command: string, args: string[], viaCmdShell = false): Promise<boolean> {
const invocation = viaCmdShell
? {
command: "cmd.exe",
args: ["/d", "/s", "/c", buildWindowsCommandLine([command, ...args, "--version"])],
}
: {
command,
args: [...args, "--version"],
};
return await new Promise<boolean>((resolve) => {
const child = spawn(invocation.command, invocation.args, {
stdio: ["ignore", "ignore", "ignore"],
windowsHide: true,
});
child.on("error", () => {
resolve(false);
});
child.on("exit", (code) => {
resolve(code === 0);
});
}); });
return result.status === 0;
} }
export function resolveLocalAiVenvPath(config: AppConfig): string { async function captureStdout(command: string, args: string[]): Promise<string | null> {
return path.resolve(process.cwd(), config.LOCAL_AI_VENV_PATH); return await new Promise<string | null>((resolve) => {
const child = spawn(command, args, {
stdio: ["ignore", "pipe", "ignore"],
windowsHide: true,
});
let stdout = "";
child.stdout.on("data", (chunk: Buffer) => {
stdout += chunk.toString();
});
child.on("error", () => {
resolve(null);
});
child.on("exit", (code) => {
if (code === 0) {
resolve(stdout);
return;
}
resolve(null);
});
});
} }
export function resolveLocalAiCachePath(config: AppConfig): string { async function resolveWindowsExecutable(name: string): Promise<string | null> {
return path.resolve(process.cwd(), config.LOCAL_AI_CACHE_DIR); const stdout = await captureStdout("cmd.exe", ["/d", "/s", "/c", `where ${name}`]);
} if (!stdout) {
return null;
export function resolveVenvPythonPath(config: AppConfig): string {
const venvPath = resolveLocalAiVenvPath(config);
return process.platform === "win32"
? path.join(venvPath, "Scripts", "python.exe")
: path.join(venvPath, "bin", "python");
}
export function resolvePythonLaunch(config: AppConfig, options?: { preferVenv?: boolean }): PythonLaunch {
const preferVenv = options?.preferVenv ?? true;
const venvPython = resolveVenvPythonPath(config);
if (preferVenv && existsSync(venvPython)) {
return {
command: venvPython,
args: [],
source: "venv",
};
} }
const configured = config.LOCAL_AI_PYTHON ? splitCommandSpec(config.LOCAL_AI_PYTHON) : []; const candidates = stdout
if (configured.length > 0 && canRun(configured[0]!, configured.slice(1))) { .split(/\r?\n/)
return { .map((line) => line.trim())
command: configured[0]!, .filter((line) => line.length > 0);
args: configured.slice(1),
source: "configured", for (const candidate of candidates) {
}; try {
await access(candidate, fsConstants.F_OK);
return candidate;
} catch {
// ignore
}
} }
const candidates = return null;
process.platform === "win32" }
? [
["py", "-3"],
["python"],
["python3"],
]
: [
["python3"],
["python"],
];
for (const [command, ...args] of candidates) { async function fileExists(target: string): Promise<boolean> {
if (canRun(command, args)) { try {
await access(target, fsConstants.X_OK);
return true;
} catch {
return false;
}
}
/**
 * Resolves the Python command to launch by delegating directly to
 * `resolveWorkerPythonCommand`; no additional logic is applied here.
 *
 * @param config - Application config forwarded unchanged.
 * @returns Whatever `resolveWorkerPythonCommand` resolves to.
 */
export async function resolvePythonCommand(config: AppConfig): Promise<{ command: string; args: string[] }> {
return await resolveWorkerPythonCommand(config);
}
export async function resolveBasePythonCommand(config: AppConfig): Promise<PythonCommandSpec> {
const configured = config.LOCAL_AI_PYTHON?.trim();
if (configured) {
const [command, ...args] = splitCommand(configured);
if (!command) {
throw new Error("LOCAL_AI_PYTHON 값이 비어 있습니다.");
}
return { return {
command, command,
args, args,
source: "system", viaCmdShell: shouldUseCmdShell(command),
}; };
} }
const venvPath = resolveVenvPythonPath(config);
if (await fileExists(venvPath)) {
return { command: venvPath, args: [] };
}
if (process.platform === "win32") {
return {
command: "python",
args: [],
viaCmdShell: true,
};
}
const unixCandidates = [
{ command: "python3", args: [] as string[] },
{ command: "python", args: [] as string[] },
];
for (const candidate of unixCandidates) {
if (await canRun(candidate.command, candidate.args)) {
return candidate;
}
} }
throw new Error( throw new Error("사용 가능한 Python 실행기를 찾지 못했습니다. `python3 --version` 또는 `python --version` 이 먼저 동작해야 합니다.");
[ }
"Python 실행 파일을 찾지 못했습니다.",
"1. Python 3.11 이상을 설치", export async function resolveWorkerPythonCommand(config: AppConfig): Promise<PythonCommandSpec> {
"2. 필요하면 `.env` 에 `LOCAL_AI_PYTHON=python` 또는 `LOCAL_AI_PYTHON=py -3` 설정", const venvPath = resolveVenvPythonPath(config);
"3. 그 다음 `bun run setup:local-ai` 실행", if (await fileExists(venvPath)) {
].join("\n"), return { command: venvPath, args: [] };
); }
return await resolveBasePythonCommand(config);
}
/**
 * Computes the path of the Python interpreter inside the configured
 * virtual environment.
 *
 * `config.LOCAL_AI_VENV_PATH` is resolved against the current working
 * directory; the interpreter is `Scripts/python.exe` on Windows and
 * `bin/python` elsewhere. The path is not checked for existence.
 *
 * @param config - Application config; only `LOCAL_AI_VENV_PATH` is read.
 * @returns Absolute path to the venv interpreter.
 */
export function resolveVenvPythonPath(config: AppConfig): string {
  const interpreterSegments = process.platform === "win32" ? ["Scripts", "python.exe"] : ["bin", "python"];
  const venvRoot = path.resolve(process.cwd(), config.LOCAL_AI_VENV_PATH);
  return path.join(venvRoot, ...interpreterSegments);
}
export function resolveWorkerScript(name: string): string {
return path.resolve(process.cwd(), "python", name);
} }

View File

@@ -0,0 +1,104 @@
import { spawn } from "node:child_process";
import { rm } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import process from "node:process";
import { randomUUID } from "node:crypto";
/**
 * Spawns `command` and waits for it to finish, with stdout and stderr
 * inherited from this process.
 *
 * @param command - Executable to spawn.
 * @param args - Arguments passed to the executable.
 * @param env - Optional environment for the child process.
 * @throws The spawn error, or an Error naming the command line and exit code
 *         when the process exits with anything other than 0.
 */
async function run(command: string, args: string[], env?: NodeJS.ProcessEnv): Promise<void> {
  const child = spawn(command, args, {
    stdio: ["ignore", "inherit", "inherit"],
    windowsHide: true,
    env,
  });
  await new Promise<void>((resolve, reject) => {
    child.on("error", reject);
    child.on("exit", (code) => {
      if (code !== 0) {
        reject(new Error(`${command} ${args.join(" ")} exited with code ${code ?? "null"}`));
        return;
      }
      resolve();
    });
  });
}
/**
 * Builds an ffmpeg `atempo` filter chain for the given playback rate.
 *
 * The rate is factored into stages so that every stage stays within 0.5–2.0:
 * `atempo=2.0` stages are emitted while the remainder is above 2,
 * `atempo=0.5` stages while it is below 0.5, and the remainder is emitted
 * last with three decimals.
 *
 * @param rate - Desired playback speed multiplier; must be finite and > 0.
 * @returns Comma-separated filter chain, e.g. `atempo=2.0,atempo=2.000` for 4.
 * @throws RangeError when `rate` is not a positive finite number. Without this
 *         guard 0, negative values and Infinity never leave the factoring
 *         loops, and NaN produces the invalid filter `atempo=NaN`.
 */
function buildAtempoFilter(rate: number): string {
  if (!Number.isFinite(rate) || rate <= 0) {
    throw new RangeError(`playback rate must be a positive finite number, got ${rate}`);
  }
  const filters: string[] = [];
  let remaining = rate;
  while (remaining > 2) {
    filters.push("atempo=2.0");
    remaining /= 2;
  }
  while (remaining < 0.5) {
    filters.push("atempo=0.5");
    remaining /= 0.5;
  }
  filters.push(`atempo=${remaining.toFixed(3)}`);
  return filters.join(",");
}
/**
 * Produces a copy of a WAV file re-timed to `playbackRate` using ffmpeg.
 *
 * A rate within 0.01 of 1 is treated as "no change" and the original path is
 * returned without running ffmpeg. Otherwise the result is written to a
 * uniquely named file in the OS temp directory, which the caller owns.
 *
 * @param filePath - Source WAV file.
 * @param playbackRate - Speed multiplier to apply.
 * @returns `filePath` when no conversion is needed, otherwise the temp file path.
 * @throws Whatever the ffmpeg run throws; the temp file is removed first so a
 *         failed conversion does not leave a partial file behind.
 */
async function applyPlaybackRate(filePath: string, playbackRate: number): Promise<string> {
  if (Math.abs(playbackRate - 1) < 0.01) {
    return filePath;
  }
  const targetPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${randomUUID()}.wav`);
  const filter = buildAtempoFilter(playbackRate);
  try {
    await run("ffmpeg", [
      "-y",
      "-hide_banner",
      "-loglevel",
      "error",
      "-i",
      filePath,
      "-filter:a",
      filter,
      targetPath,
    ]);
  } catch (error) {
    // The caller never learns targetPath on failure, so clean up here.
    await rm(targetPath, { force: true }).catch(() => undefined);
    throw error;
  }
  return targetPath;
}
/**
 * Plays a WAV file synchronously through the platform's native facilities.
 *
 * Only Windows is implemented: PowerShell loads the file into
 * `System.Media.SoundPlayer` and calls `PlaySync()`. The file path is handed
 * over through the `TTS_WAV_PATH` environment variable rather than being
 * embedded in the script text.
 *
 * @param filePath - WAV file to play.
 * @throws Error on any platform other than win32, or when PowerShell fails.
 */
async function playNativeWavFile(filePath: string): Promise<void> {
  if (process.platform !== "win32") {
    throw new Error(`지원하지 않는 플랫폼입니다: ${process.platform}`);
  }

  const script = [
    "$path = $env:TTS_WAV_PATH",
    "$player = New-Object System.Media.SoundPlayer $path",
    "$player.Load()",
    "$player.PlaySync()",
  ].join("; ");
  const powershellArgs = [
    "-NoProfile",
    "-NonInteractive",
    "-ExecutionPolicy",
    "Bypass",
    "-Command",
    script,
  ];
  const childEnv = {
    ...process.env,
    TTS_WAV_PATH: filePath,
  };
  await run("powershell.exe", powershellArgs, childEnv);
}
/**
 * Plays a WAV file, optionally re-timed to `playbackRate`.
 *
 * When re-timing produced a separate file, that file is removed after
 * playback whether or not playback succeeded; the original is never deleted.
 *
 * @param filePath - WAV file to play.
 * @param playbackRate - Speed multiplier; defaults to 1.
 */
export async function playWavFile(filePath: string, playbackRate = 1): Promise<void> {
  const playablePath = await applyPlaybackRate(filePath, playbackRate);
  const isTemporaryCopy = playablePath !== filePath;
  try {
    await playNativeWavFile(playablePath);
  } finally {
    if (isTemporaryCopy) {
      await rm(playablePath, { force: true }).catch(() => undefined);
    }
  }
}

View File

@@ -1,77 +0,0 @@
/** One entry in the history kept by `ConversationMemory`. */
export interface ConversationTurn {
// Who produced the turn.
role: "user" | "assistant";
// The turn's text.
text: string;
// Speaker identity; ConversationMemory fills these for user turns and omits them for assistant turns.
speakerId?: string;
speakerName?: string;
// Timestamp taken from Date.now() when the turn was recorded.
createdAt: number;
}
/** A single piece of user speech together with the identity of its speaker. */
export interface UserUtterance {
// Identifier and display name; both are rendered into the prompt header by ConversationMemory.buildPrompt.
speakerId: string;
speakerName: string;
// What the speaker said.
text: string;
}
/**
 * Bounded, in-order history of user and assistant turns, plus a renderer that
 * turns the history and a new utterance into prompt text.
 */
export class ConversationMemory {
  private readonly turns: ConversationTurn[] = [];

  /** @param maxTurns - Maximum number of turns retained; older turns are dropped. */
  constructor(private readonly maxTurns: number) {}

  /** Records a user turn stamped with the current time, then enforces the cap. */
  addUserTurn(utterance: UserUtterance): void {
    const { text, speakerId, speakerName } = utterance;
    this.turns.push({
      role: "user",
      text,
      speakerId,
      speakerName,
      createdAt: Date.now(),
    });
    this.trim();
  }

  /** Records an assistant turn stamped with the current time, then enforces the cap. */
  addAssistantTurn(text: string): void {
    this.turns.push({ role: "assistant", text, createdAt: Date.now() });
    this.trim();
  }

  /** Drops every stored turn. */
  clear(): void {
    this.turns.length = 0;
  }

  /** Returns a shallow copy of the stored turns, oldest first. */
  recentTurns(): ConversationTurn[] {
    return this.turns.slice();
  }

  /**
   * Renders the stored history followed by the utterance being answered.
   *
   * @param currentUtterance - The utterance to place under the "current" heading.
   * @returns The prompt text; a placeholder line stands in for empty history.
   */
  buildPrompt(currentUtterance: UserUtterance): string {
    const rendered: string[] = [];
    for (const turn of this.turns.slice(-this.maxTurns)) {
      if (turn.role === "assistant") {
        rendered.push(`[assistant]\n${turn.text}`);
      } else {
        rendered.push(
          `[user speaker_id=${turn.speakerId ?? "unknown"} speaker_name=${turn.speakerName ?? "unknown"}]\n${turn.text}`,
        );
      }
    }
    const history = rendered.join("\n\n");
    const historyBlock = history.length > 0 ? history : "(이전 대화 없음)";

    const lines = [
      "최근 대화:",
      historyBlock,
      "",
      "이번 발화:",
      `[user speaker_id=${currentUtterance.speakerId} speaker_name=${currentUtterance.speakerName}]`,
      currentUtterance.text,
    ];
    return lines.join("\n");
  }

  /** Removes the oldest turns once the history exceeds `maxTurns`. */
  private trim(): void {
    const overflow = this.turns.length - this.maxTurns;
    if (overflow <= 0) {
      return;
    }
    this.turns.splice(0, overflow);
  }
}

View File

@@ -0,0 +1,40 @@
import type { AppConfig } from "../config.js";
import type { Logger } from "../logger.js";
import { PythonJsonWorker } from "./python-json-worker.js";
/** Result type requested for the worker's "ping" call; logged as-is by `warmup`. */
interface PingResult {
model: string;
device: string;
compute_type: string;
}
/** Result type requested for the worker's "transcribe" call. */
interface TranscribeResult {
// Transcript text; trimmed by the caller before being returned.
text: string;
}
/**
 * Speech-to-text service that forwards requests to the
 * `loopback_stt_worker.py` script through a `PythonJsonWorker`.
 */
export class FasterWhisperSttService {
  private readonly worker: PythonJsonWorker;

  /**
   * @param config - Application config handed to the worker.
   * @param logger - Logger used by the worker and for the readiness message.
   */
  constructor(
    private readonly config: AppConfig,
    private readonly logger: Logger,
  ) {
    this.worker = new PythonJsonWorker(config, logger, "loopback_stt_worker.py", "faster-whisper");
  }

  /** Sends a "ping" request to the worker and logs what it reports. */
  async warmup(): Promise<void> {
    const info = await this.worker.request<PingResult>("ping", {});
    this.logger.info("STT worker ready", info);
  }

  /**
   * Transcribes a PCM16 buffer.
   *
   * @param pcm16 - Raw audio bytes; sent to the worker base64-encoded.
   * @returns The worker's transcript with surrounding whitespace removed.
   */
  async transcribePcm16(pcm16: Buffer): Promise<string> {
    const payload = { pcm16_base64: pcm16.toString("base64") };
    const { text } = await this.worker.request<TranscribeResult>("transcribe", payload);
    return text.trim();
  }

  /** Shuts the worker down. */
  async destroy(): Promise<void> {
    await this.worker.destroy();
  }
}

View File

@@ -1,5 +0,0 @@
import type { ConversationMemory, UserUtterance } from "./conversation.js";
/** Contract for a component that produces the assistant's reply text. */
export interface LlmService {
// Receives the conversation memory and the utterance to answer; resolves to the reply text.
generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string>;
}

View File

@@ -1,43 +0,0 @@
import type { AssistantRuntimeConfig } from "../config.js";
import type { Logger } from "../logger.js";
import { PythonJsonWorker } from "./python-json-worker.js";
import type { SttService } from "./stt.js";
/** Result type requested for the local STT worker's "transcribe" call. */
interface TranscribeResult {
// Transcript text; may be absent, which the caller treats like an empty transcript.
text?: string;
}
/**
 * `SttService` implementation backed by the `local_stt_worker.py` script,
 * driven through a `PythonJsonWorker`.
 */
export class LocalFasterWhisperSttService implements SttService {
  private readonly worker: PythonJsonWorker;

  /**
   * @param config - Runtime config; the LOCAL_STT_* values are passed to the
   *                 worker as its fifth constructor argument.
   * @param logger - Logger handed to the worker.
   */
  constructor(private readonly config: AssistantRuntimeConfig, logger: Logger) {
    const workerSettings = {
      LOCAL_STT_MODEL: config.LOCAL_STT_MODEL,
      LOCAL_STT_DEVICE: config.LOCAL_STT_DEVICE,
      LOCAL_STT_COMPUTE_TYPE: config.LOCAL_STT_COMPUTE_TYPE,
      LOCAL_STT_BEAM_SIZE: String(config.LOCAL_STT_BEAM_SIZE),
    };
    this.worker = new PythonJsonWorker(config, logger, "local_stt_worker.py", "local-stt", workerSettings);
  }

  /** Sends a "ping" request to the worker and waits for the answer. */
  async warmup(): Promise<void> {
    await this.worker.request("ping", {});
  }

  /**
   * Transcribes a PCM16 mono buffer in the configured default language.
   *
   * @param pcm16MonoAudio - Raw audio bytes; sent to the worker base64-encoded.
   * @returns The trimmed transcript, or `null` for empty input or an empty
   *          or missing transcript.
   */
  async transcribePcm16(pcm16MonoAudio: Buffer): Promise<string | null> {
    if (pcm16MonoAudio.byteLength === 0) {
      return null;
    }
    const { text } = await this.worker.request<TranscribeResult>("transcribe", {
      audio_base64: pcm16MonoAudio.toString("base64"),
      language: this.config.BOT_DEFAULT_LANGUAGE,
    });
    const transcript = (text ?? "").trim();
    return transcript === "" ? null : transcript;
  }

  /** Shuts the worker down. */
  async destroy(): Promise<void> {
    await this.worker.destroy();
  }
}

View File

@@ -1,94 +0,0 @@
import { Readable } from "node:stream";
import prism from "prism-media";
import type { AssistantRuntimeConfig } from "../config.js";
import type { Logger } from "../logger.js";
import { resolveFfmpegPath } from "../audio/ffmpeg-path.js";
import { PythonJsonWorker } from "./python-json-worker.js";
import type { PreparedSpeechAudio, TtsService } from "./tts.js";
/** Result type requested for the local TTS worker's "synthesize" call. */
interface SynthesizeResult {
// Base64-encoded audio; the caller raises an error when it is missing or empty.
wav_base64?: string;
}
/**
 * `TtsService` implementation backed by the `local_tts_worker.py` script.
 * Synthesized audio is piped through ffmpeg and exposed as a stream of
 * signed 16-bit little-endian PCM at 48 kHz, two channels.
 */
export class LocalMeloTtsService implements TtsService {
  private readonly worker: PythonJsonWorker;

  /**
   * @param config - Runtime config; the LOCAL_TTS_* values are passed to the
   *                 worker as its fifth constructor argument.
   * @param logger - Logger handed to the worker.
   */
  constructor(config: AssistantRuntimeConfig, logger: Logger) {
    // Publish the resolved ffmpeg location unless FFMPEG_PATH is already set.
    const resolvedFfmpegPath = resolveFfmpegPath();
    if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) {
      process.env.FFMPEG_PATH = resolvedFfmpegPath;
    }
    const workerSettings = {
      LOCAL_TTS_LANGUAGE: config.LOCAL_TTS_LANGUAGE,
      LOCAL_TTS_SPEAKER: config.LOCAL_TTS_SPEAKER,
      LOCAL_TTS_DEVICE: config.LOCAL_TTS_DEVICE,
      LOCAL_TTS_SPEED: String(config.LOCAL_TTS_SPEED),
    };
    this.worker = new PythonJsonWorker(config, logger, "local_tts_worker.py", "local-tts", workerSettings);
  }

  /** Sends a "ping" request to the worker and waits for the answer. */
  async warmup(): Promise<void> {
    await this.worker.request("ping", {});
  }

  /**
   * Synthesizes `text` and returns a transcoded audio stream for playback.
   *
   * @param text - Text to speak.
   * @param signal - Optional abort signal; it is forwarded to the worker
   *                 request and, on abort, destroys both streams.
   * @returns The ffmpeg output stream and a `dispose` callback that destroys
   *          the input and ffmpeg streams.
   * @throws Error when the worker returns no audio.
   */
  async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
    const { wav_base64: wavBase64 } = await this.worker.request<SynthesizeResult>(
      "synthesize",
      { text },
      signal,
    );
    if (!wavBase64) {
      throw new Error("로컬 TTS가 빈 오디오를 반환했습니다.");
    }

    const transcodeArgs = [
      "-analyzeduration",
      "0",
      "-loglevel",
      "0",
      "-i",
      "pipe:0",
      "-f",
      "s16le",
      "-ar",
      "48000",
      "-ac",
      "2",
      "pipe:1",
    ];
    const input = Readable.from([Buffer.from(wavBase64, "base64")]);
    const ffmpeg = new prism.FFmpeg({ args: transcodeArgs });

    // Shared teardown for both the abort path and explicit disposal.
    const teardown = (): void => {
      input.destroy();
      ffmpeg.destroy();
    };
    signal?.addEventListener("abort", () => teardown(), { once: true });

    input.pipe(ffmpeg);
    return {
      stream: ffmpeg,
      dispose: () => teardown(),
    };
  }

  /** Shuts the worker down. */
  async destroy(): Promise<void> {
    await this.worker.destroy();
  }
}

364
src/services/melo-tts.ts Normal file
View File

@@ -0,0 +1,364 @@
import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process";
import { randomUUID } from "node:crypto";
import { mkdir, rm } from "node:fs/promises";
import { once } from "node:events";
import path from "node:path";
import { createInterface } from "node:readline";
import type { AppConfig } from "../config.js";
import { resolveDockerCommand } from "../docker-runtime.js";
import type { Logger } from "../logger.js";
import { playWavFile } from "./audio-playback.js";
/**
 * Spawns `command` and waits for it to finish.
 *
 * When `command` is an absolute path its directory is prepended to the
 * child's PATH. On Windows a non-absolute command is run through the shell.
 * stderr is always inherited; stdout follows `stdio`.
 *
 * @param command - Executable name or absolute path.
 * @param args - Arguments passed to the executable.
 * @param stdio - How the child's stdout is handled; defaults to "ignore".
 * @throws Error with a Docker installation hint when the bare `docker`
 *         command is missing (ENOENT); the original spawn error otherwise;
 *         or an Error naming the command line and exit code on a non-zero exit.
 */
async function run(command: string, args: string[], stdio: "ignore" | "inherit" = "ignore"): Promise<void> {
  const isAbsoluteCommand = path.isAbsolute(command);

  const env = { ...process.env };
  if (isAbsoluteCommand) {
    const inheritedPath = env.PATH ?? env.Path ?? "";
    env.PATH = [path.dirname(command), inheritedPath].join(path.delimiter);
  }

  const child = spawn(command, args, {
    stdio: ["ignore", stdio, "inherit"],
    windowsHide: true,
    shell: process.platform === "win32" && !isAbsoluteCommand,
    env,
  });

  await new Promise<void>((resolve, reject) => {
    child.on("error", (error) => {
      const dockerMissing = (error as NodeJS.ErrnoException).code === "ENOENT" && command === "docker";
      reject(
        dockerMissing
          ? new Error("Docker를 찾지 못했습니다. Docker Desktop을 설치하고 실행한 뒤 다시 시도하세요.")
          : error,
      );
    });
    child.on("exit", (code) => {
      if (code !== 0) {
        reject(new Error(`${command} ${args.join(" ")} exited with code ${code ?? "null"}`));
        return;
      }
      resolve();
    });
  });
}
/** Response carrying the result of a request, tagged with the request's id. */
interface RpcSuccess<T> {
id: string;
result: T;
}
/** Response reporting a failed request, tagged with the request's id. */
interface RpcFailure {
id: string;
// Error text supplied by the responder.
error: string;
}
/** Either outcome of a request; `isFailure` tells the two apart. */
type RpcResponse<T> = RpcSuccess<T> | RpcFailure;
/**
 * Type guard that narrows a response to `RpcFailure`.
 *
 * @param value - Response to inspect.
 * @returns `true` when the response has an `error` property.
 */
function isFailure<T>(value: RpcResponse<T>): value is RpcFailure {
  return Reflect.has(value, "error");
}
/**
 * Result type used for the worker's `ping` request; warmup() logs it once the worker is ready.
 * NOTE(review): the fields presumably mirror the TTS_* settings passed to the container —
 * confirm against the worker script.
 */
interface TtsPingResult {
language: string;
speaker: string;
speaker_id: number;
device: string;
speed: number;
sdp_ratio: number;
noise_scale: number;
noise_scale_w: number;
speaker_count: number;
}
/**
 * Local text-to-speech service that drives a MeloTTS worker running in a Docker container.
 *
 * Requests are written to the worker's stdin as one JSON object per line
 * (`{ id, method, params }`). Every stdout line is parsed as a response and matched
 * to its request by `id`.
 */
export class MeloTtsService {
  /** The running worker process, or null when no worker is alive. */
  private processRef: ChildProcessWithoutNullStreams | null = null;
  /** True only while destroy() is tearing the worker down; start() refuses to spawn meanwhile. */
  private shuttingDown = false;
  /** Set once warmup() has completed; cleared again by destroy(). */
  private warmedUp = false;
  /** In-flight requests keyed by request id. */
  private readonly pending = new Map<
    string,
    {
      resolve: (value: unknown) => void;
      reject: (reason?: unknown) => void;
    }
  >();
  private nextId = 1;

  constructor(
    private readonly config: AppConfig,
    private readonly logger: Logger,
  ) {}

  /**
   * Prepares the service for use: creates the cache/output directories, checks that Docker
   * and the configured image are available, starts the worker, pings it and synthesizes one
   * throw-away sentence. Subsequent calls are no-ops until destroy() is called.
   */
  async warmup(): Promise<void> {
    if (this.warmedUp) {
      return;
    }
    await mkdir(path.resolve(process.cwd(), this.config.TTS_CACHE_DIR), { recursive: true });
    await mkdir(path.resolve(process.cwd(), this.config.TTS_OUTPUT_DIR), { recursive: true });
    const docker = await resolveDockerCommand(this.config);
    await run(docker, ["--version"]);
    await run(docker, ["image", "inspect", this.config.TTS_IMAGE]);
    await this.start();
    const result = await this.request<TtsPingResult>("ping", {});
    this.logger.info("TTS worker ready", result);

    const warmupFileName = `warmup-${randomUUID()}.wav`;
    const warmupHostPath = path.resolve(process.cwd(), this.config.TTS_OUTPUT_DIR, warmupFileName);
    try {
      await this.request("synthesize", {
        text: "안녕하세요. 로컬 티티에스 준비 테스트입니다.",
        output_path: `/work/output/${warmupFileName}`,
      });
    } finally {
      // The warm-up audio is never played; remove it whether or not synthesis succeeded.
      await rm(warmupHostPath, { force: true }).catch(() => undefined);
    }
    this.warmedUp = true;
  }

  /**
   * Synthesizes `text` into a temporary WAV file, plays it, and deletes the file afterwards.
   * Text that is empty after normalization is silently skipped.
   */
  async speak(text: string): Promise<void> {
    const trimmed = this.normalizeText(text);
    if (!trimmed) {
      return;
    }
    const fileName = `tts-${Date.now()}-${randomUUID()}.wav`;
    const targetPath = path.resolve(process.cwd(), this.config.TTS_OUTPUT_DIR, fileName);
    try {
      await this.synthesizeToFile(trimmed, targetPath, fileName);
      await playWavFile(targetPath, this.config.TTS_PLAYBACK_RATE);
    } finally {
      await rm(targetPath, { force: true }).catch(() => undefined);
    }
  }

  /**
   * Asks the worker to synthesize `text` into `/work/output/<fileName>` inside the container,
   * which is the bind mount of TTS_OUTPUT_DIR.
   *
   * @param text Text to synthesize (not normalized here).
   * @param targetPath Host path of the expected file; only its basename is used, and only
   *   when `fileName` is omitted.
   *   NOTE(review): the file lands at `targetPath` only if that path is inside TTS_OUTPUT_DIR.
   * @param fileName Optional file name to use inside the output directory.
   */
  async synthesizeToFile(text: string, targetPath: string, fileName?: string): Promise<void> {
    await this.warmup();
    const resolvedFileName = fileName ?? path.basename(targetPath);
    this.logger.info("Starting MeloTTS synthesis", {
      image: this.config.TTS_IMAGE,
      language: this.config.TTS_LANGUAGE,
      speaker: this.config.TTS_SPEAKER,
      speed: this.config.TTS_SPEED,
      playback_rate: this.config.TTS_PLAYBACK_RATE,
      sdp_ratio: this.config.TTS_SDP_RATIO,
      noise_scale: this.config.TTS_NOISE_SCALE,
      noise_scale_w: this.config.TTS_NOISE_SCALE_W,
      device: this.config.TTS_DEVICE,
    });
    await this.request("synthesize", {
      text,
      output_path: `/work/output/${resolvedFileName}`,
    });
  }

  /**
   * Stops the worker: closes stdin, sends SIGTERM, waits up to 1.5 s for it to exit and
   * escalates to SIGKILL if it is still alive. Resets the service so it can be warmed up again.
   */
  async destroy(): Promise<void> {
    const child = this.processRef;
    if (!child) {
      return;
    }
    this.shuttingDown = true;
    // `child.killed` only says a signal was sent, so liveness must be read from
    // exitCode/signalCode; otherwise the SIGKILL escalation below can never run.
    const hasExited = (): boolean => child.exitCode !== null || child.signalCode !== null;
    try {
      try {
        child.stdin.end();
      } catch {
        // stdin may already be closed; nothing to do.
      }
      if (!hasExited()) {
        child.kill("SIGTERM");
      }
      if (!hasExited()) {
        let graceTimer: ReturnType<typeof setTimeout> | undefined;
        await Promise.race([
          once(child, "exit").catch(() => null),
          new Promise<null>((resolve) => {
            graceTimer = setTimeout(() => resolve(null), 1500);
          }),
        ]);
        // Do not keep the event loop alive for the rest of the grace period.
        clearTimeout(graceTimer);
      }
      if (!hasExited()) {
        child.kill("SIGKILL");
        await once(child, "exit").catch(() => null);
      }
    } finally {
      this.processRef = null;
      this.shuttingDown = false;
      this.warmedUp = false;
    }
  }

  /**
   * Prepares text for speech: replaces markdown-style punctuation with spaces, collapses
   * whitespace, and appends a period when the text does not already end with `.`, `!`, `?` or `…`.
   *
   * @returns The normalized text, or "" when nothing is left to speak.
   */
  private normalizeText(input: string): string {
    const collapsed = input
      .replace(/[`*_#>\[\]\(\)]/g, " ")
      .replace(/\s+/g, " ")
      .trim();
    if (!collapsed) {
      return "";
    }
    if (/[.!?…]$/.test(collapsed)) {
      return collapsed;
    }
    return `${collapsed}.`;
  }

  /**
   * Spawns the worker container if it is not already running and wires up its stdio.
   *
   * @throws Error while destroy() is in progress.
   */
  private async start(): Promise<void> {
    if (this.processRef) {
      return;
    }
    if (this.shuttingDown) {
      throw new Error("tts worker is shutting down");
    }
    const docker = await resolveDockerCommand(this.config);
    const outputDir = path.resolve(process.cwd(), this.config.TTS_OUTPUT_DIR);
    const cacheDir = path.resolve(process.cwd(), this.config.TTS_CACHE_DIR);
    await mkdir(outputDir, { recursive: true });
    await mkdir(cacheDir, { recursive: true });

    const args = [
      "run",
      "--rm",
      "-i",
      "-v",
      `${outputDir}:/work/output`,
      "-v",
      `${cacheDir}:/cache`,
      "-e",
      "HF_HOME=/cache/huggingface",
      "-e",
      "HF_HUB_CACHE=/cache/huggingface/hub",
      "-e",
      "TRANSFORMERS_CACHE=/cache/transformers",
      "-e",
      `TTS_LANGUAGE=${this.config.TTS_LANGUAGE}`,
      "-e",
      `TTS_SPEAKER=${this.config.TTS_SPEAKER}`,
      "-e",
      `TTS_DEVICE=${this.config.TTS_DEVICE}`,
      "-e",
      `TTS_SPEED=${this.config.TTS_SPEED}`,
      "-e",
      `TTS_SDP_RATIO=${this.config.TTS_SDP_RATIO}`,
      "-e",
      `TTS_NOISE_SCALE=${this.config.TTS_NOISE_SCALE}`,
      "-e",
      `TTS_NOISE_SCALE_W=${this.config.TTS_NOISE_SCALE_W}`,
      "--entrypoint",
      "python",
    ];
    if (this.config.TTS_DEVICE !== "cpu") {
      args.push("--gpus", "all");
    }
    args.push(
      this.config.TTS_IMAGE,
      "/opt/realtime-voice-bot/melo_tts_worker.py",
    );

    const env = { ...process.env };
    if (path.isAbsolute(docker)) {
      // Make sibling executables in the Docker directory resolvable by the child.
      const dockerBinDir = path.dirname(docker);
      const currentPath = env.PATH ?? env.Path ?? "";
      env.PATH = `${dockerBinDir}${path.delimiter}${currentPath}`;
    }

    const child = spawn(docker, args, {
      stdio: ["pipe", "pipe", "pipe"],
      windowsHide: true,
      shell: process.platform === "win32" && !path.isAbsolute(docker),
      env,
    });
    this.processRef = child;

    const rl = createInterface({
      input: child.stdout,
      crlfDelay: Infinity,
    });
    rl.on("line", (line) => {
      this.handleStdoutLine(line);
    });

    child.stderr.on("data", (chunk: Buffer) => {
      const text = chunk.toString().trim();
      if (text.length > 0) {
        this.logger.warn(`[melotts] ${text}`);
      }
    });

    child.stdin.on("error", (error) => {
      this.logger.debug("melotts stdin error", error);
    });

    const rejectAllPending = (error: Error): void => {
      for (const entry of this.pending.values()) {
        entry.reject(error);
      }
      this.pending.clear();
    };

    // Without an 'error' listener a failed spawn is an unhandled event that crashes the
    // process and leaves callers waiting forever.
    child.on("error", (error) => {
      this.logger.warn("melotts worker process error", error);
      rejectAllPending(error);
      // pid is undefined only when the process never started; then 'exit' will not follow.
      if (child.pid === undefined && this.processRef === child) {
        this.processRef = null;
      }
    });

    child.on("exit", (code, signal) => {
      rejectAllPending(new Error(`melotts worker exited code=${code ?? "null"} signal=${signal ?? "null"}`));
      // A late exit of an old worker must not clear the reference to a newer one.
      if (this.processRef === child) {
        this.processRef = null;
      }
    });
  }

  /**
   * Sends one request to the worker (starting it if needed) and resolves with its result.
   *
   * @throws Error when the worker reports an error, exits, or cannot be started.
   */
  private async request<T>(method: string, params: Record<string, unknown>): Promise<T> {
    await this.start();
    const child = this.processRef;
    if (!child) {
      throw new Error("melotts worker is not running");
    }
    const id = String(this.nextId++);
    const payload = JSON.stringify({
      id,
      method,
      params,
    });
    const response = new Promise<T>((resolve, reject) => {
      this.pending.set(id, {
        resolve: (value) => resolve(value as T),
        reject,
      });
    });
    try {
      child.stdin.write(`${payload}\n`);
    } catch (error) {
      // Nothing was sent, so no response can ever arrive for this id.
      this.pending.delete(id);
      throw error;
    }
    return await response;
  }

  /**
   * Parses one stdout line from the worker and settles the matching pending request.
   * Blank lines, non-JSON lines, JSON values that are not objects with a string `id`,
   * and responses for unknown ids are ignored.
   */
  private handleStdoutLine(line: string): void {
    const trimmed = line.trim();
    if (!trimmed) {
      return;
    }
    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch (error) {
      this.logger.warn("melotts stdout parse failed", error);
      return;
    }
    // Valid JSON is not necessarily a response object (e.g. `null` or a bare number).
    if (typeof parsed !== "object" || parsed === null || typeof (parsed as { id?: unknown }).id !== "string") {
      return;
    }
    const message = parsed as RpcResponse<unknown>;
    const pending = this.pending.get(message.id);
    if (!pending) {
      return;
    }
    this.pending.delete(message.id);
    if (isFailure(message)) {
      pending.reject(new Error(message.error));
      return;
    }
    pending.resolve(message.result);
  }
}

View File

@@ -1,85 +1,550 @@
import type { AssistantRuntimeConfig } from "../config.js"; import type { AppConfig } from "../config.js";
import type { ConversationMemory, UserUtterance } from "./conversation.js"; import type { Logger } from "../logger.js";
import type { LlmService } from "./llm.js"; import { loadPrompt } from "../prompt-loader.js";
import { webFetch, webSearch } from "./web-tools.js";
const ASSISTANT_INSTRUCTIONS = [ interface OllamaChatMessage {
"너는 디스코드 음성 채널 또는 로컬 마이크 테스트에서 동작하는 한국어 음성 비서다.", role: "system" | "user" | "assistant";
"답변은 짧고 실용적으로 한다.", content: string;
"기본은 한 문장, 길어도 두 문장을 넘기지 않는다.", tool_calls?: OllamaToolCall[];
"말투는 자연스러운 한국어로 유지한다.", }
"speaker_id와 speaker_name은 화자 구분용이므로 필요할 때만 자연스럽게 반영한다.",
"잘 못 들었거나 의미가 불명확하면 짧게 다시 물어본다.",
"목록, 마크다운, 코드블록은 쓰지 않는다.",
"생각 과정을 드러내지 말고 최종 답변만 말한다.",
].join(" ");
interface OllamaChatResponse { interface OllamaChatResponse {
message?: { message?: {
content?: string; content?: string;
thinking?: string; tool_calls?: OllamaToolCall[];
}; };
error?: string;
} }
function normalizeReply(text: string): string { interface OllamaToolCall {
const strippedThink = text.replace(/<think>[\s\S]*?<\/think>/gi, " "); type: "function";
const compact = strippedThink.replace(/\s+/g, " ").trim(); function: {
if (compact.length <= 180) { name: string;
return compact; arguments: Record<string, unknown>;
} };
const sentences = compact.match(/[^.!?]+[.!?]?/g);
if (!sentences || sentences.length === 0) {
return compact.slice(0, 180).trim();
}
return sentences.slice(0, 2).join(" ").trim().slice(0, 180).trim();
} }
export class OllamaLlmService implements LlmService { interface OllamaToolDefinition {
constructor(private readonly config: AssistantRuntimeConfig) {} type: "function";
function: {
name: string;
description: string;
parameters: {
type: "object";
required?: string[];
properties: Record<string, unknown>;
};
};
}
async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string> { interface OllamaToolResultMessage {
const url = new URL("/api/chat", this.config.OLLAMA_BASE_URL); role: "tool";
const response = await fetch(url, { tool_name: string;
content: string;
}
/** Optional hooks for generateReply(). */
interface GenerateReplyOptions {
// Called with a short progress message (at most once per reply) before a tool that has one runs.
onProgress?: (message: string) => void;
}
/** Outcome of the reply gate that decides whether an utterance deserves an answer. */
export interface ReplyAssessment {
shouldReply: boolean;
likelyNeedsLookup: boolean;
// Short tag explaining the decision, e.g. "empty", "filler", "too_short", "fallback", or text from the model.
reason: string;
}
// System prompts, loaded once when this module is evaluated.
// ASSISTANT_PROMPT drives normal replies, REPLY_GATE_PROMPT the reply/no-reply decision,
// and REWRITE_KOREAN_PROMPT the language-repair pass.
const ASSISTANT_PROMPT = loadPrompt("assistant.md");
const REPLY_GATE_PROMPT = loadPrompt("reply-gate.md");
const REWRITE_KOREAN_PROMPT = loadPrompt("rewrite-korean.md");
// Function-tool schemas offered to the model when tools are enabled for a chat request.
// Each `name` must have a matching case in executeTool(); names without one are answered
// with an "unknown tool" error result.
const TOOL_DEFINITIONS: OllamaToolDefinition[] = [
{
type: "function",
function: {
name: "get_current_time",
description: "현재 시스템 시간을 Asia/Seoul 기준 ISO 문자열과 사람이 읽기 쉬운 형식으로 반환한다.",
parameters: {
type: "object",
properties: {},
},
},
},
{
type: "function",
function: {
name: "get_runtime_settings",
description: "현재 로컬 LLM 및 STT 실행 설정의 핵심 값만 반환한다.",
parameters: {
type: "object",
properties: {},
},
},
},
{
type: "function",
function: {
name: "list_project_commands",
description: "현재 프로젝트에서 사용 가능한 주요 bun 스크립트 명령 목록을 반환한다.",
parameters: {
type: "object",
properties: {},
},
},
},
{
type: "function",
function: {
name: "evaluate_math",
description: "간단한 산술식을 정확히 계산한다. 숫자, 공백, 소수점, 괄호, + - * / % 만 허용한다.",
parameters: {
type: "object",
required: ["expression"],
properties: {
expression: {
type: "string",
description: "예: (11434+12341)*412",
},
},
},
},
},
{
type: "function",
function: {
name: "web_search",
description: "웹 검색 결과 제목, URL, 요약을 가져온다. 최신 정보, 뉴스, 사실 확인이 필요할 때만 사용한다.",
parameters: {
type: "object",
required: ["query"],
properties: {
query: {
type: "string",
description: "검색어",
},
max_results: {
type: "number",
description: "가져올 최대 결과 수. 보통 3~5",
},
},
},
},
},
{
type: "function",
function: {
name: "fetch_url",
description: "주어진 URL의 페이지 제목과 본문 텍스트를 읽어온다. 검색 결과 상세 확인에 사용한다.",
parameters: {
type: "object",
required: ["url"],
properties: {
url: {
type: "string",
description: "http 또는 https URL",
},
max_chars: {
type: "number",
description: "본문에서 가져올 최대 글자 수",
},
},
},
},
},
];
export class OllamaLlmService {
private history: OllamaChatMessage[] = [];
constructor(
private readonly config: AppConfig,
private readonly logger: Logger,
) {}
/**
 * Sends one tiny chat request with the assistant system prompt so the model is ready
 * before the first real request, then logs the model name and its reply.
 */
async warmup(): Promise<void> {
  const probe: OllamaChatMessage[] = [
    { role: "system", content: ASSISTANT_PROMPT },
    { role: "user", content: "준비 상태 확인입니다. 한 단어로만 답하세요." },
  ];
  const { content } = await this.chat(probe);
  this.logger.info("LLM warmup finished", { model: this.config.OLLAMA_MODEL, reply: content });
}
/**
 * Decides whether the assistant should answer `userText`.
 *
 * Cheap local heuristics are tried first. Otherwise the reply-gate prompt is sent to the
 * model (tools disabled) and its JSON verdict is parsed. If no verdict can be parsed, the
 * method falls back to replying.
 */
async assessReplyNeed(userText: string): Promise<ReplyAssessment> {
  const quickVerdict = this.assessReplyNeedHeuristically(userText);
  if (quickVerdict) {
    return quickVerdict;
  }

  const gateReply = await this.chat(
    [
      { role: "system", content: REPLY_GATE_PROMPT },
      { role: "user", content: userText },
    ],
    { enableTools: false },
  );

  return (
    this.parseAssessment(gateReply.content) ?? {
      shouldReply: true,
      likelyNeedsLookup: this.mightNeedLookup(userText),
      reason: "fallback",
    }
  );
}
/**
 * Produces the assistant's reply to `userText`.
 *
 * The request is built from the system prompt, the stored history and the new user turn,
 * run through the tool-calling loop, and passed through language repair. The user turn and
 * the final reply are then appended to the history, which is trimmed to its configured size.
 *
 * @param options Optional progress callback forwarded to the tool loop.
 */
async generateReply(userText: string, options?: GenerateReplyOptions): Promise<string> {
  const conversation: Array<OllamaChatMessage | OllamaToolResultMessage> = [
    { role: "system", content: ASSISTANT_PROMPT },
    ...this.history,
    { role: "user", content: userText },
  ];

  const draft = await this.runAgentLoop(conversation, options);
  const finalReply = await this.repairReplyLanguageIfNeeded(draft, userText);

  // Only the plain user/assistant exchange is remembered; tool traffic is not.
  this.history.push(
    { role: "user", content: userText },
    { role: "assistant", content: finalReply },
  );
  this.trimHistory();
  return finalReply;
}
/** Forgets all remembered user/assistant turns by replacing the history with a new empty array. */
resetConversation(): void {
this.history = [];
}
/**
 * Keeps only the most recent MAX_CONVERSATION_TURNS exchanges (two messages per turn)
 * in the history. A zero or negative turn budget clears the history.
 */
private trimHistory(): void {
  const maxMessages = Math.max(0, this.config.MAX_CONVERSATION_TURNS * 2);
  if (this.history.length <= maxMessages) {
    return;
  }
  // slice(-0) is slice(0) and would keep everything, so a zero budget is handled explicitly.
  this.history = maxMessages === 0 ? [] : this.history.slice(-maxMessages);
}
/**
 * Runs the tool-calling loop: asks the model, executes any tools it requests, feeds the
 * results back, and repeats until the model answers without tool calls.
 *
 * `messages` is extended in place with every assistant and tool message exchanged.
 * A tool that throws does not abort the reply. Its error message is handed back to the
 * model as the tool result, in the same `{ error }` shape used for unknown tools.
 *
 * @param options Optional progress callback, invoked at most once with the first
 *   non-null progress message of a requested tool.
 * @returns The content of the first model response that carries no tool calls.
 * @throws Error when the model still requests tools after 6 rounds.
 */
private async runAgentLoop(
  messages: Array<OllamaChatMessage | OllamaToolResultMessage>,
  options?: GenerateReplyOptions,
): Promise<string> {
  let progressEmitted = false;

  for (let step = 0; step < 6; step += 1) {
    const response = await this.chat(messages, { enableTools: true });
    const toolCalls = response.toolCalls ?? [];
    messages.push({
      role: "assistant",
      content: response.content,
      tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
    });
    if (toolCalls.length === 0) {
      return response.content;
    }

    for (const call of toolCalls) {
      if (!progressEmitted) {
        const progressMessage = this.getProgressMessage(call.function.name);
        if (progressMessage) {
          options?.onProgress?.(progressMessage);
          progressEmitted = true;
        }
      }

      let result: string;
      try {
        result = await this.executeTool(call);
      } catch (error) {
        // Bad arguments or a failed lookup should let the model recover or apologise.
        const message = error instanceof Error ? error.message : String(error);
        this.logger.warn("LLM tool call failed", {
          name: call.function.name,
          arguments: call.function.arguments,
          error: message,
        });
        result = JSON.stringify({ error: message });
      }

      this.logger.info("LLM tool call", {
        name: call.function.name,
        arguments: call.function.arguments,
        result,
      });
      messages.push({
        role: "tool",
        tool_name: call.function.name,
        content: result,
      });
    }
  }

  throw new Error("도구 호출 루프가 제한 횟수를 넘었습니다.");
}
private async chat(
messages: Array<OllamaChatMessage | OllamaToolResultMessage>,
options?: { enableTools: boolean },
): Promise<{ content: string; toolCalls: OllamaToolCall[] }> {
const response = await fetch(`${this.config.OLLAMA_BASE_URL}/api/chat`, {
method: "POST", method: "POST",
headers: { headers: {
"Content-Type": "application/json", "content-type": "application/json",
}, },
body: JSON.stringify({ body: JSON.stringify({
model: this.config.OLLAMA_MODEL, model: this.config.OLLAMA_MODEL,
messages: [ messages,
{ tools: options?.enableTools ? TOOL_DEFINITIONS : undefined,
role: "system",
content: ASSISTANT_INSTRUCTIONS,
},
{
role: "user",
content: memory.buildPrompt(utterance),
},
],
think: false,
stream: false, stream: false,
think: false,
keep_alive: this.config.OLLAMA_KEEP_ALIVE, keep_alive: this.config.OLLAMA_KEEP_ALIVE,
options: {
num_ctx: this.config.OLLAMA_NUM_CTX,
temperature: 0.4,
num_predict: 120,
},
}), }),
}); });
const body = (await response.json().catch(() => ({}))) as OllamaChatResponse;
if (!response.ok) { if (!response.ok) {
throw new Error(body.error ?? `Ollama request failed with status ${response.status}`); const body = await response.text();
throw new Error(`Ollama API ${response.status}: ${body}`);
} }
const output = body.message?.content?.trim(); const payload = (await response.json()) as OllamaChatResponse;
if (!output) { const content = payload.message?.content?.trim() ?? "";
return "잘 못 들었습니다. 한 번만 다시 말씀해 주세요."; const toolCalls = payload.message?.tool_calls ?? [];
if (!content && toolCalls.length === 0) {
throw new Error("Ollama 응답에 message.content 와 tool_calls 가 모두 없습니다.");
} }
return normalizeReply(output); return {
content,
toolCalls,
};
}
/**
 * Executes one tool call requested by the model and returns its result as a JSON string.
 * Unknown tool names yield `{"error":"unknown tool: <name>"}` instead of throwing.
 *
 * @throws Error when a required argument is missing or the tool itself fails.
 */
private async executeTool(call: OllamaToolCall): Promise<string> {
  const { name, arguments: toolArgs } = call.function;

  if (name === "get_current_time") {
    return JSON.stringify(this.getCurrentTime());
  }
  if (name === "get_runtime_settings") {
    return JSON.stringify(this.getRuntimeSettings());
  }
  if (name === "list_project_commands") {
    return JSON.stringify(this.listProjectCommands());
  }
  if (name === "evaluate_math") {
    const expression = this.getStringArg(toolArgs, "expression");
    return JSON.stringify({ expression, result: this.evaluateMath(expression) });
  }
  if (name === "web_search") {
    const query = this.getStringArg(toolArgs, "query");
    // Result count is kept within 1..5, defaulting to 4.
    const requested = Math.trunc(this.getNumberArg(toolArgs, "max_results", 4));
    return JSON.stringify(await webSearch(query, Math.min(5, Math.max(1, requested))));
  }
  if (name === "fetch_url") {
    const url = this.getStringArg(toolArgs, "url");
    // Body length is kept within 1000..10000 characters, defaulting to 6000.
    const requested = Math.trunc(this.getNumberArg(toolArgs, "max_chars", 6000));
    return JSON.stringify(await webFetch(url, Math.min(10000, Math.max(1000, requested))));
  }

  return JSON.stringify({
    error: `unknown tool: ${name}`,
  });
}
/**
 * Reports the current time.
 *
 * @returns `iso` is the UTC ISO-8601 timestamp; `local` is the same instant formatted for
 *   the ko-KR locale in the Asia/Seoul time zone.
 */
private getCurrentTime(): { timezone: string; iso: string; local: string } {
  const timezone = "Asia/Seoul";
  const now = new Date();
  const seoulFormatter = new Intl.DateTimeFormat("ko-KR", {
    timeZone: timezone,
    dateStyle: "full",
    timeStyle: "long",
  });
  return { timezone, iso: now.toISOString(), local: seoulFormatter.format(now) };
}
/**
 * Returns a snapshot of the LLM and STT settings exposed to the model through the
 * `get_runtime_settings` tool. A missing AUDIO_SOURCE is reported as null.
 */
private getRuntimeSettings(): Record<string, unknown> {
  const {
    OLLAMA_BASE_URL,
    OLLAMA_MODEL,
    OLLAMA_KEEP_ALIVE,
    MAX_CONVERSATION_TURNS,
    WHISPER_MODEL,
    WHISPER_LANGUAGE,
    WHISPER_DEVICE,
    WHISPER_COMPUTE_TYPE,
    WHISPER_BEAM_SIZE,
    AUDIO_SOURCE,
    DEBUG,
  } = this.config;

  return {
    ollama_base_url: OLLAMA_BASE_URL,
    ollama_model: OLLAMA_MODEL,
    ollama_keep_alive: OLLAMA_KEEP_ALIVE,
    max_conversation_turns: MAX_CONVERSATION_TURNS,
    whisper_model: WHISPER_MODEL,
    whisper_language: WHISPER_LANGUAGE,
    whisper_device: WHISPER_DEVICE,
    whisper_compute_type: WHISPER_COMPUTE_TYPE,
    whisper_beam_size: WHISPER_BEAM_SIZE,
    audio_source: AUDIO_SOURCE ?? null,
    debug: DEBUG,
  };
}
/** Returns the fixed list of project commands exposed through the `list_project_commands` tool. */
private listProjectCommands(): { commands: string[] } {
  const scripts = [
    "setup",
    "setup:stt",
    "setup:llm",
    "setup:tts",
    "setup:all",
    "devices",
    "test:stt",
    "test:sttllm",
    "test:all",
    "test:llm",
  ];
  const commands = scripts.map((script) => `bun run ${script}`);
  // The TTS test takes the text to speak as an extra argument.
  commands.push("bun run test:tts -- \"안녕하세요\"");
  return { commands };
}
/**
 * Reads a required string argument from a tool call.
 *
 * @returns The argument with surrounding whitespace removed.
 * @throws Error when the argument is missing, not a string, or blank.
 */
private getStringArg(args: Record<string, unknown>, name: string): string {
  const raw = args[name];
  const text = typeof raw === "string" ? raw.trim() : "";
  if (text.length === 0) {
    throw new Error(`도구 인자 ${name} 가 비어 있습니다.`);
  }
  return text;
}
/**
 * Evaluates a simple arithmetic expression without executing it as code.
 *
 * Supported: decimal numbers, parentheses, unary `+`/`-`, and the binary operators
 * `+ - * / %` with the usual precedence. `**` is also accepted (right-associative),
 * because the character whitelist has always let it through. The input originates from
 * the model, so it is parsed by a small recursive-descent parser rather than handed to
 * `Function`/`eval`.
 *
 * @throws Error when the expression contains other characters, is malformed, or does not
 *   evaluate to a finite number (e.g. division by zero).
 */
private evaluateMath(expression: string): number {
  if (!/^[0-9+\-*/%().\s]+$/.test(expression)) {
    throw new Error("허용되지 않은 문자가 포함된 산술식입니다.");
  }

  // The trailing \S alternative turns leftovers such as a lone "." into an invalid token.
  const tokens = expression.match(/\d+(?:\.\d*)?|\.\d+|\*\*|[+\-*/%()]|\S/g) ?? [];
  let position = 0;
  const peek = (): string | undefined => tokens[position];
  const malformed = (): Error => new Error("산술식 구문이 올바르지 않습니다.");

  // expression := term (("+" | "-") term)*
  const parseExpression = (): number => {
    let value = parseTerm();
    for (let op = peek(); op === "+" || op === "-"; op = peek()) {
      position += 1;
      const right = parseTerm();
      value = op === "+" ? value + right : value - right;
    }
    return value;
  };

  // term := unary (("*" | "/" | "%") unary)*
  const parseTerm = (): number => {
    let value = parseUnary();
    for (let op = peek(); op === "*" || op === "/" || op === "%"; op = peek()) {
      position += 1;
      const right = parseUnary();
      if (op === "*") {
        value *= right;
      } else if (op === "/") {
        value /= right;
      } else {
        value %= right;
      }
    }
    return value;
  };

  // unary := ("+" | "-") unary | power
  const parseUnary = (): number => {
    const op = peek();
    if (op === "+" || op === "-") {
      position += 1;
      const operand = parseUnary();
      return op === "-" ? -operand : operand;
    }
    return parsePower();
  };

  // power := primary ("**" unary)?
  const parsePower = (): number => {
    const base = parsePrimary();
    if (peek() !== "**") {
      return base;
    }
    position += 1;
    return base ** parseUnary();
  };

  // primary := number | "(" expression ")"
  const parsePrimary = (): number => {
    const token = peek();
    if (token === undefined) {
      throw malformed();
    }
    position += 1;
    if (token === "(") {
      const inner = parseExpression();
      if (peek() !== ")") {
        throw malformed();
      }
      position += 1;
      return inner;
    }
    if (!/^(?:\d+\.?\d*|\.\d+)$/.test(token)) {
      throw malformed();
    }
    return Number(token);
  };

  const result = parseExpression();
  if (position !== tokens.length) {
    throw malformed();
  }
  if (typeof result !== "number" || !Number.isFinite(result)) {
    throw new Error("산술식 계산 결과가 유효하지 않습니다.");
  }
  return result;
}
/**
 * Reads an optional numeric argument from a tool call.
 *
 * Accepts finite numbers and non-blank numeric strings. Anything else yields `fallback`,
 * including blank strings, which `Number()` would otherwise convert to 0.
 *
 * @returns The parsed number, or `fallback` when the argument is absent or unusable.
 */
private getNumberArg(args: Record<string, unknown>, name: string, fallback: number): number {
  const value = args[name];
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : fallback;
  }
  if (typeof value === "string" && value.trim().length > 0) {
    const parsed = Number(value);
    if (Number.isFinite(parsed)) {
      return parsed;
    }
  }
  return fallback;
}
/**
 * Rewrites a reply into Korean when its script usage looks wrong.
 *
 * The rewrite is a best-effort polish step. If the rewrite request fails or comes back
 * empty, the original reply is returned rather than losing an otherwise usable answer.
 *
 * @param reply The reply produced by the tool loop.
 * @param userText The user's original utterance, given to the rewriter as context.
 * @returns The rewritten reply, or `reply` when no repair is needed or the repair fails.
 */
private async repairReplyLanguageIfNeeded(reply: string, userText: string): Promise<string> {
  if (!this.needsLanguageRepair(reply)) {
    return reply;
  }
  this.logger.warn("Reply language repair triggered", {
    reply,
    analysis: this.analyzeScriptUsage(reply),
  });

  try {
    const repaired = await this.chat(
      [
        {
          role: "system",
          content: REWRITE_KOREAN_PROMPT,
        },
        {
          role: "user",
          content: `원문 질문: ${userText}\n기존 답변: ${reply}`,
        },
      ],
      { enableTools: false },
    );
    const normalized = repaired.content.trim();
    return normalized || reply;
  } catch (error) {
    // The chat helper throws on HTTP errors and on empty responses alike.
    this.logger.warn("Reply language repair failed; keeping original reply", error);
    return reply;
  }
}
/**
 * Decides whether a reply should be rewritten into Korean.
 *
 * @returns true when the text contains any letter that is neither Hangul nor Latin, or
 *   when it contains Latin letters but no Hangul at all.
 */
private needsLanguageRepair(text: string): boolean {
  const { hangul, latin, otherLetters } = this.analyzeScriptUsage(text);
  const hasForeignScript = otherLetters > 0;
  const isLatinOnly = hangul === 0 && latin > 0;
  return hasForeignScript || isLatinOnly;
}
/**
 * Counts the letters of `text` by script, one code point at a time.
 * Non-letters (digits, punctuation, whitespace, symbols) are not counted.
 *
 * @returns The number of Hangul letters, Latin letters, and letters of any other script.
 */
private analyzeScriptUsage(text: string): { hangul: number; latin: number; otherLetters: number } {
  const tally = { hangul: 0, latin: 0, otherLetters: 0 };
  const letters = Array.from(text).filter((symbol) => /\p{Letter}/u.test(symbol));

  letters.forEach((symbol) => {
    if (/\p{Script=Hangul}/u.test(symbol)) {
      tally.hangul += 1;
    } else if (/\p{Script=Latin}/u.test(symbol)) {
      tally.latin += 1;
    } else {
      tally.otherLetters += 1;
    }
  });

  return tally;
}
/**
 * Returns the short progress message to report before the given tool runs.
 *
 * @returns A message for the web tools (`web_search`, `fetch_url`); null for every other tool.
 */
private getProgressMessage(toolName: string): string | null {
  const isWebTool = toolName === "web_search" || toolName === "fetch_url";
  return isWebTool ? "검색해볼게요." : null;
}
/**
 * Extracts a reply assessment from the model's gate response.
 *
 * The span from the first `{` to the last `}` is parsed as JSON. Both snake_case and
 * camelCase keys are accepted, and a flag counts as set only when it is exactly `true`.
 *
 * @returns The assessment, or null when no JSON object can be found or parsed.
 */
private parseAssessment(content: string): ReplyAssessment | null {
  const jsonCandidate = /\{[\s\S]*\}/.exec(content)?.[0];
  if (jsonCandidate === undefined) {
    return null;
  }

  let fields: Record<string, unknown>;
  try {
    fields = JSON.parse(jsonCandidate) as Record<string, unknown>;
  } catch {
    return null;
  }

  const isSet = (snakeKey: string, camelKey: string): boolean =>
    fields[snakeKey] === true || fields[camelKey] === true;

  return {
    shouldReply: isSet("should_reply", "shouldReply"),
    likelyNeedsLookup: isSet("likely_needs_lookup", "likelyNeedsLookup"),
    reason: typeof fields.reason === "string" ? fields.reason : "parsed",
  };
}
/**
 * Cheap local pre-check run before the model-based reply gate.
 *
 * Returns a "do not reply" assessment for empty input, for input matching the filler
 * pattern, and for input of at most two characters that contains no "?".
 * Returns null when no heuristic applies and the model should decide.
 */
private assessReplyNeedHeuristically(userText: string): ReplyAssessment | null {
const normalized = userText.trim();
if (!normalized) {
return {
shouldReply: false,
likelyNeedsLookup: false,
reason: "empty",
};
}
// NOTE(review): the alternatives of this pattern appear to have lost their characters in
// this view (each is a bare "+"), which is not a valid regular expression as shown.
// Restore the filler words from the original file before relying on this branch.
if (/^(+|+|+|+|+|+|+|+|+|+|+|+|+|+|+|\.?)$/u.test(normalized)) {
return {
shouldReply: false,
likelyNeedsLookup: false,
reason: "filler",
};
}
// Very short utterances are ignored unless they contain a question mark.
if (normalized.length <= 2 && !/[?]/.test(normalized)) {
return {
shouldReply: false,
likelyNeedsLookup: false,
reason: "too_short",
};
}
return null;
}
private mightNeedLookup(text: string): boolean {
return /(||||||||||||)/u.test(text);
} }
} }

View File

@@ -1,189 +1,184 @@
import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process"; import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process";
import { once } from "node:events";
import { createInterface } from "node:readline"; import { createInterface } from "node:readline";
import path from "node:path";
import type { AssistantRuntimeConfig } from "../config.js"; import type { AppConfig } from "../config.js";
import type { Logger } from "../logger.js"; import type { Logger } from "../logger.js";
import { resolveLocalAiCachePath, resolvePythonLaunch } from "../python-runtime.js"; import { buildPythonInvocation, resolveWorkerPythonCommand, resolveWorkerScript } from "../python-runtime.js";
interface WorkerRequest { interface RpcSuccess<T> {
id: number; id: string;
method: string; result: T;
params: Record<string, unknown>;
} }
interface WorkerResponse { interface RpcFailure {
id: number; id: string;
ok: boolean; error: string;
result?: unknown; }
error?: string;
type RpcResponse<T> = RpcSuccess<T> | RpcFailure;
function isFailure<T>(value: RpcResponse<T>): value is RpcFailure {
return "error" in value;
} }
export class PythonJsonWorker { export class PythonJsonWorker {
private child: ChildProcessWithoutNullStreams | null = null; private processRef: ChildProcessWithoutNullStreams | null = null;
private nextId = 1; private shuttingDown = false;
private readonly pending = new Map< private readonly pending = new Map<
number, string,
{ {
resolve: (value: unknown) => void; resolve: (value: unknown) => void;
reject: (error: Error) => void; reject: (reason?: unknown) => void;
} }
>(); >();
private nextId = 1;
constructor( constructor(
private readonly config: AssistantRuntimeConfig, private readonly config: AppConfig,
private readonly logger: Logger, private readonly logger: Logger,
private readonly scriptName: string, private readonly scriptName: string,
private readonly label: string, private readonly logPrefix: string,
private readonly workerEnv: Record<string, string>,
) {} ) {}
async request<T>(method: string, params: Record<string, unknown>, signal?: AbortSignal): Promise<T> { async start(): Promise<void> {
const child = this.ensureStarted(); if (this.processRef) {
const id = this.nextId++;
return await new Promise<T>((resolve, reject) => {
if (signal?.aborted) {
reject(new Error(`${this.label} request aborted before start`));
return; return;
} }
if (this.shuttingDown) {
const abortHandler = () => { throw new Error(`${this.logPrefix} worker is shutting down`);
this.pending.delete(id);
reject(new Error(`${this.label} request aborted`));
};
if (signal) {
signal.addEventListener("abort", abortHandler, { once: true });
} }
this.pending.set(id, { const python = await resolveWorkerPythonCommand(this.config);
resolve: (value) => { const scriptPath = resolveWorkerScript(this.scriptName);
if (signal) { const invocation = buildPythonInvocation(python, [scriptPath]);
signal.removeEventListener("abort", abortHandler);
} this.processRef = spawn(invocation.command, invocation.args, {
resolve(value as T); stdio: ["pipe", "pipe", "pipe"],
}, windowsHide: true,
reject: (error) => { env: {
if (signal) { ...process.env,
signal.removeEventListener("abort", abortHandler); WHISPER_MODEL: this.config.WHISPER_MODEL,
} WHISPER_LANGUAGE: this.config.WHISPER_LANGUAGE,
reject(error); WHISPER_DEVICE: this.config.WHISPER_DEVICE,
WHISPER_COMPUTE_TYPE: this.config.WHISPER_COMPUTE_TYPE,
WHISPER_BEAM_SIZE: String(this.config.WHISPER_BEAM_SIZE),
}, },
}); });
const message: WorkerRequest = { const rl = createInterface({
input: this.processRef.stdout,
crlfDelay: Infinity,
});
rl.on("line", (line) => {
this.handleStdoutLine(line);
});
this.processRef.stderr.on("data", (chunk: Buffer) => {
const text = chunk.toString().trim();
if (text.length > 0) {
this.logger.warn(`[${this.logPrefix}] ${text}`);
}
});
this.processRef.stdin.on("error", (error) => {
this.logger.debug(`${this.logPrefix} stdin error`, error);
});
this.processRef.on("exit", (code, signal) => {
const error = new Error(`${this.logPrefix} worker exited code=${code ?? "null"} signal=${signal ?? "null"}`);
for (const entry of this.pending.values()) {
entry.reject(error);
}
this.pending.clear();
this.processRef = null;
});
}
async request<T>(method: string, params: Record<string, unknown>): Promise<T> {
await this.start();
if (!this.processRef) {
throw new Error(`${this.logPrefix} worker is not running`);
}
const id = String(this.nextId++);
const payload = JSON.stringify({
id, id,
method, method,
params, params,
};
child.stdin.write(`${JSON.stringify(message)}\n`);
}); });
const promise = new Promise<T>((resolve, reject) => {
this.pending.set(id, {
resolve: (value) => resolve(value as T),
reject,
});
});
this.processRef.stdin.write(`${payload}\n`);
return await promise;
} }
async destroy(): Promise<void> { async destroy(): Promise<void> {
this.rejectAll(new Error(`${this.label} worker terminated`)); if (!this.processRef) {
if (!this.child) {
return; return;
} }
const child = this.child; const child = this.processRef;
this.child = null; this.shuttingDown = true;
child.kill("SIGTERM");
await new Promise<void>((resolve) => {
child.once("exit", () => resolve());
setTimeout(resolve, 1_500);
});
}
private ensureStarted(): ChildProcessWithoutNullStreams {
if (this.child) {
return this.child;
}
const launch = resolvePythonLaunch(this.config);
const scriptPath = path.resolve(process.cwd(), "python", this.scriptName);
const cachePath = resolveLocalAiCachePath(this.config);
const recentStderr: string[] = [];
const child = spawn(launch.command, [...launch.args, scriptPath], {
stdio: ["pipe", "pipe", "pipe"],
env: {
...process.env,
HF_HOME: cachePath,
TRANSFORMERS_CACHE: cachePath,
PYTHONIOENCODING: "utf-8",
BOT_DEFAULT_LANGUAGE: this.config.BOT_DEFAULT_LANGUAGE,
...this.workerEnv,
},
});
createInterface({
input: child.stdout,
crlfDelay: Number.POSITIVE_INFINITY,
}).on("line", (line) => {
if (!line.trim()) {
return;
}
let payload: WorkerResponse;
try { try {
payload = JSON.parse(line) as WorkerResponse; child.stdin.end();
} catch (error) { } catch {
this.logger.warn(`${this.label} stdout parse failed`, error); // ignore
}
if (!child.killed && child.exitCode === null) {
child.kill("SIGTERM");
}
const timedWait = Promise.race([
once(child, "exit"),
new Promise<null>((resolve) => setTimeout(() => resolve(null), 1500)),
]);
await timedWait;
if (child.exitCode === null && !child.killed) {
child.kill("SIGKILL");
await once(child, "exit").catch(() => null);
}
this.processRef = null;
this.shuttingDown = false;
}
private handleStdoutLine(line: string): void {
const trimmed = line.trim();
if (!trimmed) {
return; return;
} }
const pending = this.pending.get(payload.id); let message: RpcResponse<unknown>;
try {
message = JSON.parse(trimmed) as RpcResponse<unknown>;
} catch (error) {
this.logger.warn(`${this.logPrefix} stdout parse failed`, error);
return;
}
const pending = this.pending.get(message.id);
if (!pending) { if (!pending) {
return; return;
} }
this.pending.delete(payload.id); this.pending.delete(message.id);
if (payload.ok) { if (isFailure(message)) {
pending.resolve(payload.result); pending.reject(new Error(message.error));
return; return;
} }
pending.resolve(message.result);
pending.reject(new Error(payload.error ?? `${this.label} worker error`));
});
child.stderr.on("data", (chunk: Buffer) => {
const text = chunk.toString().trim();
if (text.length > 0) {
recentStderr.push(text);
if (recentStderr.length > 20) {
recentStderr.shift();
}
this.logger.warn(`[${this.label}]`, text);
}
});
child.on("exit", (code, signal) => {
if (this.child === child) {
this.child = null;
}
const detail = recentStderr.length > 0 ? `\n${recentStderr.join("\n")}` : "";
this.rejectAll(new Error(`${this.label} worker exited code=${code ?? "null"} signal=${signal ?? "null"}${detail}`));
});
child.on("error", (error) => {
this.rejectAll(error as Error);
});
this.child = child;
return child;
}
private rejectAll(error: Error): void {
const pending = [...this.pending.values()];
this.pending.clear();
for (const item of pending) {
item.reject(error);
}
} }
} }

View File

@@ -1,4 +0,0 @@
/** Contract for a speech-to-text backend (declared in a file that this change removes). */
export interface SttService {
// Resolves with the transcript, or null. NOTE(review): the parameter name suggests 16-bit mono PCM — confirm.
transcribePcm16(pcm16MonoAudio: Buffer): Promise<string | null>;
// Optional teardown hook.
destroy?(): Promise<void>;
}

View File

@@ -1,11 +0,0 @@
import type { Readable } from "node:stream";
/** Audio prepared for playback: a readable stream plus a callback that releases it. */
export interface PreparedSpeechAudio {
stream: Readable;
dispose: () => void;
}
/** Contract for a text-to-speech backend (declared in a file that this change removes). */
export interface TtsService {
// Prepares audio for `text`; an AbortSignal may be passed in.
preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio>;
// Optional teardown hook.
destroy?(): Promise<void>;
}

105
src/services/web-tools.ts Normal file
View File

@@ -0,0 +1,105 @@
/** One search hit: the title and snippet are tag-stripped, entity-decoded and whitespace-normalized. */
interface WebSearchResult {
title: string;
url: string;
snippet: string;
}
/** A fetched page reduced to text: the requested URL, the page title, and the truncated body text. */
interface WebFetchResult {
url: string;
title: string;
content: string;
}
/**
 * Removes markup from an HTML string.
 *
 * `<script>`, `<style>` and `<noscript>` elements are dropped together with their contents,
 * then every remaining tag is dropped. Each removal leaves a single space behind so
 * adjacent words do not run together.
 */
function stripTags(html: string): string {
  const removable = [
    /<script[\s\S]*?<\/script>/gi,
    /<style[\s\S]*?<\/style>/gi,
    /<noscript[\s\S]*?<\/noscript>/gi,
    /<[^>]+>/g,
  ];
  return removable.reduce((text, pattern) => text.replace(pattern, " "), html);
}
/**
 * Decodes HTML character references in a single pass.
 *
 * Handles the named references `&amp; &lt; &gt; &quot; &apos; &nbsp;` plus decimal
 * (`&#39;`) and hexadecimal (`&#x27;`) numeric references. A single pass matters:
 * decoding `&amp;` before the others would turn `&amp;lt;` into `<` instead of `&lt;`.
 * Unknown or out-of-range references are left untouched.
 */
function decodeEntities(text: string): string {
  const named: Record<string, string> = {
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
    nbsp: " ",
  };

  return text.replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g,
    (entity: string, decimal?: string, hex?: string, name?: string) => {
      if (name !== undefined) {
        return named[name];
      }
      const codePoint = decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex ?? "", 16);
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
      return isValid ? String.fromCodePoint(codePoint) : entity;
    },
  );
}
/** Decodes HTML entities, then collapses every run of whitespace to one space and trims the ends. */
function normalizeWhitespace(text: string): string {
  const words = decodeEntities(text)
    .split(/\s+/)
    .filter((word) => word.length > 0);
  return words.join(" ");
}
/**
 * Returns the normalized text of the first `<title>` element.
 *
 * @returns The title, or "" when the document has none.
 */
function extractTitle(html: string): string {
  const titleMatch = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  const rawTitle = titleMatch === null ? "" : (titleMatch[1] ?? "");
  return normalizeWhitespace(rawTitle);
}
/**
 * Extracts up to `maxResults` hits from a search results page.
 *
 * A hit is an anchor with class `result__a` followed by an element with class
 * `result__snippet`. Hits without a URL or title are skipped.
 *
 * The href is an HTML attribute value, so it is entity-decoded before use. Redirect links
 * of the form `//duckduckgo.com/l/?uddg=<target>` are unwrapped to the target URL, and
 * other protocol-relative links are made absolute, so the result can be passed straight
 * to webFetch(), which only accepts http(s) URLs.
 * NOTE(review): the redirect shape is based on the search engine's current HTML output — confirm.
 */
function extractSearchResults(html: string, maxResults: number): WebSearchResult[] {
  const resolveResultUrl = (rawHref: string): string => {
    const href = decodeEntities(rawHref).trim();
    try {
      const parsed = new URL(href, "https://duckduckgo.com");
      const isSearchHost = parsed.hostname === "duckduckgo.com" || parsed.hostname.endsWith(".duckduckgo.com");
      const target = isSearchHost && parsed.pathname === "/l/" ? parsed.searchParams.get("uddg") : null;
      if (target) {
        return target;
      }
      return href.startsWith("//") ? parsed.href : href;
    } catch {
      // Not parseable as a URL: hand back the decoded text unchanged.
      return href;
    }
  };

  const results: WebSearchResult[] = [];
  const pattern =
    /<a[^>]*class="result__a"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?(?:<a[^>]*class="result__snippet"[^>]*>|<div[^>]*class="result__snippet"[^>]*>)([\s\S]*?)(?:<\/a>|<\/div>)/gi;

  for (const match of html.matchAll(pattern)) {
    const url = resolveResultUrl(match[1] ?? "");
    const title = normalizeWhitespace(stripTags(match[2] ?? ""));
    const snippet = normalizeWhitespace(stripTags(match[3] ?? ""));
    if (!url || !title) {
      continue;
    }
    results.push({ title, url, snippet });
    if (results.length >= maxResults) {
      break;
    }
  }
  return results;
}
/**
 * Searches the web through DuckDuckGo's HTML endpoint.
 *
 * @param query Search terms.
 * @param maxResults Desired number of hits; kept within 1..8.
 * @returns The query together with the extracted hits.
 * @throws Error when the search request does not succeed.
 */
export async function webSearch(query: string, maxResults = 5): Promise<{ query: string; results: WebSearchResult[] }> {
  const limit = Math.min(Math.max(maxResults, 1), 8);
  const response = await fetch(`https://duckduckgo.com/html/?q=${encodeURIComponent(query)}`, {
    headers: {
      "user-agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
    },
  });
  if (!response.ok) {
    throw new Error(`web search failed: ${response.status}`);
  }

  const page = await response.text();
  return { query, results: extractSearchResults(page, limit) };
}
/**
 * Downloads a page and reduces it to its title and plain text.
 *
 * @param url Address to fetch; must start with `http://` or `https://`.
 * @param maxChars Maximum length of the returned text; values below 500 are raised to 500.
 * @returns The requested URL, the page title and the truncated body text.
 * @throws Error when the URL has another scheme or the request does not succeed.
 */
export async function webFetch(url: string, maxChars = 6000): Promise<WebFetchResult> {
  const isHttpUrl = /^https?:\/\//i.test(url);
  if (!isHttpUrl) {
    throw new Error("http 또는 https URL만 허용됩니다.");
  }

  const response = await fetch(url, {
    headers: {
      "user-agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
    },
    redirect: "follow",
  });
  if (!response.ok) {
    throw new Error(`web fetch failed: ${response.status}`);
  }

  const page = await response.text();
  const limit = Math.max(500, maxChars);
  return {
    url,
    title: extractTitle(page),
    content: normalizeWhitespace(stripTags(page)).slice(0, limit),
  };
}

38
src/setup-llm.ts Normal file
View File

@@ -0,0 +1,38 @@
import process from "node:process";
import { spawn } from "node:child_process";
import { loadConfig } from "./config.js";
/**
 * Spawns a command with inherited stdio and waits for it to finish.
 * On Windows the command is launched through a shell.
 *
 * @param command Executable to launch.
 * @param args Arguments passed to the executable.
 * @throws Error when the process exits with a non-zero (or null) code, or the
 *         spawn error itself when the process cannot be started.
 */
async function run(command: string, args: string[]): Promise<void> {
  const useShell = process.platform === "win32";
  await new Promise<void>((resolve, reject) => {
    const proc = spawn(command, args, {
      stdio: "inherit",
      windowsHide: true,
      shell: useShell,
    });
    proc.on("exit", (exitCode) => {
      if (exitCode !== 0) {
        reject(new Error(`${command} ${args.join(" ")} exited with code ${exitCode ?? "null"}`));
        return;
      }
      resolve();
    });
    proc.on("error", reject);
  });
}
/**
 * Pulls the Ollama model named by `OLLAMA_MODEL` in the loaded config,
 * logging a line before and after the pull.
 *
 * @throws Error when `ollama pull` fails (propagated from `run`).
 */
export async function setupLlm(): Promise<void> {
  const { OLLAMA_MODEL: model } = loadConfig();
  console.log(`Ollama 모델 준비: ${model}`);
  await run("ollama", ["pull", model]);
  console.log("Ollama LLM 환경 준비 완료");
}
// Run the setup only when `import.meta.main` is set (i.e. not when another
// module merely imports this one); on failure print the message and exit 1.
if (import.meta.main) {
  void setupLlm().catch((reason: unknown) => {
    const message = reason instanceof Error ? reason.message : String(reason);
    console.error(message);
    process.exit(1);
  });
}

View File

@@ -1,88 +0,0 @@
import { existsSync } from "node:fs";
import { mkdir } from "node:fs/promises";
import { spawn } from "node:child_process";
import path from "node:path";
import { loadConfig } from "./config.js";
import { resolveLocalAiCachePath, resolveLocalAiVenvPath, resolvePythonLaunch, resolveVenvPythonPath } from "./python-runtime.js";
/**
 * Spawns a command with inherited stdio and waits for it to finish.
 *
 * @param command Executable to launch.
 * @param args Arguments passed to the executable.
 * @param extraEnv Variables layered on top of the current process environment.
 * @throws Error when the process exits with a non-zero (or null) code, or the
 *         spawn error itself when the process cannot be started.
 */
async function run(command: string, args: string[], extraEnv?: NodeJS.ProcessEnv): Promise<void> {
  const childEnv = { ...process.env, ...extraEnv };
  await new Promise<void>((resolve, reject) => {
    const proc = spawn(command, args, { stdio: "inherit", env: childEnv });
    proc.on("exit", (status) => {
      if (status !== 0) {
        reject(new Error(`${command} ${args.join(" ")} exited with code ${status ?? "null"}`));
        return;
      }
      resolve();
    });
    proc.on("error", reject);
  });
}
/**
 * Makes sure pip is usable with the given interpreter.
 *
 * Probes `python -m pip --version` silently; if the probe fails for any
 * reason (non-zero exit or spawn error), falls back to
 * `python -m ensurepip --upgrade`.
 *
 * @param pythonBin Interpreter to probe.
 * @param env Environment for the probe; also handed to `run` for the fallback.
 */
async function ensurePip(pythonBin: string, env: NodeJS.ProcessEnv): Promise<void> {
  const pipProbe = new Promise<void>((resolve, reject) => {
    const probe = spawn(pythonBin, ["-m", "pip", "--version"], { stdio: "ignore", env });
    probe.on("exit", (status) => {
      if (status !== 0) {
        reject(new Error("pip missing"));
        return;
      }
      resolve();
    });
    probe.on("error", reject);
  });

  try {
    await pipProbe;
  } catch {
    await run(pythonBin, ["-m", "ensurepip", "--upgrade"], env);
  }
}
/**
 * Prepares the local AI Python environment: creates the cache directory,
 * creates the virtualenv if its interpreter is missing, ensures pip exists,
 * then installs the packaging tools and `python/requirements.txt`.
 */
async function main(): Promise<void> {
  const config = loadConfig();
  const venvPath = resolveLocalAiVenvPath(config);
  const venvPython = resolveVenvPythonPath(config);
  const cachePath = resolveLocalAiCachePath(config);
  const requirementsPath = path.resolve(process.cwd(), "python", "requirements.txt");

  // Both cache variables point at the same directory.
  const baseEnv = {
    HF_HOME: cachePath,
    TRANSFORMERS_CACHE: cachePath,
    PYTHONIOENCODING: "utf-8",
  };

  await mkdir(cachePath, { recursive: true });

  const venvMissing = !existsSync(venvPython);
  if (venvMissing) {
    const { command, args } = resolvePythonLaunch(config, { preferVenv: false });
    console.log(`기본 Python 확인: ${command} ${args.join(" ")}`.trim());
    console.log(`가상환경 생성: ${venvPath}`);
    await run(command, [...args, "-m", "venv", venvPath], baseEnv);
  }

  await ensurePip(venvPython, { ...process.env, ...baseEnv });

  console.log("로컬 AI 의존성 설치를 시작합니다.");
  const pipInstalls = [
    ["install", "--upgrade", "pip", "setuptools", "wheel"],
    ["install", "-r", requirementsPath],
  ];
  for (const pipArgs of pipInstalls) {
    await run(venvPython, ["-m", "pip", ...pipArgs], baseEnv);
  }

  const closingLines = ["설치가 끝났습니다.", "다음 순서:", "1. bun run devices", "2. bun run start:local"];
  for (const line of closingLines) {
    console.log(line);
  }
}
// Kick off the setup; on failure print the message and exit with status 1.
void main().catch((reason: unknown) => {
  const message = reason instanceof Error ? reason.message : String(reason);
  console.error(message);
  process.exit(1);
});

55
src/setup-python.ts Normal file
View File

@@ -0,0 +1,55 @@
import process from "node:process";
import { mkdir } from "node:fs/promises";
import path from "node:path";
import { spawn } from "node:child_process";
import { loadConfig } from "./config.js";
import { buildPythonInvocation, resolveBasePythonCommand, resolveVenvPythonPath } from "./python-runtime.js";
/**
 * Spawns a command in the given working directory with inherited stdio and
 * waits for it to finish.
 *
 * @param command Executable to launch.
 * @param args Arguments passed to the executable.
 * @param cwd Working directory for the child process.
 * @throws Error when the process exits with a non-zero (or null) code, or the
 *         spawn error itself when the process cannot be started.
 */
async function run(command: string, args: string[], cwd: string): Promise<void> {
  await new Promise<void>((resolve, reject) => {
    const proc = spawn(command, args, { cwd, stdio: "inherit", windowsHide: true });
    proc.on("exit", (status) => {
      if (status !== 0) {
        reject(new Error(`${command} ${args.join(" ")} exited with code ${status ?? "null"}`));
        return;
      }
      resolve();
    });
    proc.on("error", reject);
  });
}
/**
 * Creates the Python virtualenv for STT and installs its dependencies.
 *
 * The venv is created at `LOCAL_AI_VENV_PATH` (resolved against the current
 * working directory); the requirements file is `requirements-windows.txt` on
 * Windows and `requirements.txt` elsewhere, both under `python/`.
 */
export async function setupSttPython(): Promise<void> {
  const config = loadConfig();
  const python = await resolveBasePythonCommand(config);

  const venvRoot = path.resolve(process.cwd(), config.LOCAL_AI_VENV_PATH);
  const requirementsFile = process.platform === "win32" ? "requirements-windows.txt" : "requirements.txt";
  const requirementsPath = path.resolve(process.cwd(), "python", requirementsFile);

  await mkdir(path.dirname(venvRoot), { recursive: true });

  console.log(`가상환경 생성: ${venvRoot}`);
  const { command, args } = buildPythonInvocation(python, ["-m", "venv", venvRoot]);
  await run(command, args, process.cwd());

  const venvPython = resolveVenvPythonPath(config);
  const pipInstalls = [
    ["install", "--upgrade", "pip", "setuptools", "wheel"],
    ["install", "-r", requirementsPath],
  ];
  for (const pipArgs of pipInstalls) {
    await run(venvPython, ["-m", "pip", ...pipArgs], process.cwd());
  }

  console.log("Python STT 환경 준비 완료");
}
// Run the setup only when `import.meta.main` is set (i.e. not when another
// module merely imports this one); on failure print the message and exit 1.
if (import.meta.main) {
  void setupSttPython().catch((reason: unknown) => {
    const message = reason instanceof Error ? reason.message : String(reason);
    console.error(message);
    process.exit(1);
  });
}

76
src/setup-tts.ts Normal file
View File

@@ -0,0 +1,76 @@
import process from "node:process";
import { mkdir } from "node:fs/promises";
import path from "node:path";
import { spawn } from "node:child_process";
import { loadConfig } from "./config.js";
import { resolveDockerCommand } from "./docker-runtime.js";
import { Logger } from "./logger.js";
import { MeloTtsService } from "./services/melo-tts.js";
/**
 * Spawns a command with inherited stdio and waits for it to finish.
 *
 * When `command` is an absolute path, its directory is prepended to the
 * child's PATH so executables that sit next to it can be found by the child.
 * A shell is used only on Windows and only for non-absolute commands.
 *
 * @param command Executable name or absolute path.
 * @param args Arguments passed to the executable.
 * @param cwd Working directory for the child; defaults to the current one.
 * @throws Error with a Docker-specific hint when `docker` cannot be found,
 *         the raw spawn error otherwise, or an Error naming the exit code
 *         when the process exits with a non-zero (or null) code.
 */
async function run(command: string, args: string[], cwd = process.cwd()): Promise<void> {
  const env = { ...process.env };
  if (path.isAbsolute(command)) {
    const dockerBinDir = path.dirname(command);
    // The spread copy is a plain, case-sensitive object. On Windows the
    // variable is usually stored as `Path`, so reuse whichever spelling is
    // already present instead of adding a second, competing `PATH` key.
    const pathKey =
      process.platform === "win32"
        ? (Object.keys(env).find((key) => key.toUpperCase() === "PATH") ?? "PATH")
        : "PATH";
    const currentPath = env[pathKey] ?? "";
    // Skip the delimiter when nothing is inherited: an empty PATH entry is
    // interpreted as the current directory on POSIX systems.
    env[pathKey] = currentPath ? `${dockerBinDir}${path.delimiter}${currentPath}` : dockerBinDir;
  }
  await new Promise<void>((resolve, reject) => {
    const child = spawn(command, args, {
      cwd,
      stdio: "inherit",
      windowsHide: true,
      shell: process.platform === "win32" && !path.isAbsolute(command),
      env,
    });
    child.on("error", (error) => {
      if ((error as NodeJS.ErrnoException).code === "ENOENT" && command === "docker") {
        reject(new Error("Docker를 찾지 못했습니다. Docker Desktop을 설치하고 실행한 뒤 다시 시도하세요."));
        return;
      }
      reject(error);
    });
    child.on("exit", (code) => {
      if (code === 0) {
        resolve();
        return;
      }
      reject(new Error(`${command} ${args.join(" ")} exited with code ${code ?? "null"}`));
    });
  });
}
/**
 * Builds the MeloTTS Docker image and warms the model up once.
 *
 * Creates the TTS cache and output directories, runs `docker build` on
 * `docker/melotts` tagged as `TTS_IMAGE`, then calls `warmup()` on a
 * `MeloTtsService`. The service is always destroyed afterwards, and a
 * failure inside `destroy()` is deliberately ignored.
 */
export async function setupTts(): Promise<void> {
  const config = loadConfig();
  // Outside debug mode the logger is restricted to errors.
  const logLevel = config.DEBUG ? config.LOG_LEVEL : "error";
  const logger = new Logger(logLevel);
  const docker = await resolveDockerCommand(config);

  const dockerContext = path.resolve(process.cwd(), "docker", "melotts");
  const cacheDir = path.resolve(process.cwd(), config.TTS_CACHE_DIR);
  const outputDir = path.resolve(process.cwd(), config.TTS_OUTPUT_DIR);
  for (const dir of [cacheDir, outputDir]) {
    await mkdir(dir, { recursive: true });
  }

  console.log(`MeloTTS Docker 이미지 빌드: ${config.TTS_IMAGE}`);
  await run(docker, ["build", "-t", config.TTS_IMAGE, dockerContext]);

  const service = new MeloTtsService(config, logger);
  console.log("MeloTTS 모델 워밍업...");
  try {
    await service.warmup();
  } finally {
    await service.destroy().catch(() => undefined);
  }

  console.log("로컬 TTS 환경 준비 완료");
}
// Run the setup only when `import.meta.main` is set (i.e. not when another
// module merely imports this one); on failure print the message and exit 1.
if (import.meta.main) {
  void setupTts().catch((reason: unknown) => {
    const message = reason instanceof Error ? reason.message : String(reason);
    console.error(message);
    process.exit(1);
  });
}

18
src/setup.ts Normal file
View File

@@ -0,0 +1,18 @@
import process from "node:process";
import { setupLlm } from "./setup-llm.js";
import { setupSttPython } from "./setup-python.js";
import { setupTts } from "./setup-tts.js";
/**
 * Runs every setup step one after another: STT Python environment, then the
 * LLM, then TTS. A failing step stops the sequence.
 */
async function main(): Promise<void> {
  const steps = [setupSttPython, setupLlm, setupTts];
  for (const step of steps) {
    await step();
  }
}
// Run the combined setup only when `import.meta.main` is set; on failure
// print the message and exit with status 1.
if (import.meta.main) {
  void main().catch((reason: unknown) => {
    const message = reason instanceof Error ? reason.message : String(reason);
    console.error(message);
    process.exit(1);
  });
}