Compare commits
57 Commits
9f2fdc1369
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 4c5f8f9ceb | |||
| e41212e000 | |||
| 2ae52b5796 | |||
| 99857cdaa8 | |||
| a5f47393ee | |||
| f5194f55a1 | |||
| 46a6b926df | |||
| caae552d47 | |||
| b6284323be | |||
| 3abb6f81bb | |||
| 74715c0546 | |||
| aa14ebc447 | |||
| c328ef517e | |||
| ad357a6ede | |||
| 3360015179 | |||
| 28ffbf02e1 | |||
| ec02943538 | |||
| c53dcc853d | |||
| b28f163217 | |||
| 82f98ceb07 | |||
| 7e59013fa4 | |||
| 48937c684b | |||
| 962ff7037b | |||
| 11cfd7cc04 | |||
| c4baca1739 | |||
| f0f62c2307 | |||
| 39efd3aeed | |||
| 6a4fb067cd | |||
| dca5b2c9c4 | |||
| 2667fc2632 | |||
| 4202911b3e | |||
| 3ccc10c706 | |||
| d7d1d21240 | |||
| a33167ff69 | |||
| 5775c4809a | |||
| 10e0dd75db | |||
| 53777be675 | |||
| 96252528b4 | |||
| 52d7f74049 | |||
| 1a8e8d0a8f | |||
| 0a88e8dab1 | |||
| ac88b8c50a | |||
| 03b06bcc6e | |||
| 10fa109084 | |||
| 0005352be7 | |||
| 133118ca29 | |||
| 88c18ee69e | |||
| 645a5109a2 | |||
| 4c7cef8c18 | |||
| 60dce65b0f | |||
| ab4e0b38b0 | |||
| e74f71e45b | |||
| 7ba392c0e7 | |||
| dc39998241 | |||
| 18369ea7cb | |||
| 178283be61 | |||
| bb965c061e |
64
.env.example
64
.env.example
@@ -1,28 +1,42 @@
|
||||
DISCORD_BOT_TOKEN=
|
||||
DISCORD_APPLICATION_ID=
|
||||
DISCORD_COMMAND_GUILD_ID=
|
||||
|
||||
OLLAMA_BASE_URL=http://localhost:11434
|
||||
OLLAMA_MODEL=qwen3:0.6b
|
||||
OLLAMA_KEEP_ALIVE=5m
|
||||
OLLAMA_NUM_CTX=4096
|
||||
|
||||
LOCAL_AI_VENV_PATH=.local-ai/.venv
|
||||
LOCAL_AI_CACHE_DIR=.local-ai/cache
|
||||
LOCAL_AI_PYTHON=
|
||||
LOCAL_STT_MODEL=tiny
|
||||
LOCAL_STT_DEVICE=auto
|
||||
LOCAL_STT_COMPUTE_TYPE=auto
|
||||
LOCAL_STT_BEAM_SIZE=1
|
||||
LOCAL_TTS_LANGUAGE=KR
|
||||
LOCAL_TTS_SPEAKER=KR
|
||||
LOCAL_TTS_DEVICE=auto
|
||||
LOCAL_TTS_SPEED=1.12
|
||||
# Windows면 보통 python 또는 py -3
|
||||
LOCAL_AI_PYTHON=python
|
||||
|
||||
BOT_DEFAULT_LANGUAGE=ko
|
||||
MAX_CONVERSATION_TURNS=12
|
||||
LOCAL_AUDIO_SOURCE=
|
||||
LOCAL_AUDIO_SINK=
|
||||
LOCAL_SPEAKER_NAME=local-user
|
||||
DEBUG_TEXT_EVENTS=false
|
||||
# Windows: ffmpeg dshow 장치 이름
|
||||
# Linux: pactl list sources short 에서 monitor/source 이름
|
||||
AUDIO_SOURCE=
|
||||
DOCKER_BIN=
|
||||
|
||||
DEBUG=false
|
||||
TTS_ENABLED=true
|
||||
TTS_IMAGE=realtime-voice-bot-melotts:v0.1.2
|
||||
TTS_LANGUAGE=KR
|
||||
TTS_SPEAKER=KR
|
||||
TTS_DEVICE=cpu
|
||||
TTS_SPEED=1.18
|
||||
TTS_PLAYBACK_RATE=2.2
|
||||
TTS_SDP_RATIO=0.22
|
||||
TTS_NOISE_SCALE=0.55
|
||||
TTS_NOISE_SCALE_W=0.75
|
||||
TTS_CACHE_DIR=.local-ai/tts-cache
|
||||
TTS_OUTPUT_DIR=.local-ai/tts-output
|
||||
OLLAMA_BASE_URL=http://127.0.0.1:11434
|
||||
OLLAMA_MODEL=qwen3:8b
|
||||
OLLAMA_KEEP_ALIVE=5m
|
||||
MAX_CONVERSATION_TURNS=6
|
||||
WHISPER_MODEL=large-v3-turbo
|
||||
WHISPER_LANGUAGE=ko
|
||||
WHISPER_DEVICE=auto
|
||||
WHISPER_COMPUTE_TYPE=auto
|
||||
WHISPER_BEAM_SIZE=2
|
||||
|
||||
SEGMENT_START_THRESHOLD=900
|
||||
SEGMENT_CONTINUE_THRESHOLD=450
|
||||
SEGMENT_START_FRAMES=2
|
||||
SEGMENT_END_FRAMES=24
|
||||
SEGMENT_PREROLL_SAMPLES=3200
|
||||
SEGMENT_MIN_SPEECH_SAMPLES=7200
|
||||
SEGMENT_MAX_SPEECH_SAMPLES=160000
|
||||
|
||||
DEBUG_TRANSCRIPTS=true
|
||||
LOG_LEVEL=info
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -2,5 +2,5 @@ node_modules
|
||||
dist
|
||||
.env
|
||||
.local-ai
|
||||
__pycache__
|
||||
*.pyc
|
||||
__pycache__
|
||||
|
||||
5
.vscode/settings.json
vendored
Normal file
5
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"terminal.integrated.env.windows": {
|
||||
"PATH": "${env:PATH};C:\\Program Files\\Docker\\Docker\\resources\\bin"
|
||||
}
|
||||
}
|
||||
300
README.md
300
README.md
@@ -1,143 +1,233 @@
|
||||
# realtime_voice_bot
|
||||
|
||||
디스코드 음성 채널 또는 로컬 PC 마이크에서 한국어 음성을 인식하고, 완전 로컬 스택으로 답변을 생성한 뒤 다시 음성으로 읽어주는 최소 프로토타입입니다.
|
||||
출력장치로 재생되는 소리를 파일 저장 없이 바로 받아서 `faster-whisper`로 STT 테스트를 하고, 필요하면 `Ollama` LLM과 연결된 통합 테스트와 LLM CLI 테스트를 할 수 있는 최소 프로토타입입니다.
|
||||
|
||||
## 현재 스택
|
||||
현재 문서는 **Windows PC에서 실행하는 기준**으로 적었습니다.
|
||||
|
||||
- STT: `faster-whisper` + Whisper multilingual
|
||||
- LLM: `Ollama` + `qwen3:0.6b`
|
||||
- TTS: `MeloTTS` Korean
|
||||
- VAD: `avr-vad`
|
||||
## 현재 범위
|
||||
|
||||
외부 유료 API나 무료 한도형 API는 쓰지 않습니다.
|
||||
|
||||
## 현재 구현 범위
|
||||
|
||||
- Discord slash command 기반 제어: `/join`, `/leave`, `/status`, `/reset`, `/say`
|
||||
- 로컬 테스트 모드: PC 마이크로 직접 말하고 바로 응답 확인
|
||||
- `@discordjs/voice` 기반 음성 채널 입장 및 유저별 오디오 수신
|
||||
- 48k stereo PCM을 16k mono로 내려서 유저별 VAD 처리
|
||||
- 화자 발화 시작 시 현재 재생과 대기열 즉시 중단
|
||||
- Python 로컬 워커를 한 번 띄워 STT/TTS 모델을 메모리에 유지
|
||||
|
||||
## 필수 준비물
|
||||
|
||||
- Bun `1.3+`
|
||||
- Node.js `22.12+`
|
||||
- Python `3.11+`
|
||||
- `ffmpeg`
|
||||
- Ollama
|
||||
|
||||
Discord 모드까지 쓸 거면 추가로:
|
||||
|
||||
- Discord bot token
|
||||
- Discord application id
|
||||
- Node.js + TypeScript 메인 프로세스
|
||||
- 출력 오디오 실시간 캡처
|
||||
- 메모리 버퍼 기반 간단한 저지연 발화 분리
|
||||
- 미리 로드한 `faster-whisper` 워커에 PCM 직접 전달
|
||||
- 디스크에 WAV 저장 없이 바로 전사
|
||||
- STT 전용 테스트
|
||||
- STT 결과에 대해 답변 가치 판단 후 필요할 때만 LLM 답변하는 통합 테스트
|
||||
- 로컬 `Ollama` LLM 에이전트 CLI 테스트
|
||||
- 무료 로컬 `MeloTTS` 기반 음성 출력 테스트
|
||||
|
||||
## 빠른 시작
|
||||
|
||||
```bash
|
||||
```bat
|
||||
bun install
|
||||
ollama pull qwen3:0.6b
|
||||
bun run setup:local-ai
|
||||
bun run setup
|
||||
copy .env.example .env
|
||||
```
|
||||
|
||||
그다음 로컬 장치 확인:
|
||||
또는 전체 준비를 명시적으로:
|
||||
|
||||
```bash
|
||||
```bat
|
||||
bun run setup:all
|
||||
copy .env.example .env
|
||||
```
|
||||
|
||||
장치 목록 확인:
|
||||
|
||||
```bat
|
||||
bun run devices
|
||||
```
|
||||
|
||||
실행:
|
||||
|
||||
```bash
|
||||
bun run start:local
|
||||
```bat
|
||||
bun run test:stt
|
||||
```
|
||||
|
||||
Discord 모드:
|
||||
STT + LLM 통합 테스트:
|
||||
|
||||
```bash
|
||||
bun run start:discord
|
||||
```bat
|
||||
bun run test:sttllm
|
||||
```
|
||||
|
||||
STT + LLM + TTS 전체 연결 테스트:
|
||||
|
||||
```bat
|
||||
bun run test:all
|
||||
```
|
||||
|
||||
LLM 단독 테스트:
|
||||
|
||||
```bat
|
||||
bun run test:llm
|
||||
```
|
||||
|
||||
TTS 단독 테스트:
|
||||
|
||||
```bat
|
||||
bun run test:tts -- "안녕하세요. 로컬 티티에스 테스트입니다."
|
||||
```
|
||||
|
||||
## 환경 변수
|
||||
|
||||
`.env.example`를 복사해서 `.env`를 채우면 됩니다.
|
||||
|
||||
Discord 모드에서만 필수:
|
||||
|
||||
- `DISCORD_BOT_TOKEN`
|
||||
- `DISCORD_APPLICATION_ID`
|
||||
|
||||
기본값이 이미 들어있는 로컬 AI 설정:
|
||||
|
||||
- `AUDIO_SOURCE`
|
||||
- `bun run devices` 에서 보이는 `ffmpeg dshow` 오디오 장치 이름
|
||||
- 보통 `Stereo Mix`, 오디오 인터페이스 loopback 채널, 가상 케이블 입력 같은 이름을 넣습니다
|
||||
- `DOCKER_BIN`
|
||||
- 비워두면 자동 탐색
|
||||
- VSCode가 오래 떠 있어서 `docker` PATH를 못 잡을 때만 설정
|
||||
- 예: `C:\Program Files\Docker\Docker\resources\bin\docker.exe`
|
||||
- `DEBUG`
|
||||
- `true`면 상세 로그 출력
|
||||
- `false`면 전사 결과만 출력
|
||||
- `WHISPER_MODEL`
|
||||
- 기본값 `large-v3-turbo`
|
||||
- `OLLAMA_BASE_URL`
|
||||
- 기본값 `http://127.0.0.1:11434`
|
||||
- `OLLAMA_MODEL`
|
||||
- 기본값 `qwen3:8b`
|
||||
- `TTS_ENABLED`
|
||||
- 기본값 `true`
|
||||
- `TTS_IMAGE`
|
||||
- 기본값 `realtime-voice-bot-melotts:v0.1.2`
|
||||
- `TTS_LANGUAGE`
|
||||
- 기본값 `KR`
|
||||
- `TTS_SPEAKER`
|
||||
- 기본값 `KR`
|
||||
- `TTS_DEVICE`
|
||||
- 기본값 `cpu`
|
||||
- Docker GPU passthrough를 쓸 때만 `cuda`로 바꿉니다
|
||||
- `TTS_SPEED`
|
||||
- 기본값 `1.18`
|
||||
- `TTS_PLAYBACK_RATE`
|
||||
- 기본값 `2.2`
|
||||
- 생성된 WAV를 `ffmpeg`로 더 빠르게 재생합니다
|
||||
- `TTS_SDP_RATIO`
|
||||
- 기본값 `0.22`
|
||||
- `TTS_NOISE_SCALE`
|
||||
- 기본값 `0.55`
|
||||
- `TTS_NOISE_SCALE_W`
|
||||
- 기본값 `0.75`
|
||||
- `OLLAMA_KEEP_ALIVE`
|
||||
- `OLLAMA_NUM_CTX`
|
||||
- `LOCAL_AI_VENV_PATH`
|
||||
- `LOCAL_AI_CACHE_DIR`
|
||||
- `LOCAL_STT_MODEL`
|
||||
- `LOCAL_STT_DEVICE`
|
||||
- `LOCAL_STT_COMPUTE_TYPE`
|
||||
- `LOCAL_STT_BEAM_SIZE`
|
||||
- `LOCAL_TTS_LANGUAGE`
|
||||
- `LOCAL_TTS_SPEAKER`
|
||||
- `LOCAL_TTS_DEVICE`
|
||||
- `LOCAL_TTS_SPEED`
|
||||
|
||||
선택:
|
||||
|
||||
- `DISCORD_COMMAND_GUILD_ID`
|
||||
- 테스트 서버에만 slash command를 즉시 반영하려면 설정
|
||||
- `LOCAL_AI_PYTHON`
|
||||
- Python 경로 자동 탐지가 안 되면 설정
|
||||
- 예시: `python`
|
||||
- Windows 예시: `py -3`
|
||||
- `LOCAL_AUDIO_SOURCE`
|
||||
- 로컬 입력 장치
|
||||
- Linux는 `pw-record --target`, Windows는 `ffmpeg dshow` 장치 이름
|
||||
- `LOCAL_AUDIO_SINK`
|
||||
- Linux 로컬 출력 장치
|
||||
- Windows는 현재 시스템 기본 출력 장치 사용
|
||||
- `LOCAL_SPEAKER_NAME`
|
||||
- 로컬 테스트에서 프롬프트에 넣을 화자 이름
|
||||
- `BOT_DEFAULT_LANGUAGE`
|
||||
- 기본값 `5m`
|
||||
- `MAX_CONVERSATION_TURNS`
|
||||
- 기본값 `6`
|
||||
- 최근 대화 몇 턴까지 LLM 문맥으로 넘길지 정합니다
|
||||
- `WHISPER_LANGUAGE`
|
||||
- 기본값 `ko`
|
||||
- `DEBUG_TEXT_EVENTS`
|
||||
- `true`면 transcript/reply를 콘솔에 같이 출력
|
||||
- `WHISPER_DEVICE`
|
||||
- `auto`, `cuda`, `cpu`
|
||||
- `WHISPER_COMPUTE_TYPE`
|
||||
- `auto`, `float16`, `int8_float16`, `int8`, `float32`
|
||||
- `WHISPER_BEAM_SIZE`
|
||||
- 기본값 `2`
|
||||
- `SEGMENT_END_FRAMES`
|
||||
- 기본값 `24`
|
||||
- 끝을 조금 더 늦게 잘라서 문장이 잘게 끊기는 현상을 줄입니다
|
||||
- `SEGMENT_MAX_SPEECH_SAMPLES`
|
||||
- 기본값 `160000`
|
||||
- 너무 긴 구간은 강제로 끊어서 지연이 과하게 커지는 걸 막습니다
|
||||
|
||||
## 속도 우선 기본값
|
||||
## 메모
|
||||
|
||||
- STT 기본 모델은 `tiny`
|
||||
- LLM 기본 모델은 `qwen3:0.6b`
|
||||
- TTS 기본 속도는 `1.12`
|
||||
- 이 버전은 `STT`, `STT+LLM`, `LLM` 테스트를 따로 제공합니다.
|
||||
- `test:sttllm`은 STT와 LLM만 연결합니다.
|
||||
- `test:all`은 STT, LLM, TTS를 모두 연결합니다.
|
||||
- `test:all`에서는 자기 음성을 다시 전사하지 않도록 TTS 재생 중에는 캡처를 잠시 멈춥니다.
|
||||
- LLM 프롬프트는 `prompts/*.md` 에 분리되어 있습니다.
|
||||
- 최소 지연을 위해 파일 저장은 하지 않습니다.
|
||||
- VAD는 현재 모델 기반이 아니라 진폭 기반 단순 분리입니다.
|
||||
- Windows에서는 보통 출력 루프백이 가능한 장치나 `Stereo Mix`, 오디오 인터페이스 loopback 채널을 `AUDIO_SOURCE`로 잡아야 합니다.
|
||||
- 단순히 스피커 이름을 넣는 구조가 아니라, **루프백/캡처 가능한 입력 장치 이름**을 넣어야 합니다.
|
||||
- `ffmpeg`가 PATH에 잡혀 있어야 합니다.
|
||||
- TTS는 Windows에서 Docker Desktop이 필요합니다. MeloTTS 공식 문서도 Windows/macOS에서는 Docker 실행을 권장합니다.
|
||||
- `cmd` 기준으로 `.env`는 `copy .env.example .env`로 만들면 됩니다.
|
||||
|
||||
정확도가 아쉬우면:
|
||||
|
||||
```env
|
||||
LOCAL_STT_MODEL=small
|
||||
OLLAMA_MODEL=qwen3:1.7b
|
||||
```
|
||||
|
||||
## 로컬 테스트 순서
|
||||
## Windows 테스트 순서
|
||||
|
||||
1. `bun install`
|
||||
2. `ollama pull qwen3:0.6b`
|
||||
3. `bun run setup:local-ai`
|
||||
2. `bun run setup:stt`
|
||||
3. `copy .env.example .env`
|
||||
4. `bun run devices`
|
||||
5. 필요하면 `.env` 에 `LOCAL_AUDIO_SOURCE` 설정
|
||||
6. `bun run start:local`
|
||||
5. `.env`에서 `AUDIO_SOURCE=`에 루프백 장치 이름 입력
|
||||
6. `bun run test:stt`
|
||||
7. 유튜브, 디스코드 통화, 동영상 같은 소리를 재생해서 전사만 확인
|
||||
|
||||
## Windows 메모
|
||||
## Windows STT+LLM 통합 테스트 순서
|
||||
|
||||
- `bun run devices` 와 Windows 로컬 녹음은 `ffmpeg`가 필요합니다.
|
||||
- 출력 장치 직접 선택은 아직 미구현이라 시스템 기본 출력 장치로 재생됩니다.
|
||||
- Python 탐지가 안 되면 `.env` 에 `LOCAL_AI_PYTHON=py -3` 또는 `LOCAL_AI_PYTHON=python` 을 넣으면 됩니다.
|
||||
1. `bun run setup:llm`
|
||||
2. `bun run setup:tts`
|
||||
3. `bun run test:sttllm`
|
||||
4. 유튜브, 디스코드 통화, 동영상 같은 소리를 재생해서 전사와 답변 확인
|
||||
|
||||
## 설계 메모
|
||||
## Windows 전체 연결 테스트 순서
|
||||
|
||||
- 입력은 유저별 병렬 처리
|
||||
- 출력은 길드 세션당 단일 큐
|
||||
- 로컬 모드는 단일 화자 입력 기준
|
||||
- 화자 구분은 `speaker_id`, `speaker_name`을 LLM 프롬프트에 항상 포함
|
||||
- 모델 다운로드 캐시는 기본적으로 `.local-ai/cache` 아래에 저장
|
||||
1. `bun run setup:llm`
|
||||
2. `bun run setup:tts`
|
||||
3. `bun run test:all`
|
||||
4. 유튜브, 디스코드 통화, 동영상 같은 소리를 재생해서 전사, 답변, 음성 출력 확인
|
||||
|
||||
## Windows LLM 테스트 순서
|
||||
|
||||
1. `bun run setup:llm`
|
||||
2. `bun run test:llm`
|
||||
3. 콘솔에 직접 문장을 입력하고 답변 확인
|
||||
4. `/reset` 으로 문맥 초기화, `/exit` 로 종료
|
||||
|
||||
## Windows TTS 테스트 순서
|
||||
|
||||
1. Docker Desktop 실행
|
||||
2. `bun run setup:tts`
|
||||
3. `bun run test:tts -- "안녕하세요. 로컬 티티에스 테스트입니다."`
|
||||
|
||||
현재 `test:llm` 에이전트 도구:
|
||||
- 현재 시간 조회
|
||||
- 현재 런타임 설정 조회
|
||||
- 주요 bun 명령 목록 조회
|
||||
- 간단한 산술식 계산
|
||||
- 웹 검색
|
||||
- URL 본문 읽기
|
||||
|
||||
동작 원칙:
|
||||
- 일반 대화는 로컬 LLM만 답변
|
||||
- 최신 정보, 뉴스, 사실 확인, 검색 요청일 때만 웹 도구 사용
|
||||
- `test:sttllm` 에서는 먼저 "대답할 가치가 있는 텍스트인지" 판정한 뒤 필요할 때만 답변
|
||||
- 웹 검색이 실제로 시작되면 결과 전에 `검색해볼게요.` 같은 진행 메시지를 먼저 출력
|
||||
- 답변에 한글 외 다른 문자군이 섞이면 한국어만으로 한 번 더 교정
|
||||
|
||||
현재 프롬프트 파일:
|
||||
- `prompts/assistant.md`
|
||||
- `prompts/reply-gate.md`
|
||||
- `prompts/rewrite-korean.md`
|
||||
|
||||
## Windows용 .env 예시
|
||||
|
||||
```env
|
||||
LOCAL_AI_PYTHON=python
|
||||
AUDIO_SOURCE=
|
||||
DEBUG=false
|
||||
TTS_ENABLED=true
|
||||
TTS_IMAGE=realtime-voice-bot-melotts:v0.1.2
|
||||
TTS_LANGUAGE=KR
|
||||
TTS_SPEAKER=KR
|
||||
TTS_DEVICE=cpu
|
||||
TTS_SPEED=1.18
|
||||
TTS_PLAYBACK_RATE=2.2
|
||||
TTS_SDP_RATIO=0.22
|
||||
TTS_NOISE_SCALE=0.55
|
||||
TTS_NOISE_SCALE_W=0.75
|
||||
TTS_CACHE_DIR=.local-ai/tts-cache
|
||||
TTS_OUTPUT_DIR=.local-ai/tts-output
|
||||
OLLAMA_BASE_URL=http://127.0.0.1:11434
|
||||
OLLAMA_MODEL=qwen3:8b
|
||||
OLLAMA_KEEP_ALIVE=5m
|
||||
MAX_CONVERSATION_TURNS=6
|
||||
WHISPER_MODEL=large-v3-turbo
|
||||
WHISPER_LANGUAGE=ko
|
||||
WHISPER_DEVICE=auto
|
||||
WHISPER_COMPUTE_TYPE=auto
|
||||
WHISPER_BEAM_SIZE=2
|
||||
SEGMENT_END_FRAMES=24
|
||||
SEGMENT_MAX_SPEECH_SAMPLES=160000
|
||||
DEBUG_TRANSCRIPTS=true
|
||||
LOG_LEVEL=info
|
||||
```
|
||||
|
||||
196
bun.lock
196
bun.lock
@@ -5,13 +5,7 @@
|
||||
"": {
|
||||
"name": "realtime_voice_bot",
|
||||
"dependencies": {
|
||||
"@discordjs/voice": "^0.19.2",
|
||||
"avr-vad": "^1.0.10",
|
||||
"discord.js": "^14.26.3",
|
||||
"dotenv": "^17.4.2",
|
||||
"ffmpeg-static": "^5.3.0",
|
||||
"opusscript": "^0.1.1",
|
||||
"prism-media": "^1.3.5",
|
||||
"zod": "^4.3.6",
|
||||
},
|
||||
"devDependencies": {
|
||||
@@ -20,203 +14,15 @@
|
||||
},
|
||||
},
|
||||
},
|
||||
"trustedDependencies": [
|
||||
"ffmpeg-static",
|
||||
"onnxruntime-node",
|
||||
],
|
||||
"packages": {
|
||||
"@derhuerst/http-basic": ["@derhuerst/http-basic@8.2.4", "", { "dependencies": { "caseless": "^0.12.0", "concat-stream": "^2.0.0", "http-response-object": "^3.0.1", "parse-cache-control": "^1.0.1" } }, "sha512-F9rL9k9Xjf5blCz8HsJRO4diy111cayL2vkY2XE4r4t3n0yPXVYy3KD3nJ1qbrSn9743UWSXH4IwuCa/HWlGFw=="],
|
||||
|
||||
"@discordjs/builders": ["@discordjs/builders@1.14.1", "", { "dependencies": { "@discordjs/formatters": "^0.6.2", "@discordjs/util": "^1.2.0", "@sapphire/shapeshift": "^4.0.0", "discord-api-types": "^0.38.40", "fast-deep-equal": "^3.1.3", "ts-mixer": "^6.0.4", "tslib": "^2.6.3" } }, "sha512-gSKkhXLqs96TCzk66VZuHHl8z2bQMJFGwrXC0f33ngK+FLNau4hU1PYny3DNJfNdSH+gVMzE85/d5FQ2BpcNwQ=="],
|
||||
|
||||
"@discordjs/collection": ["@discordjs/collection@1.5.3", "", {}, "sha512-SVb428OMd3WO1paV3rm6tSjM4wC+Kecaa1EUGX7vc6/fddvw/6lg90z4QtCqm21zvVe92vMMDt9+DkIvjXImQQ=="],
|
||||
|
||||
"@discordjs/formatters": ["@discordjs/formatters@0.6.2", "", { "dependencies": { "discord-api-types": "^0.38.33" } }, "sha512-y4UPwWhH6vChKRkGdMB4odasUbHOUwy7KL+OVwF86PvT6QVOwElx+TiI1/6kcmcEe+g5YRXJFiXSXUdabqZOvQ=="],
|
||||
|
||||
"@discordjs/rest": ["@discordjs/rest@2.6.1", "", { "dependencies": { "@discordjs/collection": "^2.1.1", "@discordjs/util": "^1.2.0", "@sapphire/async-queue": "^1.5.3", "@sapphire/snowflake": "^3.5.5", "@vladfrangu/async_event_emitter": "^2.4.6", "discord-api-types": "^0.38.40", "magic-bytes.js": "^1.13.0", "tslib": "^2.6.3", "undici": "6.24.1" } }, "sha512-wwQdgjeaoYFiaG+atbqx6aJDpqW7JHAo0HrQkBTbYzM3/PJ3GweQIpgElNcGZ26DCUOXMyawYd0YF7vtr+fZXg=="],
|
||||
|
||||
"@discordjs/util": ["@discordjs/util@1.2.0", "", { "dependencies": { "discord-api-types": "^0.38.33" } }, "sha512-3LKP7F2+atl9vJFhaBjn4nOaSWahZ/yWjOvA4e5pnXkt2qyXRCHLxoBQy81GFtLGCq7K9lPm9R517M1U+/90Qg=="],
|
||||
|
||||
"@discordjs/voice": ["@discordjs/voice@0.19.2", "", { "dependencies": { "@snazzah/davey": "^0.1.9", "@types/ws": "^8.18.1", "discord-api-types": "^0.38.41", "prism-media": "^1.3.5", "tslib": "^2.8.1", "ws": "^8.19.0" } }, "sha512-3yJ255e4ag3wfZu/DSxeOZK1UtnqNxnspmLaQetGT0pDkThNZoHs+Zg6dgZZ19JEVomXygvfHn9lNpICZuYtEA=="],
|
||||
|
||||
"@discordjs/ws": ["@discordjs/ws@1.2.3", "", { "dependencies": { "@discordjs/collection": "^2.1.0", "@discordjs/rest": "^2.5.1", "@discordjs/util": "^1.1.0", "@sapphire/async-queue": "^1.5.2", "@types/ws": "^8.5.10", "@vladfrangu/async_event_emitter": "^2.2.4", "discord-api-types": "^0.38.1", "tslib": "^2.6.2", "ws": "^8.17.0" } }, "sha512-wPlQDxEmlDg5IxhJPuxXr3Vy9AjYq5xCvFWGJyD7w7Np8ZGu+Mc+97LCoEc/+AYCo2IDpKioiH0/c/mj5ZR9Uw=="],
|
||||
|
||||
"@emnapi/core": ["@emnapi/core@1.10.0", "", { "dependencies": { "@emnapi/wasi-threads": "1.2.1", "tslib": "^2.4.0" } }, "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw=="],
|
||||
|
||||
"@emnapi/runtime": ["@emnapi/runtime@1.10.0", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA=="],
|
||||
|
||||
"@emnapi/wasi-threads": ["@emnapi/wasi-threads@1.2.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w=="],
|
||||
|
||||
"@napi-rs/wasm-runtime": ["@napi-rs/wasm-runtime@1.1.4", "", { "dependencies": { "@tybys/wasm-util": "^0.10.1" }, "peerDependencies": { "@emnapi/core": "^1.7.1", "@emnapi/runtime": "^1.7.1" } }, "sha512-3NQNNgA1YSlJb/kMH1ildASP9HW7/7kYnRI2szWJaofaS1hWmbGI4H+d3+22aGzXXN9IJ+n+GiFVcGipJP18ow=="],
|
||||
|
||||
"@sapphire/async-queue": ["@sapphire/async-queue@1.5.5", "", {}, "sha512-cvGzxbba6sav2zZkH8GPf2oGk9yYoD5qrNWdu9fRehifgnFZJMV+nuy2nON2roRO4yQQ+v7MK/Pktl/HgfsUXg=="],
|
||||
|
||||
"@sapphire/shapeshift": ["@sapphire/shapeshift@4.0.0", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "lodash": "^4.17.21" } }, "sha512-d9dUmWVA7MMiKobL3VpLF8P2aeanRTu6ypG2OIaEv/ZHH/SUQ2iHOVyi5wAPjQ+HmnMuL0whK9ez8I/raWbtIg=="],
|
||||
|
||||
"@sapphire/snowflake": ["@sapphire/snowflake@3.5.3", "", {}, "sha512-jjmJywLAFoWeBi1W7994zZyiNWPIiqRRNAmSERxyg93xRGzNYvGjlZ0gR6x0F4gPRi2+0O6S71kOZYyr3cxaIQ=="],
|
||||
|
||||
"@snazzah/davey": ["@snazzah/davey@0.1.11", "", { "optionalDependencies": { "@snazzah/davey-android-arm-eabi": "0.1.11", "@snazzah/davey-android-arm64": "0.1.11", "@snazzah/davey-darwin-arm64": "0.1.11", "@snazzah/davey-darwin-x64": "0.1.11", "@snazzah/davey-freebsd-x64": "0.1.11", "@snazzah/davey-linux-arm-gnueabihf": "0.1.11", "@snazzah/davey-linux-arm64-gnu": "0.1.11", "@snazzah/davey-linux-arm64-musl": "0.1.11", "@snazzah/davey-linux-x64-gnu": "0.1.11", "@snazzah/davey-linux-x64-musl": "0.1.11", "@snazzah/davey-wasm32-wasi": "0.1.11", "@snazzah/davey-win32-arm64-msvc": "0.1.11", "@snazzah/davey-win32-ia32-msvc": "0.1.11", "@snazzah/davey-win32-x64-msvc": "0.1.11" } }, "sha512-oBN+msHzPnm1M5DDx3wVD7iBwpNXFUtkh2MrAbUJu0OhKjliLChi28hq++mu1+qdMpAVQO5JKAvQQxYVbyneiw=="],
|
||||
|
||||
"@snazzah/davey-android-arm-eabi": ["@snazzah/davey-android-arm-eabi@0.1.11", "", { "os": "android", "cpu": "arm" }, "sha512-T1RYbNYKN6tLOcGIDKJd8OI6FBSEemwL7DOYdTMmhqfhhMr3YVN8WOhfoxGg63OcnpTN2e2c5tdY2bAx25RmQQ=="],
|
||||
|
||||
"@snazzah/davey-android-arm64": ["@snazzah/davey-android-arm64@0.1.11", "", { "os": "android", "cpu": "arm64" }, "sha512-ksJn/x2VU8h6w9eku1HT96ugSRZ7lKVkKNKbFleaFN+U99DJaPM+gMu2YvnFU4V54HR06ZBnRihnVG6VLXQpDw=="],
|
||||
|
||||
"@snazzah/davey-darwin-arm64": ["@snazzah/davey-darwin-arm64@0.1.11", "", { "os": "darwin", "cpu": "arm64" }, "sha512-E1d7PbaaVMO3Lj9EiAPqOVbuV0xg5+PsHzHH097DDXiD1+zUDXvJaTnUWsnm5z50pJniHpi4GtaYmk+ieB/guA=="],
|
||||
|
||||
"@snazzah/davey-darwin-x64": ["@snazzah/davey-darwin-x64@0.1.11", "", { "os": "darwin", "cpu": "x64" }, "sha512-Tl4TI/LTmgJZepgbgVMYDi8RqlAkPtPg1OEBPl7a9Tn3AwR36Vs6lyIT1cs/lGy/ds/+B+mKI4rPObN1cyILTw=="],
|
||||
|
||||
"@snazzah/davey-freebsd-x64": ["@snazzah/davey-freebsd-x64@0.1.11", "", { "os": "freebsd", "cpu": "x64" }, "sha512-T8Iw9FXkuI1T+YBAFzh9v/TXf9IOTOSqnd/BFpTRTrlW72PR2lhIidzSmg027VxO7r5pX47iFwiOkb9I/NU/EA=="],
|
||||
|
||||
"@snazzah/davey-linux-arm-gnueabihf": ["@snazzah/davey-linux-arm-gnueabihf@0.1.11", "", { "os": "linux", "cpu": "arm" }, "sha512-1Txj+8pqA8uq/OGtaUaBFWAPnNMQzFgIywj0iA7EI4xZl+mab48/pv+YZ1pNb/suC6ynsW44oB9efiXSdcUAgA=="],
|
||||
|
||||
"@snazzah/davey-linux-arm64-gnu": ["@snazzah/davey-linux-arm64-gnu@0.1.11", "", { "os": "linux", "cpu": "arm64" }, "sha512-ERzF5nM/IYW1BcN3wLXpEwBCGLFf0kGJUVhaV6yfiInz0tkU8UmvrrgpaMaACfMjIhfWdq5CcX+aTkXo/saNcg=="],
|
||||
|
||||
"@snazzah/davey-linux-arm64-musl": ["@snazzah/davey-linux-arm64-musl@0.1.11", "", { "os": "linux", "cpu": "arm64" }, "sha512-e6pX6Hiabtz99q+H/YHNkm9JVlpqN8HGh0qPib8G2+UY4/SSH8WvqWipk3v581dMy2oyCHt7MOoY1aU1P1N/xA=="],
|
||||
|
||||
"@snazzah/davey-linux-x64-gnu": ["@snazzah/davey-linux-x64-gnu@0.1.11", "", { "os": "linux", "cpu": "x64" }, "sha512-TW5bSoqChOJMbvsDb4wAATYrxmAXuNnse7wFNVSAJUaZKSeRfZbu3UAiPWSNn7GwLwSfU6hg322KZUn8IWCuvg=="],
|
||||
|
||||
"@snazzah/davey-linux-x64-musl": ["@snazzah/davey-linux-x64-musl@0.1.11", "", { "os": "linux", "cpu": "x64" }, "sha512-5j6Pmc+Wzv5lSxVP6quA7teYRJXibkZqQyYGfTDnTsUOO5dPpcojpqlXlkhyvsA1OAQTj4uxbOCciN3cVWwzug=="],
|
||||
|
||||
"@snazzah/davey-wasm32-wasi": ["@snazzah/davey-wasm32-wasi@0.1.11", "", { "dependencies": { "@napi-rs/wasm-runtime": "^1.1.2" }, "cpu": "none" }, "sha512-rKOwZ/0J8lp+4VEyOdMDBRP9KR+PksZpa9V1Qn0veMzy4FqTVKthkxwGqewheFe0SFg9fdvt798l/PBFrfDeZw=="],
|
||||
|
||||
"@snazzah/davey-win32-arm64-msvc": ["@snazzah/davey-win32-arm64-msvc@0.1.11", "", { "os": "win32", "cpu": "arm64" }, "sha512-5fptJU4tX901m3mj0SHiBljMrPT4ZEsynbBhR7bK1yn9TY1jjyhN8EFi7QF5IWtUEni+0mia2BCMHZ5ZkmFZqQ=="],
|
||||
|
||||
"@snazzah/davey-win32-ia32-msvc": ["@snazzah/davey-win32-ia32-msvc@0.1.11", "", { "os": "win32", "cpu": "ia32" }, "sha512-ualexn8SeLsiMHhWfzVrzRcjHgcBapg++FPaVgJJxoh2S/jCRiklXOu3luqIZdJdNKvhe2V9SwO/cImPeIIBKw=="],
|
||||
|
||||
"@snazzah/davey-win32-x64-msvc": ["@snazzah/davey-win32-x64-msvc@0.1.11", "", { "os": "win32", "cpu": "x64" }, "sha512-muNhc8UKXtknzsH/w4AIkbPR2I8BuvApn0pDXar0IEvY8PCjqU/M8MPbOOEYwQVvQRMwVTgExtxzrkBPSXB4nA=="],
|
||||
|
||||
"@tybys/wasm-util": ["@tybys/wasm-util@0.10.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg=="],
|
||||
|
||||
"@types/node": ["@types/node@25.6.0", "", { "dependencies": { "undici-types": "~7.19.0" } }, "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ=="],
|
||||
|
||||
"@types/ws": ["@types/ws@8.18.1", "", { "dependencies": { "@types/node": "*" } }, "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg=="],
|
||||
|
||||
"@vladfrangu/async_event_emitter": ["@vladfrangu/async_event_emitter@2.4.7", "", {}, "sha512-Xfe6rpCTxSxfbswi/W/Pz7zp1WWSNn4A0eW4mLkQUewCrXXtMj31lCg+iQyTkh/CkusZSq9eDflu7tjEDXUY6g=="],
|
||||
|
||||
"adm-zip": ["adm-zip@0.5.17", "", {}, "sha512-+Ut8d9LLqwEvHHJl1+PIHqoyDxFgVN847JTVM3Izi3xHDWPE4UtzzXysMZQs64DMcrJfBeS/uoEP4AD3HQHnQQ=="],
|
||||
|
||||
"agent-base": ["agent-base@6.0.2", "", { "dependencies": { "debug": "4" } }, "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ=="],
|
||||
|
||||
"avr-vad": ["avr-vad@1.0.10", "", { "dependencies": { "onnxruntime-node": "^1.22.0-rev" } }, "sha512-gM8SiQIebujfKMfy5w74tRPH+Fg78CMrBoDkMhCN3TmYVmmD8fmuVag7Q7ZCBITpFvYkOZnWEdGWuCb3YukBJw=="],
|
||||
|
||||
"boolean": ["boolean@3.2.0", "", {}, "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw=="],
|
||||
|
||||
"buffer-from": ["buffer-from@1.1.2", "", {}, "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ=="],
|
||||
|
||||
"caseless": ["caseless@0.12.0", "", {}, "sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw=="],
|
||||
|
||||
"concat-stream": ["concat-stream@2.0.0", "", { "dependencies": { "buffer-from": "^1.0.0", "inherits": "^2.0.3", "readable-stream": "^3.0.2", "typedarray": "^0.0.6" } }, "sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A=="],
|
||||
|
||||
"debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="],
|
||||
|
||||
"define-data-property": ["define-data-property@1.1.4", "", { "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", "gopd": "^1.0.1" } }, "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A=="],
|
||||
|
||||
"define-properties": ["define-properties@1.2.1", "", { "dependencies": { "define-data-property": "^1.0.1", "has-property-descriptors": "^1.0.0", "object-keys": "^1.1.1" } }, "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg=="],
|
||||
|
||||
"detect-node": ["detect-node@2.1.0", "", {}, "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g=="],
|
||||
|
||||
"discord-api-types": ["discord-api-types@0.38.47", "", {}, "sha512-XgXQodHQBAE6kfD7kMvVo30863iHX1LHSqNq6MGUTDwIFCCvHva13+rwxyxVXDqudyApMNAd32PGjgVETi5rjA=="],
|
||||
|
||||
"discord.js": ["discord.js@14.26.3", "", { "dependencies": { "@discordjs/builders": "^1.14.1", "@discordjs/collection": "1.5.3", "@discordjs/formatters": "^0.6.2", "@discordjs/rest": "^2.6.1", "@discordjs/util": "^1.2.0", "@discordjs/ws": "^1.2.3", "@sapphire/snowflake": "3.5.3", "discord-api-types": "^0.38.40", "fast-deep-equal": "3.1.3", "lodash.snakecase": "4.1.1", "magic-bytes.js": "^1.13.0", "tslib": "^2.6.3", "undici": "6.24.1" } }, "sha512-XEKtYn28YFsiJ5l4fLRyikdbo6RD5oFyqfVHQlvXz2104JhH/E8slN28dbky05w3DCrJcNVWvhVvcJCTSl/KIg=="],
|
||||
|
||||
"dotenv": ["dotenv@17.4.2", "", {}, "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw=="],
|
||||
|
||||
"env-paths": ["env-paths@2.2.1", "", {}, "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A=="],
|
||||
|
||||
"es-define-property": ["es-define-property@1.0.1", "", {}, "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g=="],
|
||||
|
||||
"es-errors": ["es-errors@1.3.0", "", {}, "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw=="],
|
||||
|
||||
"es6-error": ["es6-error@4.1.1", "", {}, "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg=="],
|
||||
|
||||
"escape-string-regexp": ["escape-string-regexp@4.0.0", "", {}, "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA=="],
|
||||
|
||||
"fast-deep-equal": ["fast-deep-equal@3.1.3", "", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="],
|
||||
|
||||
"ffmpeg-static": ["ffmpeg-static@5.3.0", "", { "dependencies": { "@derhuerst/http-basic": "^8.2.0", "env-paths": "^2.2.0", "https-proxy-agent": "^5.0.0", "progress": "^2.0.3" } }, "sha512-H+K6sW6TiIX6VGend0KQwthe+kaceeH/luE8dIZyOP35ik7ahYojDuqlTV1bOrtEwl01sy2HFNGQfi5IDJvotg=="],
|
||||
|
||||
"global-agent": ["global-agent@3.0.0", "", { "dependencies": { "boolean": "^3.0.1", "es6-error": "^4.1.1", "matcher": "^3.0.0", "roarr": "^2.15.3", "semver": "^7.3.2", "serialize-error": "^7.0.1" } }, "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q=="],
|
||||
|
||||
"globalthis": ["globalthis@1.0.4", "", { "dependencies": { "define-properties": "^1.2.1", "gopd": "^1.0.1" } }, "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ=="],
|
||||
|
||||
"gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="],
|
||||
|
||||
"has-property-descriptors": ["has-property-descriptors@1.0.2", "", { "dependencies": { "es-define-property": "^1.0.0" } }, "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg=="],
|
||||
|
||||
"http-response-object": ["http-response-object@3.0.2", "", { "dependencies": { "@types/node": "^10.0.3" } }, "sha512-bqX0XTF6fnXSQcEJ2Iuyr75yVakyjIDCqroJQ/aHfSdlM743Cwqoi2nDYMzLGWUcuTWGWy8AAvOKXTfiv6q9RA=="],
|
||||
|
||||
"https-proxy-agent": ["https-proxy-agent@5.0.1", "", { "dependencies": { "agent-base": "6", "debug": "4" } }, "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA=="],
|
||||
|
||||
"inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="],
|
||||
|
||||
"json-stringify-safe": ["json-stringify-safe@5.0.1", "", {}, "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA=="],
|
||||
|
||||
"lodash": ["lodash@4.18.1", "", {}, "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q=="],
|
||||
|
||||
"lodash.snakecase": ["lodash.snakecase@4.1.1", "", {}, "sha512-QZ1d4xoBHYUeuouhEq3lk3Uq7ldgyFXGBhg04+oRLnIz8o9T65Eh+8YdroUwn846zchkA9yDsDl5CVVaV2nqYw=="],
|
||||
|
||||
"magic-bytes.js": ["magic-bytes.js@1.13.0", "", {}, "sha512-afO2mnxW7GDTXMm5/AoN1WuOcdoKhtgXjIvHmobqTD1grNplhGdv3PFOyjCVmrnOZBIT/gD/koDKpYG+0mvHcg=="],
|
||||
|
||||
"matcher": ["matcher@3.0.0", "", { "dependencies": { "escape-string-regexp": "^4.0.0" } }, "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng=="],
|
||||
|
||||
"ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="],
|
||||
|
||||
"object-keys": ["object-keys@1.1.1", "", {}, "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA=="],
|
||||
|
||||
"onnxruntime-common": ["onnxruntime-common@1.24.3", "", {}, "sha512-GeuPZO6U/LBJXvwdaqHbuUmoXiEdeCjWi/EG7Y1HNnDwJYuk6WUbNXpF6luSUY8yASul3cmUlLGrCCL1ZgVXqA=="],
|
||||
|
||||
"onnxruntime-node": ["onnxruntime-node@1.24.3", "", { "dependencies": { "adm-zip": "^0.5.16", "global-agent": "^3.0.0", "onnxruntime-common": "1.24.3" }, "os": [ "linux", "win32", "darwin", ] }, "sha512-JH7+czbc8ALA819vlTgcV+Q214/+VjGeBHDjX81+ZCD0PCVCIFGFNtT0V4sXG/1JXypKPgScQcB3ij/hk3YnTg=="],
|
||||
|
||||
"opusscript": ["opusscript@0.1.1", "", {}, "sha512-mL0fZZOUnXdZ78woRXp18lApwpp0lF5tozJOD1Wut0dgrA9WuQTgSels/CSmFleaAZrJi/nci5KOVtbuxeWoQA=="],
|
||||
|
||||
"parse-cache-control": ["parse-cache-control@1.0.1", "", {}, "sha512-60zvsJReQPX5/QP0Kzfd/VrpjScIQ7SHBW6bFCYfEP+fp0Eppr1SHhIO5nd1PjZtvclzSzES9D/p5nFJurwfWg=="],
|
||||
|
||||
"prism-media": ["prism-media@1.3.5", "", { "peerDependencies": { "@discordjs/opus": ">=0.8.0 <1.0.0", "ffmpeg-static": "^5.0.2 || ^4.2.7 || ^3.0.0 || ^2.4.0", "node-opus": "^0.3.3", "opusscript": "^0.0.8" }, "optionalPeers": ["@discordjs/opus", "ffmpeg-static", "node-opus", "opusscript"] }, "sha512-IQdl0Q01m4LrkN1EGIE9lphov5Hy7WWlH6ulf5QdGePLlPas9p2mhgddTEHrlaXYjjFToM1/rWuwF37VF4taaA=="],
|
||||
|
||||
"progress": ["progress@2.0.3", "", {}, "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA=="],
|
||||
|
||||
"readable-stream": ["readable-stream@3.6.2", "", { "dependencies": { "inherits": "^2.0.3", "string_decoder": "^1.1.1", "util-deprecate": "^1.0.1" } }, "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA=="],
|
||||
|
||||
"roarr": ["roarr@2.15.4", "", { "dependencies": { "boolean": "^3.0.1", "detect-node": "^2.0.4", "globalthis": "^1.0.1", "json-stringify-safe": "^5.0.1", "semver-compare": "^1.0.0", "sprintf-js": "^1.1.2" } }, "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A=="],
|
||||
|
||||
"safe-buffer": ["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="],
|
||||
|
||||
"semver": ["semver@7.7.4", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA=="],
|
||||
|
||||
"semver-compare": ["semver-compare@1.0.0", "", {}, "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow=="],
|
||||
|
||||
"serialize-error": ["serialize-error@7.0.1", "", { "dependencies": { "type-fest": "^0.13.1" } }, "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw=="],
|
||||
|
||||
"sprintf-js": ["sprintf-js@1.1.3", "", {}, "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA=="],
|
||||
|
||||
"string_decoder": ["string_decoder@1.3.0", "", { "dependencies": { "safe-buffer": "~5.2.0" } }, "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA=="],
|
||||
|
||||
"ts-mixer": ["ts-mixer@6.0.4", "", {}, "sha512-ufKpbmrugz5Aou4wcr5Wc1UUFWOLhq+Fm6qa6P0w0K5Qw2yhaUoiWszhCVuNQyNwrlGiscHOmqYoAox1PtvgjA=="],
|
||||
|
||||
"tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="],
|
||||
|
||||
"type-fest": ["type-fest@0.13.1", "", {}, "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg=="],
|
||||
|
||||
"typedarray": ["typedarray@0.0.6", "", {}, "sha512-/aCDEGatGvZ2BIk+HmLf4ifCJFwvKFNb9/JeZPMulfgFracn9QFcAf5GO8B/mweUjSoblS5In0cWhqpfs/5PQA=="],
|
||||
|
||||
"typescript": ["typescript@6.0.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw=="],
|
||||
|
||||
"undici": ["undici@6.24.1", "", {}, "sha512-sC+b0tB1whOCzbtlx20fx3WgCXwkW627p4EA9uM+/tNNPkSS+eSEld6pAs9nDv7WbY1UUljBMYPtu9BCOrCWKA=="],
|
||||
|
||||
"undici-types": ["undici-types@7.19.2", "", {}, "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg=="],
|
||||
|
||||
"util-deprecate": ["util-deprecate@1.0.2", "", {}, "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="],
|
||||
|
||||
"ws": ["ws@8.20.0", "", { "peerDependencies": { "bufferutil": "^4.0.1", "utf-8-validate": ">=5.0.2" }, "optionalPeers": ["bufferutil", "utf-8-validate"] }, "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA=="],
|
||||
|
||||
"zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
|
||||
|
||||
"@discordjs/rest/@discordjs/collection": ["@discordjs/collection@2.1.1", "", {}, "sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg=="],
|
||||
|
||||
"@discordjs/rest/@sapphire/snowflake": ["@sapphire/snowflake@3.5.5", "", {}, "sha512-xzvBr1Q1c4lCe7i6sRnrofxeO1QTP/LKQ6A6qy0iB4x5yfiSfARMEQEghojzTNALDTcv8En04qYNIco9/K9eZQ=="],
|
||||
|
||||
"@discordjs/ws/@discordjs/collection": ["@discordjs/collection@2.1.1", "", {}, "sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg=="],
|
||||
|
||||
"http-response-object/@types/node": ["@types/node@10.17.60", "", {}, "sha512-F0KIgDJfy2nA3zMLmWGKxcH2ZVEtCZXHHdOQs2gSaQ27+lNeEfGxzkIw90aXswATX7AZ33tahPbzy6KAfUreVw=="],
|
||||
"zod": ["zod@4.4.2", "", {}, "sha512-IynmDyxsEsb9RKzO3J9+4SxXnl2FTFSzNBaKKaMV6tsSk0rw9gYw9gs+JFCq/qk2LCZ78KDwyj+Z289TijSkUw=="],
|
||||
}
|
||||
}
|
||||
|
||||
13
docker.cmd
Normal file
13
docker.cmd
Normal file
@@ -0,0 +1,13 @@
|
||||
@echo off
rem Shim that forwards every argument to the Docker Desktop CLI at its default install path.
setlocal

set "DOCKER_EXE=C:\Program Files\Docker\Docker\resources\bin\docker.exe"

rem Guard clause first so that docker itself runs outside any parenthesised block.
rem Inside a ( ... ) block %ERRORLEVEL% is expanded when the block is parsed,
rem i.e. before docker has run, which would throw away docker's real exit code.
if not exist "%DOCKER_EXE%" (
  echo Docker executable not found: "%DOCKER_EXE%"
  echo Install Docker Desktop or update this shim path.
  exit /b 1
)

"%DOCKER_EXE%" %*
exit /b %ERRORLEVEL%
|
||||
22
docker/melotts/Dockerfile
Normal file
22
docker/melotts/Dockerfile
Normal file
@@ -0,0 +1,22 @@
|
||||
# Image that bundles MeloTTS together with the CLI and worker entry scripts.
FROM python:3.11-slim

# Unbuffered Python output so prints reach the container's stdout/stderr immediately.
ENV PYTHONUNBUFFERED=1

WORKDIR /opt/realtime-voice-bot

# System packages installed before MeloTTS is cloned and pip-installed below.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    git \
    libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Shallow clone: the editable install only needs the current snapshot, so the
# full commit history would just add weight to the image layer.
RUN git clone --depth 1 https://github.com/myshell-ai/MeloTTS.git /opt/MeloTTS
RUN pip install --no-cache-dir -e /opt/MeloTTS
RUN pip install --no-cache-dir python-mecab-ko python-mecab-ko-dic
RUN python -m unidic download
RUN python /opt/MeloTTS/melo/init_downloads.py

COPY melo_tts_cli.py /opt/realtime-voice-bot/melo_tts_cli.py
COPY melo_tts_worker.py /opt/realtime-voice-bot/melo_tts_worker.py

# One-shot CLI by default; the worker script is copied alongside it.
ENTRYPOINT ["python", "/opt/realtime-voice-bot/melo_tts_cli.py"]
|
||||
42
docker/melotts/melo_tts_cli.py
Normal file
42
docker/melotts/melo_tts_cli.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from melo.api import TTS
|
||||
|
||||
|
||||
def main() -> None:
    """Synthesize a single utterance with MeloTTS and write it to a file.

    All settings come from the command line. ``--text`` and ``--output`` are
    mandatory; the remaining flags fall back to the defaults declared below.
    Exits via ``SystemExit`` when the requested speaker is not offered by the
    loaded model.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--text", required=True)
    cli.add_argument("--output", required=True)
    for flag, fallback in (("--language", "KR"), ("--speaker", "KR")):
        cli.add_argument(flag, default=fallback)
    for flag, fallback in (
        ("--speed", 1.0),
        ("--sdp-ratio", 0.2),
        ("--noise-scale", 0.6),
        ("--noise-scale-w", 0.8),
    ):
        cli.add_argument(flag, type=float, default=fallback)
    cli.add_argument("--device", default="cpu")
    options = cli.parse_args()

    # Make sure the destination directory exists before the model writes to it.
    destination = Path(options.output)
    destination.parent.mkdir(parents=True, exist_ok=True)

    engine = TTS(language=options.language, device=options.device)
    known_speakers = engine.hps.data.spk2id

    if options.speaker not in known_speakers:
        supported = ", ".join(sorted(known_speakers.keys()))
        raise SystemExit(f"지원하지 않는 speaker 입니다: {options.speaker}. 사용 가능: {supported}")

    voice_settings = {
        "speed": options.speed,
        "sdp_ratio": options.sdp_ratio,
        "noise_scale": options.noise_scale,
        "noise_scale_w": options.noise_scale_w,
    }
    engine.tts_to_file(
        options.text,
        known_speakers[options.speaker],
        str(destination),
        **voice_settings,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
122
docker/melotts/melo_tts_worker.py
Normal file
122
docker/melotts/melo_tts_worker.py
Normal file
@@ -0,0 +1,122 @@
|
||||
import contextlib
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from melo.api import TTS
|
||||
|
||||
|
||||
LANGUAGE = os.getenv("TTS_LANGUAGE", "KR")
|
||||
SPEAKER = os.getenv("TTS_SPEAKER", "KR")
|
||||
DEVICE = os.getenv("TTS_DEVICE", "cpu")
|
||||
SPEED = float(os.getenv("TTS_SPEED", "1.18"))
|
||||
SDP_RATIO = float(os.getenv("TTS_SDP_RATIO", "0.22"))
|
||||
NOISE_SCALE = float(os.getenv("TTS_NOISE_SCALE", "0.55"))
|
||||
NOISE_SCALE_W = float(os.getenv("TTS_NOISE_SCALE_W", "0.75"))
|
||||
|
||||
_MODEL = None
|
||||
_SPEAKER_ID = None
|
||||
|
||||
|
||||
def silence_stdout(func):
    """Decorator that discards anything ``func`` prints to stdout.

    The wrapped call's return value (or exception) is passed through
    unchanged; only text written to ``sys.stdout`` during the call is dropped.
    """

    def wrapper(*args, **kwargs):
        discarded = io.StringIO()
        with contextlib.redirect_stdout(discarded):
            outcome = func(*args, **kwargs)
        return outcome

    return wrapper
|
||||
|
||||
|
||||
@silence_stdout
def load_model():
    """Return the ``(model, speaker_id)`` pair, creating it on the first call.

    The pair is cached in the module globals ``_MODEL`` / ``_SPEAKER_ID`` so
    later calls reuse the already constructed TTS instance.

    Raises:
        RuntimeError: if ``SPEAKER`` is not among the model's speakers.
    """
    global _MODEL, _SPEAKER_ID

    already_loaded = _MODEL is not None and _SPEAKER_ID is not None
    if not already_loaded:
        candidate = TTS(language=LANGUAGE, device=DEVICE)
        speaker_table = candidate.hps.data.spk2id

        if SPEAKER not in speaker_table:
            supported = ", ".join(sorted(speaker_table.keys()))
            raise RuntimeError(f"지원하지 않는 speaker 입니다: {SPEAKER}. 사용 가능: {supported}")

        _MODEL = candidate
        _SPEAKER_ID = speaker_table[SPEAKER]

    return _MODEL, _SPEAKER_ID
|
||||
|
||||
|
||||
def handle_ping():
    """Load the model if necessary and report the worker's active settings.

    Returns a dict with the configured language, speaker, device and synthesis
    parameters plus the number of speakers the loaded model exposes.
    """
    engine, active_speaker_id = load_model()
    status = dict(
        language=LANGUAGE,
        speaker=SPEAKER,
        speaker_id=active_speaker_id,
        device=DEVICE,
        speed=SPEED,
        sdp_ratio=SDP_RATIO,
        noise_scale=NOISE_SCALE,
        noise_scale_w=NOISE_SCALE_W,
    )
    status["speaker_count"] = len(engine.hps.data.spk2id)
    return status
|
||||
|
||||
|
||||
@silence_stdout
def handle_synthesize(params):
    """Synthesize ``params["text"]`` into the file at ``params["output_path"]``.

    The parent directory of the output file is created when missing. Returns a
    dict holding the output path and the length of the stripped text.
    """
    utterance = str(params["text"]).strip()
    destination = Path(str(params["output_path"]))
    destination.parent.mkdir(parents=True, exist_ok=True)

    engine, voice_id = load_model()
    voice_settings = {
        "speed": SPEED,
        "sdp_ratio": SDP_RATIO,
        "noise_scale": NOISE_SCALE,
        "noise_scale_w": NOISE_SCALE_W,
    }
    engine.tts_to_file(utterance, voice_id, str(destination), **voice_settings)

    return dict(output_path=str(destination), text_length=len(utterance))
|
||||
|
||||
|
||||
def main():
    """Serve newline-delimited JSON requests from stdin until it is closed.

    Each non-empty line must be a JSON object with ``id`` and ``method``
    (``"ping"`` or ``"synthesize"``) and optional ``params``. Exactly one JSON
    line is written to stdout per request: ``{"id", "result"}`` on success or
    ``{"id", "error"}`` on failure. Requests whose id cannot be determined are
    answered with the id ``"unknown"``.
    """
    for raw_line in sys.stdin:
        line = raw_line.strip()
        if not line:
            continue

        # Reset for every request. Previously the error path looked the id up
        # on `payload`, which could still hold the *previous* request when this
        # line failed to parse, so the error was reported under the wrong id.
        request_id = "unknown"
        try:
            payload = json.loads(line)
            request_id = str(payload["id"])
            method = payload["method"]
            params = payload.get("params", {})

            if method == "ping":
                result = handle_ping()
            elif method == "synthesize":
                result = handle_synthesize(params)
            else:
                raise RuntimeError(f"알 수 없는 method 입니다: {method}")

            sys.stdout.write(json.dumps({"id": request_id, "result": result}, ensure_ascii=False) + "\n")
            sys.stdout.flush()
        except Exception as error:
            # Keep the worker alive: report the failure and wait for the next line.
            sys.stdout.write(json.dumps({"id": request_id, "error": str(error)}, ensure_ascii=False) + "\n")
            sys.stdout.flush()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
31
package.json
31
package.json
@@ -4,13 +4,18 @@
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "bun --watch src/index.ts",
|
||||
"start": "bun src/index.ts discord",
|
||||
"start:discord": "bun src/index.ts discord",
|
||||
"start:local": "bun src/index.ts local",
|
||||
"setup:local-ai": "bun src/setup-local-ai.ts",
|
||||
"devices": "bun src/index.ts local-devices",
|
||||
"audio:devices": "bun src/index.ts local-devices",
|
||||
"setup": "bun src/setup.ts",
|
||||
"setup:all": "bun run setup",
|
||||
"setup:stt": "bun src/setup-python.ts",
|
||||
"setup:llm": "bun src/setup-llm.ts",
|
||||
"setup:tts": "bun src/setup-tts.ts",
|
||||
"setup:python": "bun run setup:stt",
|
||||
"test:stt": "bun src/index.ts test-stt",
|
||||
"test:sttllm": "bun src/index.ts test-sttllm",
|
||||
"test:all": "bun src/index.ts test-all",
|
||||
"test:llm": "bun src/index.ts test-llm",
|
||||
"test:tts": "bun src/index.ts test-tts",
|
||||
"devices": "bun src/index.ts devices",
|
||||
"check": "tsc --noEmit",
|
||||
"build": "tsc -p tsconfig.json"
|
||||
},
|
||||
@@ -19,21 +24,11 @@
|
||||
"node": ">=22.12.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@discordjs/voice": "^0.19.2",
|
||||
"avr-vad": "^1.0.10",
|
||||
"discord.js": "^14.26.3",
|
||||
"dotenv": "^17.4.2",
|
||||
"ffmpeg-static": "^5.3.0",
|
||||
"opusscript": "^0.1.1",
|
||||
"prism-media": "^1.3.5",
|
||||
"zod": "^4.3.6"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^25.6.0",
|
||||
"typescript": "^6.0.3"
|
||||
},
|
||||
"trustedDependencies": [
|
||||
"onnxruntime-node",
|
||||
"ffmpeg-static"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
14
prompts/assistant.md
Normal file
14
prompts/assistant.md
Normal file
@@ -0,0 +1,14 @@
|
||||
너는 한국어로 짧고 자연스럽게 답하는 로컬 음성 비서다.
|
||||
|
||||
규칙:
|
||||
- 반드시 한국어로만 답한다.
|
||||
- 한자, 중국어, 일본어, 아랍어, 키릴 문자, 기타 외국 문자, 이모지 사용 금지.
|
||||
- 영어 단어는 꼭 필요한 기술명 외에는 피하고 자연스러운 한국어 표현으로 바꾼다.
|
||||
- 답변은 TTS가 읽기 쉽도록 짧고 단순한 문장으로 만든다.
|
||||
- 기본적으로 1~3문장으로 답한다.
|
||||
- 불필요한 장식, 불릿, 번호 목록, 괄호 남용, 과한 감탄 표현은 피한다.
|
||||
- 사용자의 말에 바로 답하고, 군더더기 없이 핵심만 말한다.
|
||||
- 정확한 시간, 설정 확인, 계산이 필요하면 도구를 우선 사용한다.
|
||||
- 최신 정보, 오늘/최근 정보, 뉴스, 검색 요청, 사실 확인, 외부 웹페이지 내용이 필요한 경우에만 `web_search` 와 `fetch_url` 을 사용한다.
|
||||
- 내부 지식만으로 충분한 일반 대화에는 웹 도구를 쓰지 않는다.
|
||||
- 도구가 필요한 작업이 시작되면 결과 전에 짧은 진행 메시지를 출력할 수 있다.
|
||||
10
prompts/reply-gate.md
Normal file
10
prompts/reply-gate.md
Normal file
@@ -0,0 +1,10 @@
|
||||
다음 텍스트에 로컬 비서가 실제로 대답해야 하는지 판정한다.
|
||||
|
||||
판정 기준:
|
||||
- 의미 없는 감탄사, 중얼거림, 문맥 없는 짧은 파편, 노래 가사 조각, 잡음성 문장은 `should_reply=false`
|
||||
- 질문, 요청, 확인, 명령, 대화 시도는 `should_reply=true`
|
||||
- 최신 정보나 사실 확인, 검색이 필요해 보이면 `likely_needs_lookup=true`
|
||||
- reason 은 아주 짧게 쓴다
|
||||
|
||||
반드시 JSON만 출력:
|
||||
{"should_reply":true,"likely_needs_lookup":false,"reason":"짧게"}
|
||||
8
prompts/rewrite-korean.md
Normal file
8
prompts/rewrite-korean.md
Normal file
@@ -0,0 +1,8 @@
|
||||
다음 답변을 의미를 유지한 채 자연스러운 한국어로만 다시 쓴다.
|
||||
|
||||
규칙:
|
||||
- 한글, 숫자, 기본 문장부호 외 다른 문자 사용 금지
|
||||
- 이모지 사용 금지
|
||||
- 짧고 읽기 쉬운 문장으로 만든다
|
||||
- TTS가 읽기 쉽도록 불필요한 기호와 장식을 줄인다
|
||||
- 설명하지 말고 최종 답변 문장만 출력한다
|
||||
@@ -1,145 +0,0 @@
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import traceback
|
||||
import wave
|
||||
|
||||
|
||||
os.environ.setdefault("PYTHONIOENCODING", "utf-8")
|
||||
|
||||
|
||||
def log(message: str) -> None:
    """Write one diagnostic line to stderr and flush it immediately."""
    sys.stderr.write(f"{message}\n")
    sys.stderr.flush()
|
||||
|
||||
|
||||
def write_response(request_id: int, ok: bool, result=None, error: str | None = None) -> None:
|
||||
payload = {
|
||||
"id": request_id,
|
||||
"ok": ok,
|
||||
}
|
||||
if ok:
|
||||
payload["result"] = result
|
||||
else:
|
||||
payload["error"] = error or "unknown error"
|
||||
|
||||
sys.stdout.write(json.dumps(payload, ensure_ascii=False) + "\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
def resolve_device() -> str:
    """Pick the device name used for speech-to-text.

    An explicit ``LOCAL_STT_DEVICE`` value (anything other than empty or
    ``auto``) is returned lower-cased. Otherwise ``"cuda"`` is chosen when
    ctranslate2 can be imported and reports at least one CUDA device, and
    ``"cpu"`` in every other case.
    """
    requested = os.environ.get("LOCAL_STT_DEVICE", "auto").strip().lower()
    if requested not in ("", "auto"):
        return requested

    try:
        import ctranslate2

        cuda_available = ctranslate2.get_cuda_device_count() > 0
    except Exception:
        # Probing is best effort; any failure simply means "use the CPU".
        cuda_available = False

    return "cuda" if cuda_available else "cpu"
|
||||
|
||||
|
||||
def resolve_compute_type(device: str) -> str:
    """Return the compute type to use for ``device``.

    ``LOCAL_STT_COMPUTE_TYPE`` wins when it is set to something other than
    empty or ``auto``; otherwise CUDA gets ``int8_float16`` and everything else
    gets ``int8``.
    """
    override = os.environ.get("LOCAL_STT_COMPUTE_TYPE", "auto").strip().lower()
    if override in ("", "auto"):
        return "int8_float16" if device == "cuda" else "int8"
    return override
|
||||
|
||||
|
||||
class SttWorker:
|
||||
def __init__(self) -> None:
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
self.model_name = os.environ.get("LOCAL_STT_MODEL", "tiny").strip() or "tiny"
|
||||
self.device = resolve_device()
|
||||
self.compute_type = resolve_compute_type(self.device)
|
||||
self.beam_size = int(os.environ.get("LOCAL_STT_BEAM_SIZE", "1"))
|
||||
self.model = WhisperModel(
|
||||
self.model_name,
|
||||
device=self.device,
|
||||
compute_type=self.compute_type,
|
||||
)
|
||||
log(
|
||||
f"local-stt ready model={self.model_name} device={self.device} compute={self.compute_type} beam={self.beam_size}"
|
||||
)
|
||||
|
||||
def transcribe(self, audio_base64: str, language: str | None) -> str:
|
||||
pcm_bytes = base64.b64decode(audio_base64)
|
||||
temp_path = ""
|
||||
|
||||
try:
|
||||
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
|
||||
temp_path = handle.name
|
||||
|
||||
with wave.open(temp_path, "wb") as wav_file:
|
||||
wav_file.setnchannels(1)
|
||||
wav_file.setsampwidth(2)
|
||||
wav_file.setframerate(16000)
|
||||
wav_file.writeframes(pcm_bytes)
|
||||
|
||||
segments, _info = self.model.transcribe(
|
||||
temp_path,
|
||||
language=language,
|
||||
beam_size=self.beam_size,
|
||||
best_of=1,
|
||||
condition_on_previous_text=False,
|
||||
vad_filter=False,
|
||||
without_timestamps=True,
|
||||
temperature=0.0,
|
||||
)
|
||||
return " ".join(segment.text.strip() for segment in segments if segment.text.strip()).strip()
|
||||
finally:
|
||||
if temp_path:
|
||||
try:
|
||||
os.unlink(temp_path)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def main() -> int:
    """Run the STT worker: one JSON request per stdin line, one reply per line.

    Supported methods are ``ping`` and ``transcribe``. Returns 1 when the
    worker cannot be initialised and 0 once stdin is exhausted.
    """
    try:
        worker = SttWorker()
    except Exception as exc:
        log("failed to initialize local STT worker")
        log("run `bun run setup:local-ai` first if dependencies are missing")
        log("".join(traceback.format_exception(exc)))
        return 1

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        # Placeholder id used when the request cannot be parsed far enough to
        # learn its real id. Without it the error handler below would hit an
        # unbound name on the first bad line (killing the worker) or reuse the
        # previous request's id on later ones.
        request_id = -1
        try:
            request = json.loads(line)
            request_id = int(request["id"])
            method = request["method"]
            params = request.get("params", {})

            if method == "ping":
                write_response(request_id, True, {"ready": True})
                continue
            if method != "transcribe":
                raise ValueError(f"unsupported method: {method}")

            text = worker.transcribe(
                audio_base64=str(params.get("audio_base64", "")),
                language=str(params.get("language") or "").strip() or None,
            )
            write_response(request_id, True, {"text": text})
        except Exception as exc:
            error_text = "".join(traceback.format_exception_only(type(exc), exc)).strip()
            write_response(request_id, False, error=error_text)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -1,125 +0,0 @@
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import traceback
|
||||
|
||||
|
||||
os.environ.setdefault("PYTHONIOENCODING", "utf-8")
|
||||
|
||||
|
||||
def log(message: str) -> None:
    """Send a single line of diagnostics to stderr, flushing right away."""
    stream = sys.stderr
    stream.writelines((str(message), "\n"))
    stream.flush()
|
||||
|
||||
|
||||
def write_response(request_id: int, ok: bool, result=None, error: str | None = None) -> None:
|
||||
payload = {
|
||||
"id": request_id,
|
||||
"ok": ok,
|
||||
}
|
||||
if ok:
|
||||
payload["result"] = result
|
||||
else:
|
||||
payload["error"] = error or "unknown error"
|
||||
|
||||
sys.stdout.write(json.dumps(payload, ensure_ascii=False) + "\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
class TtsWorker:
    """Holds a loaded MeloTTS model and renders text to WAV bytes with it."""

    def __init__(self) -> None:
        """Read the ``LOCAL_TTS_*`` settings, load the model and pick a speaker."""
        from melo.api import TTS

        env = os.environ
        self.language = env.get("LOCAL_TTS_LANGUAGE", "KR").strip() or "KR"
        self.speaker_key = env.get("LOCAL_TTS_SPEAKER", "KR").strip() or "KR"
        self.device = env.get("LOCAL_TTS_DEVICE", "auto").strip() or "auto"
        self.speed = float(env.get("LOCAL_TTS_SPEED", "1.12"))

        self.model = TTS(language=self.language, device=self.device)
        speaker_ids = self.model.hps.data.spk2id

        # Lookup order: exact key, upper-cased key, then the first speaker listed.
        self.speaker_id = speaker_ids.get(self.speaker_key)
        if self.speaker_id is None:
            self.speaker_id = speaker_ids.get(self.speaker_key.upper())
        if self.speaker_id is None:
            self.speaker_id = next(iter(speaker_ids.values()))

        log(
            f"local-tts ready language={self.language} speaker={self.speaker_key} device={self.device} speed={self.speed}"
        )

    def synthesize(self, text: str) -> bytes:
        """Render ``text`` to a temporary WAV file and return the file's bytes."""
        wav_path = ""

        try:
            # Reserve a file name; the model writes the audio to it by path.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as reserved:
                wav_path = reserved.name

            self.model.tts_to_file(text, self.speaker_id, wav_path, speed=self.speed, quiet=True)

            with open(wav_path, "rb") as rendered:
                wav_bytes = rendered.read()
            return wav_bytes
        finally:
            if wav_path:
                try:
                    os.unlink(wav_path)
                except OSError:
                    # Best-effort cleanup; a leftover temp file is not fatal.
                    pass
|
||||
|
||||
|
||||
def main() -> int:
    """Run the TTS worker: one JSON request per stdin line, one reply per line.

    Supported methods are ``ping`` and ``synthesize``; synthesized audio is
    returned base64-encoded under ``wav_base64``. Returns 1 when the worker
    cannot be initialised and 0 once stdin is exhausted.
    """
    try:
        worker = TtsWorker()
    except Exception as exc:
        log("failed to initialize local TTS worker")
        log("run `bun run setup:local-ai` first if dependencies are missing")
        log("".join(traceback.format_exception(exc)))
        return 1

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        # Placeholder id for requests that fail before their real id is known.
        # Without it the error handler would reference an unbound name on the
        # first malformed line, or a stale id from the previous request.
        request_id = -1
        try:
            request = json.loads(line)
            request_id = int(request["id"])
            method = request["method"]
            params = request.get("params", {})

            if method == "ping":
                write_response(request_id, True, {"ready": True})
                continue
            if method != "synthesize":
                raise ValueError(f"unsupported method: {method}")

            text = str(params.get("text", "")).strip()
            if not text:
                raise ValueError("text is empty")

            audio = worker.synthesize(text)
            write_response(
                request_id,
                True,
                {
                    "wav_base64": base64.b64encode(audio).decode("ascii"),
                },
            )
        except Exception as exc:
            error_text = "".join(traceback.format_exception_only(type(exc), exc)).strip()
            write_response(request_id, False, error=error_text)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
186
python/loopback_stt_worker.py
Normal file
186
python/loopback_stt_worker.py
Normal file
@@ -0,0 +1,186 @@
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import site
|
||||
import traceback
|
||||
from typing import Any
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
|
||||
def configure_windows_dll_search_paths() -> list[str]:
    """Register the pip-installed NVIDIA cuBLAS/cuDNN ``bin`` folders on Windows.

    Looks for ``nvidia/cublas/bin`` and ``nvidia/cudnn/bin`` under the
    interpreter's ``Lib/site-packages`` and under every directory reported by
    ``site.getsitepackages()``. Each folder that exists is passed to
    ``os.add_dll_directory`` and prepended to ``PATH`` unless it is already an
    entry there.

    Returns:
        The directories that were registered, in discovery order. Always an
        empty list on non-Windows platforms.
    """
    if sys.platform != "win32":
        return []

    venv_root = Path(sys.executable).resolve().parent.parent
    package_roots = [venv_root / "Lib" / "site-packages"]
    package_roots.extend(Path(package_path) for package_path in site.getsitepackages())
    candidates = [
        root / "nvidia" / library / "bin"
        for root in package_roots
        for library in ("cublas", "cudnn")
    ]

    added: list[str] = []
    seen: set[str] = set()
    for candidate in candidates:
        normalized = str(candidate.resolve())
        if normalized in seen:
            continue
        seen.add(normalized)
        if not candidate.exists():
            continue

        os.add_dll_directory(normalized)

        # Compare against whole PATH entries. A substring test would treat this
        # directory as present when only a longer sibling path (for example
        # "...\bin_backup") is on PATH, and the directory would never be added.
        current_path = os.environ.get("PATH", "")
        known_entries = {
            os.path.normcase(entry) for entry in current_path.split(os.pathsep) if entry
        }
        if os.path.normcase(normalized) not in known_entries:
            os.environ["PATH"] = normalized + os.pathsep + current_path
        added.append(normalized)

    return added
|
||||
|
||||
|
||||
# Leave a caller-provided value alone; otherwise set the flag to "1".
if "HF_HUB_DISABLE_SYMLINKS_WARNING" not in os.environ:
    os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"

# Register CUDA DLL folders once at import time and report them on stderr.
CONFIGURED_DLL_PATHS = configure_windows_dll_search_paths()
if CONFIGURED_DLL_PATHS:
    joined_paths = ", ".join(CONFIGURED_DLL_PATHS)
    sys.stderr.write(f"configured CUDA DLL search paths: {joined_paths}\n")
    sys.stderr.flush()
|
||||
|
||||
|
||||
def resolve_model() -> WhisperModel:
    """Load the Whisper model, falling back across device/compute combinations.

    ``WHISPER_MODEL``, ``WHISPER_DEVICE`` and ``WHISPER_COMPUTE_TYPE`` select
    the model and the preferred configuration. With device ``auto`` CUDA is
    tried before CPU; with compute type ``auto`` a default is chosen per
    device. The device and compute type that finally worked are stored on the
    model as ``_resolved_device`` / ``_resolved_compute_type``.

    Raises:
        Exception: the error from the last attempt when every attempt fails.
    """
    model_name = os.environ.get("WHISPER_MODEL", "large-v3-turbo")
    requested_device = os.environ.get("WHISPER_DEVICE", "auto")
    requested_compute = os.environ.get("WHISPER_COMPUTE_TYPE", "auto")

    attempts: list[tuple[str, str]]
    if requested_device == "auto":
        if requested_compute == "auto":
            attempts = [
                ("cuda", "float16"),
                ("cuda", "int8_float16"),
                ("cpu", "int8"),
                ("cpu", "float32"),
            ]
        else:
            attempts = [
                ("cuda", requested_compute),
                ("cpu", requested_compute),
            ]
    else:
        if requested_compute == "auto":
            compute = "float16" if requested_device == "cuda" else "int8"
        else:
            compute = requested_compute
        attempts = [(requested_device, compute)]

    last_error: Exception | None = None
    for device, compute_type in attempts:
        try:
            model = WhisperModel(model_name, device=device, compute_type=compute_type)
            setattr(model, "_resolved_device", device)
            setattr(model, "_resolved_compute_type", compute_type)
            return model
        except Exception as error:  # noqa: BLE001
            # Falling back is intentional, but say why on stderr so a silent
            # drop from CUDA to CPU can be diagnosed afterwards.
            print(
                f"whisper load failed device={device} compute_type={compute_type}: "
                f"{type(error).__name__}: {error}",
                file=sys.stderr,
                flush=True,
            )
            last_error = error

    assert last_error is not None
    raise last_error
|
||||
|
||||
|
||||
MODEL = resolve_model()
|
||||
LANGUAGE = os.environ.get("WHISPER_LANGUAGE", "ko")
|
||||
BEAM_SIZE = int(os.environ.get("WHISPER_BEAM_SIZE", "1"))
|
||||
|
||||
|
||||
def write(payload: dict[str, Any]) -> None:
    """Serialize ``payload`` as one JSON line on stdout and flush it."""
    encoded = json.dumps(payload, ensure_ascii=False)
    sys.stdout.write(f"{encoded}\n")
    sys.stdout.flush()
|
||||
|
||||
|
||||
def transcribe_pcm16_base64(pcm16_base64: str) -> str:
    """Transcribe base64-encoded 16-bit PCM and return the joined segment text.

    The decoded samples are converted to float32 by dividing by 32768 before
    they are handed to the module-level ``MODEL``.
    """
    raw_bytes = base64.b64decode(pcm16_base64)
    samples = np.frombuffer(raw_bytes, dtype=np.int16).astype(np.float32) / 32768.0

    decode_options = dict(
        language=LANGUAGE,
        task="transcribe",
        beam_size=BEAM_SIZE,
        condition_on_previous_text=False,
        vad_filter=False,
        without_timestamps=True,
        word_timestamps=False,
        temperature=0.0,
    )
    segments, _info = MODEL.transcribe(samples, **decode_options)

    # Drop segments without text, trim the rest, then skip anything left blank.
    trimmed = [segment.text.strip() for segment in segments if segment.text]
    return " ".join(piece for piece in trimmed if piece).strip()
|
||||
|
||||
|
||||
# Request loop: one JSON request per stdin line, one JSON reply per stdout line.
for raw_line in sys.stdin:
    line = raw_line.strip()
    if not line:
        continue

    # Parsing happens inside the try block so that a malformed line produces an
    # error reply instead of an uncaught exception that terminates the worker.
    # The id stays None (JSON null) when it could not be read from the request.
    request_id = None
    try:
        request = json.loads(line)
        request_id = request["id"]
        method = request["method"]
        params = request.get("params", {})

        if method == "ping":
            write(
                {
                    "id": request_id,
                    "result": {
                        "model": os.environ.get("WHISPER_MODEL", "large-v3-turbo"),
                        "device": getattr(MODEL, "_resolved_device", "unknown"),
                        "compute_type": getattr(MODEL, "_resolved_compute_type", "unknown"),
                    },
                }
            )
            continue

        if method == "transcribe":
            text = transcribe_pcm16_base64(params["pcm16_base64"])
            write(
                {
                    "id": request_id,
                    "result": {
                        "text": text,
                    },
                }
            )
            continue

        raise RuntimeError(f"unknown method: {method}")
    except Exception as error:  # noqa: BLE001
        # Full traceback goes to stderr; the caller only gets a one-line summary.
        traceback.print_exc(file=sys.stderr)
        write(
            {
                "id": request_id,
                "error": f"{type(error).__name__}: {error}",
            }
        )
|
||||
3
python/requirements-windows.txt
Normal file
3
python/requirements-windows.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
faster-whisper==1.2.1
|
||||
nvidia-cublas-cu12
|
||||
nvidia-cudnn-cu12>=9,<10
|
||||
@@ -1,2 +1 @@
|
||||
faster-whisper==1.2.1
|
||||
git+https://github.com/myshell-ai/MeloTTS.git@v0.1.2
|
||||
|
||||
138
src/audio/capture.ts
Normal file
138
src/audio/capture.ts
Normal file
@@ -0,0 +1,138 @@
|
||||
import { spawn, type ChildProcessByStdio } from "node:child_process";
|
||||
import process from "node:process";
|
||||
import type { Readable } from "node:stream";
|
||||
|
||||
import type { AppConfig } from "../config.js";
|
||||
import type { Logger } from "../logger.js";
|
||||
|
||||
/**
 * Spawns `command` and resolves with its exit code (`null` when it ended
 * without one). Rejects when the process emits `error`. The promise settles
 * only once, so a process that emits both `error` and `exit` cannot trigger
 * follow-up work twice.
 */
function waitForExitCode(
  command: string,
  args: string[],
  stdio: Array<"ignore" | "inherit">,
): Promise<number | null> {
  return new Promise<number | null>((resolve, reject) => {
    const child = spawn(command, args, { stdio });
    child.once("error", reject);
    child.once("exit", (code) => resolve(code));
  });
}

/**
 * Prints the available audio devices to the terminal.
 *
 * On Windows this runs ffmpeg's dshow device listing (exit codes 0 and 1 are
 * both accepted). Elsewhere it runs `pactl list sources short` and falls back
 * to `wpctl status -n` when pactl fails or cannot be started.
 *
 * @returns A promise that resolves once a listing command has succeeded.
 * @throws Error when no listing command succeeds.
 */
export async function printAudioDevices(): Promise<void> {
  if (process.platform === "win32") {
    const ffmpegCode = await waitForExitCode(
      "ffmpeg",
      ["-hide_banner", "-list_devices", "true", "-f", "dshow", "-i", "dummy"],
      ["ignore", "ignore", "inherit"],
    );
    if (ffmpegCode !== 0 && ffmpegCode !== 1) {
      throw new Error(`ffmpeg exited with code ${ffmpegCode ?? "null"}`);
    }
    return;
  }

  let pactlStarted = true;
  let pactlCode: number | null = null;
  try {
    pactlCode = await waitForExitCode("pactl", ["list", "sources", "short"], ["ignore", "inherit", "inherit"]);
  } catch {
    // pactl could not be started at all; the wpctl fallback below handles it.
    pactlStarted = false;
  }
  if (pactlStarted && pactlCode === 0) {
    return;
  }

  // A failure to start wpctl rejects with the spawn error itself.
  const wpctlCode = await waitForExitCode("wpctl", ["status", "-n"], ["ignore", "inherit", "inherit"]);
  if (wpctlCode === 0) {
    return;
  }

  if (pactlStarted) {
    throw new Error(`pactl exited with code ${pactlCode ?? "null"} and wpctl exited with code ${wpctlCode ?? "null"}`);
  }
  throw new Error(`pactl, wpctl 둘 다 실행할 수 없습니다. code=${wpctlCode ?? "null"}`);
}
|
||||
|
||||
/**
 * Starts an ffmpeg process that captures `config.AUDIO_SOURCE` and writes
 * mono, 16 kHz, signed 16-bit little-endian PCM to its stdout.
 *
 * Windows captures through dshow, Linux through pulse; the ffmpeg arguments
 * are otherwise the same.
 *
 * @param config Application config; `AUDIO_SOURCE` names the capture device.
 * @param logger Receives one info line describing the capture being started.
 * @returns The spawned ffmpeg child with stdin ignored and stdout/stderr piped.
 * @throws Error when `AUDIO_SOURCE` is not set or the platform is unsupported.
 */
export function spawnLoopbackCapture(
  config: AppConfig,
  logger: Logger,
): ChildProcessByStdio<null, Readable, Readable> {
  const source = config.AUDIO_SOURCE;
  if (!source) {
    throw new Error("AUDIO_SOURCE 설정이 필요합니다. 먼저 `bun run devices` 로 장치 이름을 확인하세요.");
  }

  let inputFormat: string;
  let inputTarget: string;
  let startMessage: string;
  let backend: string;

  switch (process.platform) {
    case "win32":
      inputFormat = "dshow";
      inputTarget = `audio=${source}`;
      startMessage = "Starting Windows loopback capture";
      backend = "ffmpeg-dshow";
      break;
    case "linux":
      inputFormat = "pulse";
      inputTarget = source;
      startMessage = "Starting Linux loopback capture";
      backend = "ffmpeg-pulse";
      break;
    default:
      throw new Error(`지원하지 않는 플랫폼입니다: ${process.platform}`);
  }

  const ffmpegArgs = [
    "-hide_banner",
    "-loglevel",
    "warning",
    "-fflags",
    "nobuffer",
    "-flags",
    "low_delay",
    "-f",
    inputFormat,
    "-i",
    inputTarget,
    "-ac",
    "1",
    "-ar",
    "16000",
    "-f",
    "s16le",
    "pipe:1",
  ];

  logger.info(startMessage, { source, backend });

  return spawn("ffmpeg", ffmpegArgs, {
    stdio: ["ignore", "pipe", "pipe"],
  });
}
|
||||
@@ -1,59 +0,0 @@
|
||||
import { existsSync } from "node:fs";
|
||||
import { spawnSync } from "node:child_process";
|
||||
import process from "node:process";
|
||||
|
||||
import ffmpegStatic from "ffmpeg-static";
|
||||
|
||||
/**
 * Returns the first entry that is a non-empty path existing on disk, or `null`
 * when none of the entries qualifies.
 */
function firstExisting(paths: Array<string | null | undefined>): string | null {
  const match = paths.find((candidate) => Boolean(candidate) && existsSync(candidate as string));
  return match ?? null;
}
|
||||
|
||||
/**
 * Asks the platform locator (`where` on Windows, `which` elsewhere) for ffmpeg
 * and returns the first reported path that exists, or `null` when the locator
 * fails or reports nothing usable.
 */
function findOnPath(): string | null {
  const onWindows = process.platform === "win32";
  const lookup = spawnSync(onWindows ? "where" : "which", [onWindows ? "ffmpeg.exe" : "ffmpeg"], {
    encoding: "utf8",
  });

  if (lookup.status !== 0) {
    return null;
  }

  for (const rawLine of lookup.stdout.split(/\r?\n/)) {
    const candidate = rawLine.trim();
    if (candidate.length > 0 && existsSync(candidate)) {
      return candidate;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Locates an ffmpeg binary. Candidates are checked in this order: the
 * `FFMPEG_PATH` variable, the `FFMPEG_BIN` variable, the path exported by
 * `ffmpeg-static`, then whatever the system locator finds.
 *
 * @returns The first candidate that exists on disk, or `null` if none does.
 */
export function resolveFfmpegPath(): string | null {
  const bundledBinary = ffmpegStatic as unknown as string | null;
  const systemBinary = findOnPath();
  const searchOrder = [process.env.FFMPEG_PATH, process.env.FFMPEG_BIN, bundledBinary, systemBinary];
  return firstExisting(searchOrder);
}
|
||||
|
||||
/**
 * Same lookup as `resolveFfmpegPath`, but a missing binary is an error.
 *
 * @returns The path of the ffmpeg binary that was found.
 * @throws Error with installation hints when no ffmpeg binary can be found.
 */
export function requireFfmpegPath(): string {
  const located = resolveFfmpegPath();
  if (!located) {
    const helpLines = [
      "ffmpeg를 찾지 못했습니다.",
      "1. `bun install` 재실행",
      "2. 안 되면 `bun pm trust ffmpeg-static` 후 다시 `bun install`",
      "3. 또는 시스템 ffmpeg를 설치해서 PATH에 추가",
    ];
    throw new Error(helpLines.join("\n"));
  }
  return located;
}
|
||||
@@ -1,456 +0,0 @@
|
||||
import { EventEmitter } from "node:events";
|
||||
|
||||
import prism from "prism-media";
|
||||
import { RealTimeVAD } from "avr-vad";
|
||||
import {
|
||||
AudioPlayerStatus,
|
||||
EndBehaviorType,
|
||||
NoSubscriberBehavior,
|
||||
VoiceConnectionStatus,
|
||||
createAudioPlayer,
|
||||
createAudioResource,
|
||||
entersState,
|
||||
joinVoiceChannel,
|
||||
StreamType,
|
||||
type AudioPlayer,
|
||||
type AudioReceiveStream,
|
||||
type VoiceConnection,
|
||||
} from "@discordjs/voice";
|
||||
import type { Client, Guild, VoiceBasedChannel } from "discord.js";
|
||||
|
||||
import type { AppConfig } from "../config.js";
|
||||
import { Logger } from "../logger.js";
|
||||
import { float32ToPcm16Buffer, int16ArrayToFloat32, Stereo48kToMono16kDownsampler, takeFrame } from "./pcm.js";
|
||||
import { ConversationMemory, type UserUtterance } from "../services/conversation.js";
|
||||
import type { LlmService } from "../services/llm.js";
|
||||
import type { SttService } from "../services/stt.js";
|
||||
import type { PreparedSpeechAudio, TtsService } from "../services/tts.js";
|
||||
|
||||
/**
 * Dependencies and targets bundled into one options object.
 * NOTE(review): presumably the constructor argument of GuildVoiceSession; confirm against its constructor.
 */
interface GuildVoiceSessionOptions {
  /** Discord client instance. */
  client: Client;
  /** Application configuration. */
  config: AppConfig;
  /** Logger used by the session. */
  logger: Logger;
  /** Guild the session belongs to. */
  guild: Guild;
  /** Voice channel the session is bound to. */
  voiceChannel: VoiceBasedChannel;
  /** Optional id of an associated text channel. */
  textChannelId?: string;
  /** Speech-to-text service. */
  stt: SttService;
  /** Text-to-speech service. */
  tts: TtsService;
  /** Language-model service. */
  llm: LlmService;
}
|
||||
|
||||
/** A piece of text to be spoken, tagged with where it came from. */
interface SpeechJob {
  /** Text to speak. */
  text: string;
  /** Origin of the text: produced by the assistant or requested manually. */
  source: "assistant" | "manual";
}
|
||||
|
||||
/**
 * Audio pipeline for a single speaker. PCM chunks emitted by the decoder go
 * through the Stereo48kToMono16kDownsampler, are cut into fixed-size frames
 * and are fed to a RealTimeVAD instance one frame after another.
 *
 * Instances are created through the async `create` factory because the VAD
 * has to be awaited before the session can be wired up.
 */
class UserAudioSession {
  /** Samples per VAD frame; also passed to the VAD as `frameSamples`. */
  private static readonly FRAME_SAMPLES = 1536;

  private readonly downsampler = new Stereo48kToMono16kDownsampler();
  /** Samples received so far that have not yet filled a complete frame. */
  private readonly pendingSamples: number[] = [];
  /** Tail of the promise chain that keeps VAD frame processing in order. */
  private processing = Promise.resolve();

  private constructor(
    private readonly logger: Logger,
    private readonly speakerId: string,
    private readonly speakerName: string,
    private readonly receiveStream: AudioReceiveStream,
    private readonly decoder: NodeJS.ReadWriteStream & { destroy: () => void },
    private readonly vad: RealTimeVAD,
    private readonly onSpeechEnd: (utterance: UserUtterance, audio: Float32Array) => void,
  ) {}

  /**
   * Builds the VAD, constructs the session and attaches the stream listeners.
   *
   * `onSpeechStart` fires when the VAD reports speech start; `onSpeechEnd`
   * receives an utterance with empty `text` together with the captured audio.
   */
  static async create(options: {
    logger: Logger;
    speakerId: string;
    speakerName: string;
    receiveStream: AudioReceiveStream;
    decoder: NodeJS.ReadWriteStream & { destroy: () => void };
    onSpeechStart: () => void;
    onSpeechEnd: (utterance: UserUtterance, audio: Float32Array) => void;
  }): Promise<UserAudioSession> {
    const ignoreEvent = (): undefined => undefined;

    const detector = await RealTimeVAD.new({
      model: "v5",
      sampleRate: 16000,
      frameSamples: 1536,
      positiveSpeechThreshold: 0.55,
      negativeSpeechThreshold: 0.35,
      redemptionFrames: 8,
      preSpeechPadFrames: 2,
      minSpeechFrames: 3,
      onFrameProcessed: ignoreEvent,
      onVADMisfire: ignoreEvent,
      onSpeechStart: () => {
        options.onSpeechStart();
      },
      onSpeechRealStart: ignoreEvent,
      onSpeechEnd: (audio: Float32Array) => {
        // The transcript is not known yet, so the utterance starts with empty text.
        const utterance = {
          speakerId: options.speakerId,
          speakerName: options.speakerName,
          text: "",
        };
        options.onSpeechEnd(utterance, audio);
      },
    });

    const session = new UserAudioSession(
      options.logger,
      options.speakerId,
      options.speakerName,
      options.receiveStream,
      options.decoder,
      detector,
      options.onSpeechEnd,
    );

    session.decoder.on("data", (chunk: Buffer) => {
      session.pushPcmChunk(chunk);
    });
    session.decoder.on("error", (error) => {
      options.logger.warn("PCM decoder error", options.speakerId, error);
    });
    session.receiveStream.on("error", (error) => {
      options.logger.warn("Audio receive stream error", options.speakerId, error);
    });

    return session;
  }

  /** Buffers the downsampled samples of `chunk` and queues every complete frame for the VAD. */
  private pushPcmChunk(chunk: Buffer): void {
    const downsampled = this.downsampler.pushStereo48kChunk(chunk);
    if (downsampled.length === 0) {
      return;
    }

    for (const sample of downsampled) {
      this.pendingSamples.push(sample);
    }

    for (
      let frame = takeFrame(this.pendingSamples, UserAudioSession.FRAME_SAMPLES);
      frame;
      frame = takeFrame(this.pendingSamples, UserAudioSession.FRAME_SAMPLES)
    ) {
      const floatFrame = int16ArrayToFloat32(frame);
      // Chain onto the previous frame so frames reach the VAD strictly in order;
      // a failing frame is logged and does not break the chain.
      this.processing = this.processing
        .then(() => this.vad.processAudio(floatFrame))
        .catch((error) => {
          this.logger.warn("VAD frame processing failed", this.speakerId, this.speakerName, error);
        });
    }
  }

  /** Tears down the receive stream, the decoder and the VAD; VAD teardown errors are only logged. */
  destroy(): void {
    this.receiveStream.destroy();
    this.decoder.destroy();
    void this.vad.destroy().catch((error) => {
      this.logger.warn("VAD destroy failed", this.speakerId, this.speakerName, error);
    });
  }
}
|
||||
|
||||
/**
 * Voice assistant session bound to one guild voice channel.
 *
 * Joins the channel, creates a UserAudioSession for every user that starts
 * speaking, runs STT -> LLM -> TTS for each finished utterance and plays the
 * resulting speech jobs one at a time. Any detected speech interrupts the
 * current playback (barge-in).
 */
export class GuildVoiceSession extends EventEmitter {
  readonly guildId: string;
  readonly voiceChannelId: string;

  private readonly connection: VoiceConnection;
  private readonly player: AudioPlayer;
  private readonly memory: ConversationMemory;
  private readonly trackedUsers = new Map<string, UserAudioSession>();
  /** In-flight createTrackedUser() calls, keyed by user id, to avoid duplicate subscriptions. */
  private readonly pendingUsers = new Map<string, Promise<void>>();
  private readonly queue: SpeechJob[] = [];

  private draining = false;
  private destroyed = false;
  private currentAbortController: AbortController | null = null;
  private currentPlayback: PreparedSpeechAudio | null = null;
  private textChannelId?: string;

  private constructor(private readonly options: GuildVoiceSessionOptions) {
    super();

    this.guildId = options.guild.id;
    this.voiceChannelId = options.voiceChannel.id;
    this.textChannelId = options.textChannelId;
    this.memory = new ConversationMemory(options.config.MAX_CONVERSATION_TURNS);
    this.player = createAudioPlayer({
      behaviors: {
        noSubscriber: NoSubscriberBehavior.Pause,
      },
    });
    this.connection = joinVoiceChannel({
      guildId: options.guild.id,
      channelId: options.voiceChannel.id,
      adapterCreator: options.guild.voiceAdapterCreator,
      selfDeaf: false,
      selfMute: false,
    });
  }

  /**
   * Joins the voice channel and resolves once the connection is ready.
   *
   * @throws if the connection does not become ready in time; the half-open
   *   connection is destroyed before the error is rethrown.
   */
  static async create(options: GuildVoiceSessionOptions): Promise<GuildVoiceSession> {
    const session = new GuildVoiceSession(options);
    try {
      await session.initialize();
    } catch (error) {
      // Do not leave a joined-but-unusable connection behind.
      await session.destroy().catch(() => undefined);
      throw error;
    }
    return session;
  }

  /** Wires player/connection events and waits (up to 30 s) for the Ready state. */
  private async initialize(): Promise<void> {
    this.player.on("error", (error) => {
      this.options.logger.warn("Audio player error", this.guildId, error);
    });

    this.connection.on("stateChange", (_oldState, newState) => {
      if (newState.status === VoiceConnectionStatus.Destroyed) {
        this.options.logger.info("Voice connection destroyed", this.guildId);
      }
    });

    this.connection.subscribe(this.player);
    await entersState(this.connection, VoiceConnectionStatus.Ready, 30_000);

    this.connection.receiver.speaking.on("start", (userId: string) => {
      if (userId === this.options.client.user?.id) {
        return;
      }

      // Fire-and-forget, but never let a failed VAD/stream setup become an
      // unhandled rejection.
      this.ensureTrackedUser(userId).catch((error) => {
        this.options.logger.warn("Failed to track speaker", this.guildId, userId, error);
      });
    });
  }

  /** Sets (or clears) the text channel used by announce(). */
  setTextChannel(textChannelId?: string): void {
    this.textChannelId = textChannelId;
  }

  /** Forgets the conversation history and stops whatever is playing or queued. */
  clearConversation(): void {
    this.memory.clear();
    this.interruptPlayback("conversation-reset");
  }

  /** Human-readable multi-line status report. */
  statusSummary(): string {
    const lines = [
      `세션 활성: 예`,
      `음성 채널: ${this.options.voiceChannel.name}`,
      `추적 유저 수: ${this.trackedUsers.size}`,
      `재생 상태: ${this.player.state.status}`,
      `대기열: ${this.queue.length}`,
      `최근 대화 턴: ${this.memory.recentTurns().length}`,
    ];
    return lines.join("\n");
  }

  /**
   * Queues `text` for speech output. Resolves when the queue has drained, or
   * immediately if another drain is already running.
   */
  async speakText(text: string): Promise<void> {
    this.queue.push({ text, source: "manual" });
    await this.drainQueue();
  }

  /** Drops all queued jobs, aborts the job in progress and stops the player. */
  interruptPlayback(reason: string): void {
    const busy = this.queue.length > 0 || this.player.state.status !== AudioPlayerStatus.Idle;
    if (busy) {
      this.options.logger.info("Interrupting playback", this.guildId, reason);
    }

    this.queue.length = 0;
    this.currentAbortController?.abort();
    this.currentAbortController = null;
    this.currentPlayback?.dispose();
    this.currentPlayback = null;
    this.player.stop(true);
  }

  /** Stops playback, releases every tracked speaker and leaves the channel. Idempotent. */
  async destroy(): Promise<void> {
    if (this.destroyed) {
      return;
    }
    this.destroyed = true;

    this.interruptPlayback("session-destroy");
    for (const session of this.trackedUsers.values()) {
      session.destroy();
    }
    this.trackedUsers.clear();
    this.pendingUsers.clear();

    // VoiceConnection.destroy() throws when the connection is already destroyed.
    if (this.connection.state.status !== VoiceConnectionStatus.Destroyed) {
      this.connection.destroy();
    }
  }

  /** Makes sure exactly one UserAudioSession exists (or is being created) for `userId`. */
  private async ensureTrackedUser(userId: string): Promise<void> {
    if (this.destroyed || this.trackedUsers.has(userId)) {
      return;
    }

    const inFlight = this.pendingUsers.get(userId);
    if (inFlight) {
      await inFlight;
      return;
    }

    const pending = this.createTrackedUser(userId).finally(() => {
      this.pendingUsers.delete(userId);
    });
    this.pendingUsers.set(userId, pending);
    await pending;
  }

  /** Subscribes to the user's audio, decodes it and attaches a VAD-backed session. */
  private async createTrackedUser(userId: string): Promise<void> {
    const speakerName = await this.resolveSpeakerName(userId);
    if (this.destroyed) {
      return;
    }

    const receiveStream = this.connection.receiver.subscribe(userId, {
      end: {
        behavior: EndBehaviorType.Manual,
      },
    });
    const decoder = new prism.opus.Decoder({
      rate: 48000,
      channels: 2,
      frameSize: 960,
    }) as NodeJS.ReadWriteStream & { destroy: () => void };

    receiveStream.pipe(decoder);

    let session: UserAudioSession;
    try {
      session = await UserAudioSession.create({
        logger: this.options.logger,
        speakerId: userId,
        speakerName,
        receiveStream,
        decoder,
        onSpeechStart: () => {
          this.interruptPlayback(`barge-in:${speakerName}`);
        },
        onSpeechEnd: (utterance, audio) => {
          this.handleSpeechEnd(utterance, audio).catch((error) => {
            this.options.logger.warn("Speech handling failed", this.guildId, utterance.speakerId, error);
          });
        },
      });
    } catch (error) {
      // Without a session nobody else would ever release these.
      receiveStream.destroy();
      decoder.destroy();
      throw error;
    }

    // The guild session may have been torn down while the VAD was loading.
    if (this.destroyed) {
      session.destroy();
      return;
    }

    this.trackedUsers.set(userId, session);
    this.options.logger.info("Tracking speaker", this.guildId, userId, speakerName);
  }

  /** Display name for a user, falling back to `user-<last 6 id chars>` when the fetch fails. */
  private async resolveSpeakerName(userId: string): Promise<string> {
    try {
      const user = await this.options.client.users.fetch(userId);
      return user.globalName ?? user.username;
    } catch {
      return `user-${userId.slice(-6)}`;
    }
  }

  /** Runs one finished utterance through STT and the LLM, then queues the spoken reply. */
  private async handleSpeechEnd(utterance: UserUtterance, audio: Float32Array): Promise<void> {
    // Ignore segments shorter than a quarter second at 16 kHz.
    if (audio.length < 16000 * 0.25) {
      return;
    }

    let transcript: string | null = null;
    try {
      transcript = await this.options.stt.transcribePcm16(float32ToPcm16Buffer(audio));
    } catch (error) {
      this.options.logger.warn("STT failed", this.guildId, utterance.speakerId, error);
      await this.announce(`음성 인식 실패: ${utterance.speakerName}`);
      return;
    }

    const text = transcript?.trim() ?? "";
    if (text.length === 0) {
      return;
    }

    const hydratedUtterance: UserUtterance = { ...utterance, text };
    const debugText = this.options.config.DEBUG_TEXT_EVENTS;

    this.options.logger.info("Transcript committed", this.guildId, hydratedUtterance.speakerName, hydratedUtterance.text);
    this.memory.addUserTurn(hydratedUtterance);
    if (debugText) {
      await this.announce(`🗣️ ${hydratedUtterance.speakerName}: ${hydratedUtterance.text}`);
    }

    let reply: string;
    try {
      reply = await this.options.llm.generateReply(this.memory, hydratedUtterance);
    } catch (error) {
      this.options.logger.warn("LLM failed", this.guildId, utterance.speakerId, error);
      reply = "지금은 답변 생성에 실패했습니다. 잠시 후 다시 말씀해 주세요.";
    }

    this.memory.addAssistantTurn(reply);
    if (debugText) {
      await this.announce(`🤖 ${reply}`);
    }

    this.queue.push({ text: reply, source: "assistant" });
    await this.drainQueue();
  }

  /** Plays queued jobs sequentially; re-entrant calls return immediately. */
  private async drainQueue(): Promise<void> {
    if (this.draining || this.destroyed) {
      return;
    }

    this.draining = true;
    try {
      while (!this.destroyed) {
        const job = this.queue.shift();
        if (!job) {
          break;
        }
        await this.playJob(job);
      }
    } finally {
      this.draining = false;
    }
  }

  /** Synthesizes and plays a single job, honouring interruptPlayback() at every step. */
  private async playJob(job: SpeechJob): Promise<void> {
    const abortController = new AbortController();
    this.currentAbortController = abortController;

    try {
      let playback: PreparedSpeechAudio;
      try {
        playback = await this.options.tts.preparePlayback(job.text, abortController.signal);
      } catch (error) {
        if (!abortController.signal.aborted) {
          this.options.logger.warn("TTS synthesis failed", this.guildId, job.source, error);
          await this.announce("음성 출력 생성에 실패했습니다.");
        }
        return;
      }

      // The job was interrupted while synthesis was still running: the audio
      // is stale, so release it instead of playing it.
      if (abortController.signal.aborted) {
        playback.dispose();
        return;
      }

      this.currentPlayback = playback;
      try {
        const resource = createAudioResource(playback.stream, {
          inputType: StreamType.Raw,
        });
        this.player.play(resource);

        await entersState(this.player, AudioPlayerStatus.Playing, 20_000).catch(() => null);
        await entersState(this.player, AudioPlayerStatus.Idle, 300_000);
      } catch (error) {
        if (!abortController.signal.aborted) {
          this.options.logger.warn("Audio playback failed", this.guildId, error);
        }
      } finally {
        // interruptPlayback() may already have disposed and cleared it.
        if (this.currentPlayback === playback) {
          playback.dispose();
          this.currentPlayback = null;
        }
      }
    } finally {
      if (this.currentAbortController === abortController) {
        this.currentAbortController = null;
      }
    }
  }

  /** Best-effort message to the configured text channel; failures are ignored. */
  private async announce(message: string): Promise<void> {
    if (!this.textChannelId) {
      return;
    }

    const channel = await this.options.client.channels.fetch(this.textChannelId).catch(() => null);
    if (!channel?.isTextBased() || !("send" in channel) || typeof channel.send !== "function") {
      return;
    }

    await channel.send(message).catch(() => null);
  }
}
|
||||
@@ -1,499 +0,0 @@
|
||||
import { spawn, type ChildProcess, type ChildProcessByStdio } from "node:child_process";
|
||||
import { once } from "node:events";
|
||||
import { promises as fs } from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import type { Readable, Writable } from "node:stream";
|
||||
|
||||
import { RealTimeVAD } from "avr-vad";
|
||||
|
||||
import type { AssistantRuntimeConfig } from "../config.js";
|
||||
import { Logger } from "../logger.js";
|
||||
import { requireFfmpegPath } from "./ffmpeg-path.js";
|
||||
import { takeFrame, int16ArrayToFloat32, float32ToPcm16Buffer } from "./pcm.js";
|
||||
import { ConversationMemory, type UserUtterance } from "../services/conversation.js";
|
||||
import type { LlmService } from "../services/llm.js";
|
||||
import type { SttService } from "../services/stt.js";
|
||||
import type { PreparedSpeechAudio, TtsService } from "../services/tts.js";
|
||||
|
||||
/** Dependencies handed to LocalVoiceSession by its creator. */
interface LocalVoiceSessionOptions {
  /** Runtime configuration (audio source/sink names, speaker name, debug flags, ...). */
  config: AssistantRuntimeConfig;
  /** Logger used for all diagnostics of the session. */
  logger: Logger;
  /** Speech-to-text backend; receives 16-bit PCM buffers. */
  stt: SttService;
  /** Text-to-speech backend that prepares playable audio streams. */
  tts: TtsService;
  /** Language model used to generate replies. */
  llm: LlmService;
}
|
||||
|
||||
/** One piece of text waiting in the playback queue to be synthesized and spoken. */
interface SpeechJob {
  /** Text handed to the TTS service. */
  text: string;
  /** "assistant" for generated replies, "manual" for text queued through speakText(). */
  source: "manual" | "assistant";
}
|
||||
|
||||
/**
 * Voice assistant session that uses the local machine's microphone and
 * speakers instead of Discord.
 *
 * Audio is captured by a child process (`pw-record`, or ffmpeg/dshow on
 * Windows), segmented by a RealTimeVAD, transcribed, answered by the LLM and
 * played back through `pw-play` (or PowerShell's SoundPlayer on Windows).
 */
export class LocalVoiceSession {
  /** Samples per VAD frame; must match the `frameSamples` given to RealTimeVAD.new(). */
  private static readonly FRAME_SAMPLES = 1536;

  private readonly memory: ConversationMemory;
  private readonly queue: SpeechJob[] = [];
  private readonly pendingSamples: number[] = [];

  private vad: RealTimeVAD | null = null;
  private recorder: ChildProcessByStdio<null, Readable, Readable> | null = null;
  private currentPlayer: ChildProcess | null = null;
  private currentAbortController: AbortController | null = null;
  private currentPlayback: PreparedSpeechAudio | null = null;
  /** Odd trailing byte of the previous recorder chunk (half of a 16-bit sample). */
  private leftoverBytes: Buffer | null = null;
  /** Tail of the sequential VAD work chain; never rejects (errors are logged). */
  private processing: Promise<void> = Promise.resolve();
  private draining = false;
  private destroyed = false;

  constructor(private readonly options: LocalVoiceSessionOptions) {
    this.memory = new ConversationMemory(options.config.MAX_CONVERSATION_TURNS);
  }

  /**
   * Creates the VAD and starts the recorder process.
   *
   * @throws if the VAD cannot be created or the recorder cannot be configured
   *   (e.g. missing LOCAL_AUDIO_SOURCE on Windows). The VAD is released again
   *   in the latter case.
   */
  async start(): Promise<void> {
    const vad = await RealTimeVAD.new({
      model: "v5",
      sampleRate: 16000,
      frameSamples: LocalVoiceSession.FRAME_SAMPLES,
      positiveSpeechThreshold: 0.55,
      negativeSpeechThreshold: 0.35,
      redemptionFrames: 8,
      preSpeechPadFrames: 2,
      minSpeechFrames: 3,
      onFrameProcessed: () => undefined,
      onVADMisfire: () => undefined,
      onSpeechStart: () => {
        this.interruptPlayback("local-barge-in");
      },
      onSpeechRealStart: () => undefined,
      onSpeechEnd: (audio: Float32Array) => {
        this.handleSpeechEnd(audio).catch((error) => {
          this.options.logger.warn("Local speech handling failed", error);
        });
      },
    });
    this.vad = vad;

    let recorder: ChildProcessByStdio<null, Readable, Readable>;
    try {
      recorder = this.spawnRecorder();
    } catch (error) {
      // Do not keep the VAD alive when the recorder cannot even be configured.
      this.vad = null;
      await vad.destroy().catch((destroyError) => {
        this.options.logger.warn("Local VAD destroy failed", destroyError);
      });
      throw error;
    }
    this.recorder = recorder;

    recorder.stdout.on("data", (chunk: Buffer) => {
      this.pushPcm16Chunk(chunk);
    });
    recorder.stderr.on("data", (chunk: Buffer) => {
      const text = chunk.toString().trim();
      if (text.length > 0) {
        this.options.logger.debug("[pw-record]", text);
      }
    });
    // Without an 'error' listener a failed spawn (e.g. binary not installed)
    // is thrown as an uncaught exception and takes the process down.
    recorder.on("error", (error) => {
      this.options.logger.warn("Local recorder process error", error);
    });
    recorder.on("exit", (code, signal) => {
      if (!this.destroyed) {
        this.options.logger.warn("pw-record exited unexpectedly", { code, signal });
      }
    });
  }

  /** Stops playback, terminates the recorder and releases the VAD. */
  async destroy(): Promise<void> {
    this.destroyed = true;
    this.interruptPlayback("local-shutdown");

    const recorder = this.recorder;
    this.recorder = null;
    // Only wait for 'exit' when the process is still running; an already
    // exited recorder would never emit it again and the await would hang.
    if (recorder && recorder.exitCode === null && recorder.signalCode === null) {
      const exited = once(recorder, "exit").catch(() => null);
      if (recorder.kill("SIGTERM")) {
        await exited;
      }
    }

    // Let an in-flight frame finish before the VAD goes away.
    await this.processing;

    const vad = this.vad;
    this.vad = null;
    if (vad) {
      await vad.destroy().catch((error) => {
        this.options.logger.warn("Local VAD destroy failed", error);
      });
    }
  }

  /** Forgets the conversation history and stops whatever is playing or queued. */
  clearConversation(): void {
    this.memory.clear();
    this.interruptPlayback("local-reset");
  }

  /**
   * Queues `text` for speech output. Resolves when the queue has drained, or
   * immediately if another drain is already running.
   */
  async speakText(text: string): Promise<void> {
    this.queue.push({ text, source: "manual" });
    await this.drainQueue();
  }

  /** Human-readable multi-line status report. */
  statusSummary(): string {
    const lines = [
      "모드: local",
      `플랫폼: ${process.platform}`,
      `입력 source: ${this.options.config.LOCAL_AUDIO_SOURCE ?? "default"}`,
      `출력 sink: ${this.describeSink()}`,
      `대기열: ${this.queue.length}`,
      `최근 대화 턴: ${this.memory.recentTurns().length}`,
    ];
    return lines.join("\n");
  }

  /** Starts the platform-specific recorder emitting raw 16 kHz mono s16 PCM on stdout. */
  private spawnRecorder(): ChildProcessByStdio<null, Readable, Readable> {
    if (process.platform === "win32") {
      return this.spawnWindowsRecorder();
    }

    const source = this.options.config.LOCAL_AUDIO_SOURCE;
    const args = ["--rate", "16000", "--channels", "1", "--format", "s16", "--raw"];
    if (source) {
      args.push("--target", source);
    }
    args.push("-");

    this.options.logger.info("Starting local recorder", {
      source: source ?? "default",
    });

    return spawn("pw-record", args, {
      stdio: ["ignore", "pipe", "pipe"],
    });
  }

  /**
   * Windows recorder: ffmpeg capturing the configured DirectShow device.
   *
   * @throws if LOCAL_AUDIO_SOURCE is not configured.
   */
  private spawnWindowsRecorder(): ChildProcessByStdio<null, Readable, Readable> {
    const ffmpegPath = this.getFfmpegPath();
    const sourceName = this.options.config.LOCAL_AUDIO_SOURCE;
    if (!sourceName) {
      throw new Error("Windows 로컬 모드는 LOCAL_AUDIO_SOURCE 설정이 필요합니다. `bun run audio:devices` 로 이름을 확인해 주세요.");
    }

    const args = [
      "-hide_banner",
      "-loglevel",
      "warning",
      "-f",
      "dshow",
      "-i",
      `audio=${sourceName}`,
      "-ac",
      "1",
      "-ar",
      "16000",
      "-f",
      "s16le",
      "pipe:1",
    ];

    this.options.logger.info("Starting local recorder", {
      source: sourceName,
      backend: "ffmpeg-dshow",
    });

    return spawn(ffmpegPath, args, {
      stdio: ["ignore", "pipe", "pipe"],
    });
  }

  /** Converts one recorder chunk to samples and queues every complete frame for the VAD. */
  private pushPcm16Chunk(chunk: Buffer): void {
    if (this.destroyed || !this.vad) {
      return;
    }

    // Pipe reads may split a 16-bit sample across two chunks; carry the odd
    // byte over instead of dropping it, which would misalign every later sample.
    let data: Buffer = chunk;
    if (this.leftoverBytes) {
      data = Buffer.concat([this.leftoverBytes, chunk]);
      this.leftoverBytes = null;
    }
    const usable = data.length - (data.length % 2);
    if (usable < data.length) {
      this.leftoverBytes = Buffer.from(data.subarray(usable));
    }

    for (let offset = 0; offset < usable; offset += 2) {
      this.pendingSamples.push(data.readInt16LE(offset));
    }

    for (
      let frame = takeFrame(this.pendingSamples, LocalVoiceSession.FRAME_SAMPLES);
      frame;
      frame = takeFrame(this.pendingSamples, LocalVoiceSession.FRAME_SAMPLES)
    ) {
      const floatFrame = int16ArrayToFloat32(frame);
      // Frames are chained so the VAD sees them strictly in order.
      this.processing = this.processing
        .then(async () => {
          if (this.destroyed) {
            return;
          }
          await this.vad?.processAudio(floatFrame);
        })
        .catch((error) => {
          this.options.logger.warn("Local VAD processing failed", error);
        });
    }
  }

  /** Runs one finished utterance through STT and the LLM, then queues the spoken reply. */
  private async handleSpeechEnd(audio: Float32Array): Promise<void> {
    // Ignore segments shorter than a quarter second at 16 kHz.
    if (audio.length < 16000 * 0.25) {
      return;
    }

    let transcript: string | null = null;
    try {
      transcript = await this.options.stt.transcribePcm16(float32ToPcm16Buffer(audio));
    } catch (error) {
      this.options.logger.warn("Local STT failed", error);
      return;
    }

    const text = transcript?.trim() ?? "";
    if (text.length === 0) {
      return;
    }

    const utterance: UserUtterance = {
      speakerId: "local-user",
      speakerName: this.options.config.LOCAL_SPEAKER_NAME,
      text,
    };
    const debugText = this.options.config.DEBUG_TEXT_EVENTS;

    this.memory.addUserTurn(utterance);
    this.options.logger.info("Local transcript", utterance.text);
    if (debugText) {
      console.log(`\n[you] ${utterance.text}`);
    }

    let reply: string;
    try {
      reply = await this.options.llm.generateReply(this.memory, utterance);
    } catch (error) {
      this.options.logger.warn("Local LLM failed", error);
      reply = "지금은 답변 생성에 실패했습니다. 잠시 후 다시 말씀해 주세요.";
    }

    this.memory.addAssistantTurn(reply);
    if (debugText) {
      console.log(`[bot] ${reply}\n`);
    }

    this.queue.push({ text: reply, source: "assistant" });
    await this.drainQueue();
  }

  /** Drops all queued jobs, aborts the job in progress and kills the player process. */
  private interruptPlayback(reason: string): void {
    if (this.queue.length > 0 || this.currentPlayer) {
      this.options.logger.info("Interrupting local playback", reason);
    }

    this.queue.length = 0;
    this.currentAbortController?.abort();
    this.currentAbortController = null;
    this.currentPlayback?.dispose();
    this.currentPlayback = null;

    const player = this.currentPlayer;
    this.currentPlayer = null;
    if (player && !player.killed) {
      player.kill("SIGKILL");
    }
  }

  /** Plays queued jobs sequentially; re-entrant calls return immediately. */
  private async drainQueue(): Promise<void> {
    if (this.draining || this.destroyed) {
      return;
    }

    this.draining = true;
    try {
      while (!this.destroyed) {
        const job = this.queue.shift();
        if (!job) {
          break;
        }
        await this.playJob(job);
      }
    } finally {
      this.draining = false;
    }
  }

  /** Synthesizes and plays a single job, honouring interruptPlayback() at every step. */
  private async playJob(job: SpeechJob): Promise<void> {
    const abortController = new AbortController();
    this.currentAbortController = abortController;

    try {
      let playback: PreparedSpeechAudio;
      try {
        playback = await this.options.tts.preparePlayback(job.text, abortController.signal);
      } catch (error) {
        if (!abortController.signal.aborted) {
          this.options.logger.warn("Local TTS synthesis failed", error);
        }
        return;
      }

      // Interrupted while synthesis was still running: the audio is stale.
      if (abortController.signal.aborted) {
        playback.dispose();
        return;
      }

      this.currentPlayback = playback;
      try {
        await this.playToSink(playback, abortController.signal);
      } catch (error) {
        if (!abortController.signal.aborted) {
          this.options.logger.warn("Local playback failed", error);
        }
      } finally {
        // interruptPlayback() may already have disposed and cleared it.
        if (this.currentPlayback === playback) {
          playback.dispose();
          this.currentPlayback = null;
        }
      }
    } finally {
      if (this.currentAbortController === abortController) {
        this.currentAbortController = null;
      }
    }
  }

  /**
   * Streams the prepared audio (raw 48 kHz stereo s16) into `pw-play`, or
   * delegates to the Windows player.
   *
   * @throws if the player cannot be started or exits with a non-zero code
   *   while the job was not aborted.
   */
  private async playToSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise<void> {
    if (process.platform === "win32") {
      await this.playToWindowsDefaultSink(playback, signal);
      return;
    }

    const args = ["--rate", "48000", "--channels", "2", "--format", "s16", "--raw"];
    if (this.options.config.LOCAL_AUDIO_SINK) {
      args.push("--target", this.options.config.LOCAL_AUDIO_SINK);
    }
    args.push("-");

    const player = spawn("pw-play", args, {
      stdio: ["pipe", "ignore", "pipe"],
    });
    this.currentPlayer = player;

    player.stderr.on("data", (chunk: Buffer) => {
      const text = chunk.toString().trim();
      if (text.length > 0) {
        this.options.logger.debug("[pw-play]", text);
      }
    });
    // Killing the player while audio is still being piped raises EPIPE on its
    // stdin; unhandled, that would crash the process.
    player.stdin.on("error", (error) => {
      this.options.logger.debug("[pw-play] stdin error", error);
    });

    const onAbort = (): void => {
      playback.stream.destroy();
      if (!player.killed) {
        player.kill("SIGKILL");
      }
    };
    signal.addEventListener("abort", onAbort, { once: true });

    playback.stream.pipe(player.stdin);

    try {
      // once() also rejects when the child emits 'error' (e.g. spawn failure).
      const [code, playSignal] = (await once(player, "exit")) as [number | null, NodeJS.Signals | null];

      if (signal.aborted) {
        return;
      }
      if (code !== 0) {
        throw new Error(`pw-play exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`);
      }
    } finally {
      signal.removeEventListener("abort", onAbort);
      if (this.currentPlayer === player) {
        this.currentPlayer = null;
      }
    }
  }

  /**
   * Windows playback: buffers the whole stream, writes it to a temporary WAV
   * file and plays it synchronously through PowerShell's SoundPlayer. The
   * temporary file is removed on every exit path.
   */
  private async playToWindowsDefaultSink(playback: PreparedSpeechAudio, signal: AbortSignal): Promise<void> {
    const chunks: Buffer[] = [];

    await new Promise<void>((resolve, reject) => {
      playback.stream.on("data", (chunk: Buffer) => {
        chunks.push(Buffer.from(chunk));
      });
      playback.stream.once("end", resolve);
      playback.stream.once("error", reject);
      signal.addEventListener(
        "abort",
        () => {
          playback.stream.destroy();
          reject(new Error("playback aborted"));
        },
        { once: true },
      );
    }).catch((error) => {
      if (signal.aborted) {
        return;
      }
      throw error;
    });

    if (signal.aborted) {
      return;
    }

    const wav = createWaveFileBuffer(Buffer.concat(chunks), 48000, 2, 16);
    const tempPath = path.join(os.tmpdir(), `realtime-voice-bot-${Date.now()}.wav`);

    try {
      await fs.writeFile(tempPath, wav);
      if (signal.aborted) {
        return;
      }

      const psScript = [
        "Add-Type -AssemblyName System;",
        `$player = New-Object System.Media.SoundPlayer('${tempPath.replace(/'/g, "''")}');`,
        "$player.PlaySync();",
      ].join(" ");

      const player = spawn("powershell", ["-NoProfile", "-Command", psScript], {
        stdio: ["ignore", "ignore", "pipe"],
      });
      this.currentPlayer = player;

      player.stderr.on("data", (chunk: Buffer) => {
        const text = chunk.toString().trim();
        if (text.length > 0) {
          this.options.logger.debug("[powershell-player]", text);
        }
      });

      const onAbort = (): void => {
        if (!player.killed) {
          player.kill("SIGKILL");
        }
      };
      signal.addEventListener("abort", onAbort, { once: true });

      try {
        const [code, playSignal] = (await once(player, "exit")) as [number | null, NodeJS.Signals | null];

        if (signal.aborted) {
          return;
        }
        if (code !== 0) {
          throw new Error(`powershell playback exited with code=${code ?? "null"} signal=${playSignal ?? "null"}`);
        }
      } finally {
        signal.removeEventListener("abort", onAbort);
        if (this.currentPlayer === player) {
          this.currentPlayer = null;
        }
      }
    } finally {
      await fs.unlink(tempPath).catch(() => null);
    }
  }

  /** Path of the ffmpeg binary used for Windows capture. */
  private getFfmpegPath(): string {
    return requireFfmpegPath();
  }

  /** Sink label for statusSummary(): the configured sink or a platform-specific default name. */
  private describeSink(): string {
    const fallback = process.platform === "win32" ? "system-default" : "default";
    return this.options.config.LOCAL_AUDIO_SINK ?? fallback;
  }
}
|
||||
|
||||
/**
 * Wraps raw PCM data in a 44-byte canonical RIFF/WAVE header (format tag 1).
 *
 * @param pcm raw sample data, copied verbatim after the header.
 * @param sampleRate samples per second written to the header.
 * @param channels channel count written to the header.
 * @param bitsPerSample bit depth written to the header.
 * @returns a new buffer containing header followed by `pcm`.
 */
function createWaveFileBuffer(
  pcm: Buffer,
  sampleRate: number,
  channels: number,
  bitsPerSample: number,
): Buffer {
  const bytesPerSample = bitsPerSample / 8;
  const byteRate = sampleRate * channels * bytesPerSample;
  const blockAlign = channels * bytesPerSample;

  const header = Buffer.alloc(44);
  let cursor = 0;
  const tag = (text: string): void => {
    header.write(text, cursor, 4, "ascii");
    cursor += 4;
  };
  const u32 = (value: number): void => {
    header.writeUInt32LE(value, cursor);
    cursor += 4;
  };
  const u16 = (value: number): void => {
    header.writeUInt16LE(value, cursor);
    cursor += 2;
  };

  // RIFF chunk descriptor
  tag("RIFF");
  u32(36 + pcm.length);
  tag("WAVE");

  // "fmt " sub-chunk: 16 bytes, format tag 1
  tag("fmt ");
  u32(16);
  u16(1);
  u16(channels);
  u32(sampleRate);
  u32(byteRate);
  u16(blockAlign);
  u16(bitsPerSample);

  // "data" sub-chunk
  tag("data");
  u32(pcm.length);

  return Buffer.concat([header, pcm]);
}
|
||||
@@ -1,60 +0,0 @@
|
||||
/**
 * Streaming converter from interleaved 16-bit little-endian stereo PCM to mono
 * at one third of the input rate.
 *
 * Each stereo frame is averaged to one mono sample, and every three mono
 * samples are averaged to one output sample. Input that does not end on a
 * frame boundary, and mono samples that do not fill a group of three, are
 * carried over to the next call.
 */
export class Stereo48kToMono16kDownsampler {
  /** Bytes per interleaved stereo frame (2 channels x 16 bit). */
  private static readonly BYTES_PER_FRAME = 4;
  /** Input mono samples folded into one output sample. */
  private static readonly DECIMATION = 3;

  private readonly pendingMono48k: number[] = [];
  /** Trailing bytes of the previous chunk that did not form a whole stereo frame. */
  private leftoverBytes: Buffer | null = null;

  /**
   * Feeds one chunk of stereo PCM.
   *
   * @param chunk interleaved s16le stereo bytes; may have any length.
   * @returns the output samples that became available (possibly empty).
   */
  pushStereo48kChunk(chunk: Buffer): Int16Array {
    const { BYTES_PER_FRAME, DECIMATION } = Stereo48kToMono16kDownsampler;

    // Re-attach the partial frame left over from the previous call so the
    // left/right framing stays aligned across arbitrary chunk boundaries.
    let data: Buffer = chunk;
    if (this.leftoverBytes) {
      data = Buffer.concat([this.leftoverBytes, chunk]);
      this.leftoverBytes = null;
    }
    const usable = data.length - (data.length % BYTES_PER_FRAME);
    if (usable < data.length) {
      this.leftoverBytes = Buffer.from(data.subarray(usable));
    }

    for (let offset = 0; offset < usable; offset += BYTES_PER_FRAME) {
      const left = data.readInt16LE(offset);
      const right = data.readInt16LE(offset + 2);
      this.pendingMono48k.push(Math.round((left + right) / 2));
    }

    const outputLength = Math.floor(this.pendingMono48k.length / DECIMATION);
    const output = new Int16Array(outputLength);
    for (let index = 0; index < outputLength; index += 1) {
      const base = index * DECIMATION;
      const sum =
        this.pendingMono48k[base]! + this.pendingMono48k[base + 1]! + this.pendingMono48k[base + 2]!;
      output[index] = Math.round(sum / DECIMATION);
    }

    this.pendingMono48k.splice(0, outputLength * DECIMATION);
    return output;
  }
}
|
||||
|
||||
/**
 * Scales 16-bit integer samples to floats by dividing each one by 32768.
 *
 * @param input signed 16-bit samples.
 * @returns a new Float32Array of the same length.
 */
export function int16ArrayToFloat32(input: Int16Array): Float32Array {
  return Float32Array.from(input, (sample) => sample / 32768);
}
|
||||
|
||||
/**
 * Encodes float samples as signed 16-bit little-endian PCM.
 *
 * Values are clamped to [-1, 1]; negative values are scaled by 32768 and
 * non-negative values by 32767, then rounded.
 *
 * @param input float samples.
 * @returns a buffer of `input.length * 2` bytes.
 */
export function float32ToPcm16Buffer(input: Float32Array): Buffer {
  const pcm = Buffer.allocUnsafe(input.length * 2);
  input.forEach((sample, position) => {
    const clamped = Math.max(-1, Math.min(1, sample));
    const factor = clamped < 0 ? 32768 : 32767;
    pcm.writeInt16LE(Math.round(clamped * factor), position * 2);
  });
  return pcm;
}
|
||||
|
||||
/**
 * Removes the first `frameSize` samples from `source` and returns them.
 *
 * @param source sample queue; mutated when a frame is taken.
 * @param frameSize number of samples per frame.
 * @returns the frame as an Int16Array, or `null` (leaving `source` untouched)
 *   when fewer than `frameSize` samples are queued.
 */
export function takeFrame(source: number[], frameSize: number): Int16Array | null {
  if (source.length < frameSize) {
    return null;
  }
  return new Int16Array(source.splice(0, frameSize));
}
|
||||
148
src/audio/realtime-segmenter.ts
Normal file
148
src/audio/realtime-segmenter.ts
Normal file
@@ -0,0 +1,148 @@
|
||||
/** Callbacks and tuning knobs accepted by RealtimeSegmenter. */
interface RealtimeSegmenterOptions {
  /** Receives every accepted speech segment as 16-bit PCM bytes. */
  onSegment: (pcm16: Buffer) => void;
  /** Called for every processed frame with its peak absolute sample value. */
  onLevel?: (peak: number) => void;
  /** Called when a segment starts, with the peak of the frame that triggered it. */
  onSpeechStart?: (peak: number) => void;
  /** Called with the sample count of a segment dropped for being shorter than `minSpeechSamples`. */
  onSpeechDiscarded?: (samples: number) => void;
  /** Called with the sample count of an accepted segment, right before `onSegment`. */
  onSpeechReady?: (samples: number) => void;

  /** Maximum number of samples kept from before the speech start (default 3200). */
  preRollSamples?: number;
  /** Peak a frame must reach to count towards starting a segment (default 900). */
  speechStartThreshold?: number;
  /** Peak below which a frame inside a segment counts as silence (default 450). */
  speechContinueThreshold?: number;
  /** Consecutive loud frames required to start a segment (default 2). */
  speechStartFrames?: number;
  /** Consecutive silent frames that end a segment (default 24). */
  speechEndFrames?: number;
  /** Segments shorter than this many samples are discarded (default 7200). */
  minSpeechSamples?: number;
  /** A segment is cut once it holds at least this many samples (default 160000). */
  maxSpeechSamples?: number;
}
|
||||
|
||||
/**
 * Energy-based speech segmenter for a stream of 16-bit little-endian PCM.
 *
 * Incoming bytes are cut into frames of 320 samples. A segment starts after
 * `speechStartFrames` consecutive frames whose peak reaches
 * `speechStartThreshold`, and ends after `speechEndFrames` consecutive frames
 * below `speechContinueThreshold` or once `maxSpeechSamples` is reached.
 * Finished segments are delivered through `options.onSegment`.
 */
export class RealtimeSegmenter {
  private readonly pendingSamples: number[] = [];
  private readonly preRoll: number[] = [];
  private readonly speech: number[] = [];

  private readonly frameSamples = 320;
  private readonly preRollSamples: number;
  private readonly speechStartThreshold: number;
  private readonly speechContinueThreshold: number;
  private readonly speechStartFrames: number;
  private readonly speechEndFrames: number;
  private readonly minSpeechSamples: number;
  private readonly maxSpeechSamples: number;

  private speechActive = false;
  private speechCandidateFrames = 0;
  private silenceFrames = 0;
  /** Low byte of a sample whose high byte has not arrived yet. */
  private danglingByte: number | null = null;

  constructor(private readonly options: RealtimeSegmenterOptions) {
    this.preRollSamples = options.preRollSamples ?? 3200;
    this.speechStartThreshold = options.speechStartThreshold ?? 900;
    this.speechContinueThreshold = options.speechContinueThreshold ?? 450;
    this.speechStartFrames = options.speechStartFrames ?? 2;
    this.speechEndFrames = options.speechEndFrames ?? 24;
    this.minSpeechSamples = options.minSpeechSamples ?? 7200;
    this.maxSpeechSamples = options.maxSpeechSamples ?? 160000;
  }

  /**
   * Feeds raw s16le bytes; callbacks fire synchronously for every complete frame.
   *
   * @param chunk PCM bytes of any length. A sample split across two chunks is
   *   reassembled instead of being dropped.
   */
  pushChunk(chunk: Buffer): void {
    let offset = 0;

    if (this.danglingByte !== null && chunk.length > 0) {
      // Complete the sample started by the previous chunk, sign-extending to 16 bit.
      const raw = this.danglingByte | (chunk[0]! << 8);
      this.pendingSamples.push((raw << 16) >> 16);
      this.danglingByte = null;
      offset = 1;
    }

    for (; offset + 1 < chunk.length; offset += 2) {
      this.pendingSamples.push(chunk.readInt16LE(offset));
    }
    if (offset < chunk.length) {
      this.danglingByte = chunk[offset]!;
    }

    for (
      let frame = takeFrame(this.pendingSamples, this.frameSamples);
      frame;
      frame = takeFrame(this.pendingSamples, this.frameSamples)
    ) {
      this.processFrame(frame);
    }
  }

  /** Discards all buffered audio and returns to the idle (no speech) state. */
  reset(): void {
    this.pendingSamples.length = 0;
    this.preRoll.length = 0;
    this.speech.length = 0;
    this.danglingByte = null;
    this.speechActive = false;
    this.speechCandidateFrames = 0;
    this.silenceFrames = 0;
  }

  /** Advances the start/continue/end state machine by one frame. */
  private processFrame(frame: Int16Array): void {
    let peak = 0;
    for (const sample of frame) {
      const magnitude = Math.abs(sample);
      if (magnitude > peak) {
        peak = magnitude;
      }
    }

    this.options.onLevel?.(peak);

    if (!this.speechActive) {
      this.speechCandidateFrames = peak >= this.speechStartThreshold ? this.speechCandidateFrames + 1 : 0;

      if (this.speechCandidateFrames < this.speechStartFrames) {
        appendWithCap(this.preRoll, frame, this.preRollSamples);
        return;
      }

      this.speechActive = true;
      this.silenceFrames = 0;
      // Seed the segment with the audio that preceded this frame. The frame
      // itself is appended exactly once below; putting it into the pre-roll
      // first would duplicate it in the emitted segment.
      this.speech.splice(0, this.speech.length, ...this.preRoll);
      this.preRoll.length = 0;
      this.options.onSpeechStart?.(peak);
    }

    this.speech.push(...frame);

    if (peak >= this.speechContinueThreshold) {
      this.silenceFrames = 0;
    } else {
      this.silenceFrames += 1;
    }

    const stillSpeaking = this.silenceFrames < this.speechEndFrames;
    if (stillSpeaking && this.speech.length < this.maxSpeechSamples) {
      return;
    }

    const speechPcm = int16ArrayToBuffer(Int16Array.from(this.speech));
    this.speechActive = false;
    this.speech.length = 0;
    this.silenceFrames = 0;
    this.speechCandidateFrames = 0;

    const sampleCount = speechPcm.length / 2;
    if (speechPcm.length < this.minSpeechSamples * 2) {
      this.options.onSpeechDiscarded?.(sampleCount);
      return;
    }

    this.options.onSpeechReady?.(sampleCount);
    this.options.onSegment(speechPcm);
  }
}
|
||||
|
||||
/**
 * Removes the first `size` samples from `source` and returns them as an
 * Int16Array. When fewer than `size` samples are buffered, `source` is left
 * untouched and `null` is returned.
 */
function takeFrame(source: number[], size: number): Int16Array | null {
  return source.length < size ? null : Int16Array.from(source.splice(0, size));
}
|
||||
|
||||
/**
 * Appends `samples` to `target`, then drops the oldest entries from the front
 * so that `target` holds at most `cap` values.
 */
function appendWithCap(target: number[], samples: Int16Array, cap: number): void {
  for (const sample of samples) {
    target.push(sample);
  }

  const overflow = target.length - cap;
  if (overflow > 0) {
    target.splice(0, overflow);
  }
}
|
||||
|
||||
/**
 * Serializes 16-bit samples into a Buffer, two bytes per sample, explicitly
 * little-endian.
 */
function int16ArrayToBuffer(input: Int16Array): Buffer {
  // allocUnsafe is fine here: every byte is overwritten by the loop below.
  const bytes = Buffer.allocUnsafe(input.length * 2);
  input.forEach((sample, position) => {
    bytes.writeInt16LE(sample, position * 2);
  });
  return bytes;
}
|
||||
@@ -12,30 +12,46 @@ const emptyToUndefined = z.preprocess((value) => {
|
||||
}, z.string().min(1).optional());
|
||||
|
||||
const envSchema = z.object({
|
||||
DISCORD_BOT_TOKEN: emptyToUndefined,
|
||||
DISCORD_APPLICATION_ID: emptyToUndefined,
|
||||
DISCORD_COMMAND_GUILD_ID: emptyToUndefined,
|
||||
OLLAMA_BASE_URL: z.string().min(1).default("http://localhost:11434"),
|
||||
OLLAMA_MODEL: z.string().min(1).default("qwen3:0.6b"),
|
||||
OLLAMA_KEEP_ALIVE: z.string().min(1).default("5m"),
|
||||
OLLAMA_NUM_CTX: z.coerce.number().int().min(512).max(32768).default(4096),
|
||||
LOCAL_AI_VENV_PATH: z.string().min(1).default(".local-ai/.venv"),
|
||||
LOCAL_AI_CACHE_DIR: z.string().min(1).default(".local-ai/cache"),
|
||||
LOCAL_AI_PYTHON: emptyToUndefined,
|
||||
LOCAL_STT_MODEL: z.string().min(1).default("tiny"),
|
||||
LOCAL_STT_DEVICE: z.string().min(1).default("auto"),
|
||||
LOCAL_STT_COMPUTE_TYPE: z.string().min(1).default("auto"),
|
||||
LOCAL_STT_BEAM_SIZE: z.coerce.number().int().min(1).max(8).default(1),
|
||||
LOCAL_TTS_LANGUAGE: z.string().min(1).default("KR"),
|
||||
LOCAL_TTS_SPEAKER: z.string().min(1).default("KR"),
|
||||
LOCAL_TTS_DEVICE: z.string().min(1).default("auto"),
|
||||
LOCAL_TTS_SPEED: z.coerce.number().min(0.8).max(1.6).default(1.12),
|
||||
BOT_DEFAULT_LANGUAGE: z.string().min(2).default("ko"),
|
||||
MAX_CONVERSATION_TURNS: z.coerce.number().int().min(4).max(30).default(12),
|
||||
LOCAL_AUDIO_SOURCE: emptyToUndefined,
|
||||
LOCAL_AUDIO_SINK: emptyToUndefined,
|
||||
LOCAL_SPEAKER_NAME: z.string().min(1).default("local-user"),
|
||||
DEBUG_TEXT_EVENTS: z
|
||||
AUDIO_SOURCE: emptyToUndefined,
|
||||
DOCKER_BIN: emptyToUndefined,
|
||||
TTS_ENABLED: z
|
||||
.string()
|
||||
.optional()
|
||||
.transform((value) => value?.trim().toLowerCase() !== "false"),
|
||||
TTS_IMAGE: z.string().min(1).default("realtime-voice-bot-melotts:v0.1.2"),
|
||||
TTS_LANGUAGE: z.string().min(1).default("KR"),
|
||||
TTS_SPEAKER: z.string().min(1).default("KR"),
|
||||
TTS_DEVICE: z.string().min(1).default("cpu"),
|
||||
TTS_SPEED: z.coerce.number().min(0.5).max(2).default(1.18),
|
||||
TTS_PLAYBACK_RATE: z.coerce.number().min(0.5).max(4).default(2.2),
|
||||
TTS_SDP_RATIO: z.coerce.number().min(0).max(1).default(0.22),
|
||||
TTS_NOISE_SCALE: z.coerce.number().min(0).max(2).default(0.55),
|
||||
TTS_NOISE_SCALE_W: z.coerce.number().min(0).max(2).default(0.75),
|
||||
TTS_CACHE_DIR: z.string().min(1).default(".local-ai/tts-cache"),
|
||||
TTS_OUTPUT_DIR: z.string().min(1).default(".local-ai/tts-output"),
|
||||
DEBUG: z
|
||||
.string()
|
||||
.optional()
|
||||
.transform((value) => value?.trim().toLowerCase() === "true"),
|
||||
OLLAMA_BASE_URL: z.string().min(1).default("http://127.0.0.1:11434"),
|
||||
OLLAMA_MODEL: z.string().min(1).default("qwen3:8b"),
|
||||
OLLAMA_KEEP_ALIVE: z.string().min(1).default("5m"),
|
||||
MAX_CONVERSATION_TURNS: z.coerce.number().int().min(1).max(20).default(6),
|
||||
WHISPER_MODEL: z.string().min(1).default("large-v3-turbo"),
|
||||
WHISPER_LANGUAGE: z.string().min(1).default("ko"),
|
||||
WHISPER_DEVICE: z.enum(["auto", "cuda", "cpu"]).default("auto"),
|
||||
WHISPER_COMPUTE_TYPE: z.string().min(1).default("auto"),
|
||||
WHISPER_BEAM_SIZE: z.coerce.number().int().min(1).max(8).default(2),
|
||||
SEGMENT_START_THRESHOLD: z.coerce.number().int().min(100).max(10000).default(900),
|
||||
SEGMENT_CONTINUE_THRESHOLD: z.coerce.number().int().min(50).max(10000).default(450),
|
||||
SEGMENT_START_FRAMES: z.coerce.number().int().min(1).max(10).default(2),
|
||||
SEGMENT_END_FRAMES: z.coerce.number().int().min(4).max(60).default(24),
|
||||
SEGMENT_PREROLL_SAMPLES: z.coerce.number().int().min(320).max(16000).default(3200),
|
||||
SEGMENT_MIN_SPEECH_SAMPLES: z.coerce.number().int().min(1600).max(64000).default(7200),
|
||||
SEGMENT_MAX_SPEECH_SAMPLES: z.coerce.number().int().min(16000).max(320000).default(160000),
|
||||
DEBUG_TRANSCRIPTS: z
|
||||
.string()
|
||||
.optional()
|
||||
.transform((value) => value === "true"),
|
||||
@@ -43,32 +59,7 @@ const envSchema = z.object({
|
||||
});
|
||||
|
||||
/** Parsed application configuration: the output type inferred from `envSchema`. */
export type AppConfig = z.infer<typeof envSchema>;
|
||||
/** Alias of `AppConfig`; it adds no fields of its own. */
export type AssistantRuntimeConfig = AppConfig;
|
||||
/** `AssistantRuntimeConfig` intersected with string-typed (non-optional) Discord credentials. */
export type DiscordRuntimeConfig = AssistantRuntimeConfig & {
|
||||
DISCORD_BOT_TOKEN: string;
|
||||
DISCORD_APPLICATION_ID: string;
|
||||
};
|
||||
|
||||
/**
 * Parses `process.env` with `envSchema` and returns the result.
 * Any error raised by `envSchema.parse` propagates to the caller.
 */
export function loadConfig(): AppConfig {
  const rawEnvironment = process.env;
  return envSchema.parse(rawEnvironment);
}
|
||||
|
||||
/**
 * Returns `value` when it is a non-empty string.
 *
 * @param value Candidate value; `undefined` and `""` are both rejected.
 * @param name Variable name used in the error message.
 * @throws Error naming the missing environment variable.
 */
function requirePresent(value: string | undefined, name: string): string {
  if (value) {
    return value;
  }
  throw new Error(`${name} 환경변수가 필요합니다.`);
}
|
||||
|
||||
/**
 * Returns the given configuration unchanged, typed as `AssistantRuntimeConfig`.
 * No validation is performed here.
 */
export function requireAssistantRuntimeConfig(config: AppConfig): AssistantRuntimeConfig {
  const assistantConfig: AssistantRuntimeConfig = config;
  return assistantConfig;
}
|
||||
|
||||
/**
 * Builds a `DiscordRuntimeConfig` as a shallow copy of `config` in which both
 * Discord credentials are guaranteed to be non-empty strings.
 *
 * @throws Error when `DISCORD_BOT_TOKEN` or `DISCORD_APPLICATION_ID` is
 *   missing; the token is checked first.
 */
export function requireDiscordRuntimeConfig(config: AppConfig): DiscordRuntimeConfig {
  const base = requireAssistantRuntimeConfig(config);
  const botToken = requirePresent(config.DISCORD_BOT_TOKEN, "DISCORD_BOT_TOKEN");
  const applicationId = requirePresent(config.DISCORD_APPLICATION_ID, "DISCORD_APPLICATION_ID");

  return {
    ...base,
    DISCORD_BOT_TOKEN: botToken,
    DISCORD_APPLICATION_ID: applicationId,
  };
}
|
||||
|
||||
@@ -1,238 +0,0 @@
|
||||
import process from "node:process";
|
||||
|
||||
import {
|
||||
GatewayIntentBits,
|
||||
REST,
|
||||
Routes,
|
||||
SlashCommandBuilder,
|
||||
type ChatInputCommandInteraction,
|
||||
type Client,
|
||||
type GuildMember,
|
||||
type VoiceBasedChannel,
|
||||
} from "discord.js";
|
||||
import { Client as DiscordClient } from "discord.js";
|
||||
|
||||
import { GuildVoiceSession } from "./audio/guild-voice-session.js";
|
||||
import { type DiscordRuntimeConfig } from "./config.js";
|
||||
import { Logger } from "./logger.js";
|
||||
import { LocalFasterWhisperSttService } from "./services/local-stt.js";
|
||||
import { LocalMeloTtsService } from "./services/local-tts.js";
|
||||
import { OllamaLlmService } from "./services/ollama-llm.js";
|
||||
|
||||
/**
 * Starts the Discord voice-assistant bot.
 *
 * Warms up the STT and TTS services, registers the slash commands
 * (`join`, `leave`, `status`, `reset`, `say`) once the client is ready,
 * dispatches incoming chat-input interactions to their handlers, and logs in
 * with the configured bot token. At most one `GuildVoiceSession` is tracked
 * per guild id. SIGINT/SIGTERM tear everything down and exit the process.
 *
 * @param config Runtime configuration including the Discord credentials.
 * @param logger Logger shared with the services and sessions.
 */
export async function runDiscordBot(config: DiscordRuntimeConfig, logger: Logger): Promise<void> {
  const commands = [
    new SlashCommandBuilder().setName("join").setDescription("현재 들어가 있는 음성 채널에 봇을 입장시킵니다."),
    new SlashCommandBuilder().setName("leave").setDescription("현재 음성 세션을 종료합니다."),
    new SlashCommandBuilder().setName("status").setDescription("현재 음성 세션 상태를 확인합니다."),
    new SlashCommandBuilder().setName("reset").setDescription("대화 문맥과 재생 큐를 초기화합니다."),
    new SlashCommandBuilder()
      .setName("say")
      .setDescription("텍스트를 바로 음성으로 읽습니다.")
      .addStringOption((option) =>
        option.setName("text").setDescription("읽을 문장").setRequired(true).setMaxLength(400),
      ),
  ].map((command) => command.toJSON());

  const client = new DiscordClient({
    intents: [GatewayIntentBits.Guilds, GatewayIntentBits.GuildVoiceStates],
  });

  const stt = new LocalFasterWhisperSttService(config, logger);
  const tts = new LocalMeloTtsService(config, logger);
  const llm = new OllamaLlmService(config);
  const sessions = new Map<string, GuildVoiceSession>();

  await stt.warmup();
  await tts.warmup();

  /** Voice channel the invoking member is currently in, or null. */
  function getVoiceChannel(interaction: ChatInputCommandInteraction): VoiceBasedChannel | null {
    const member = interaction.member as GuildMember | null;
    return member?.voice.channel ?? null;
  }

  /** Registers the commands for one guild when DISCORD_COMMAND_GUILD_ID is set, otherwise globally. */
  async function registerCommands(_appClient: Client): Promise<void> {
    const rest = new REST({ version: "10" }).setToken(config.DISCORD_BOT_TOKEN);
    if (config.DISCORD_COMMAND_GUILD_ID) {
      await rest.put(
        Routes.applicationGuildCommands(config.DISCORD_APPLICATION_ID, config.DISCORD_COMMAND_GUILD_ID),
        {
          body: commands,
        },
      );
      logger.info("Registered guild commands", config.DISCORD_COMMAND_GUILD_ID);
      return;
    }

    await rest.put(Routes.applicationCommands(config.DISCORD_APPLICATION_ID), {
      body: commands,
    });
    logger.info("Registered global commands");
  }

  /**
   * Returns the guild's session for the caller's voice channel, reusing the
   * existing one when it is already in that channel and replacing it otherwise.
   */
  async function createSession(interaction: ChatInputCommandInteraction): Promise<GuildVoiceSession> {
    if (!interaction.guild) {
      throw new Error("Guild interaction required");
    }

    const voiceChannel = getVoiceChannel(interaction);
    if (!voiceChannel) {
      throw new Error("먼저 음성 채널에 들어가 주세요.");
    }

    const guildId = interaction.guild.id;
    const existing = sessions.get(guildId);
    if (existing && existing.voiceChannelId === voiceChannel.id) {
      existing.setTextChannel(interaction.channelId);
      return existing;
    }

    if (existing) {
      try {
        await existing.destroy();
      } finally {
        // Forget the old session even if its teardown failed, so a broken
        // session cannot stay registered for the guild.
        sessions.delete(guildId);
      }
    }

    const session = await GuildVoiceSession.create({
      client,
      config,
      logger,
      guild: interaction.guild,
      voiceChannel,
      textChannelId: interaction.channelId,
      stt,
      tts,
      llm,
    });
    sessions.set(guildId, session);
    return session;
  }

  async function handleJoin(interaction: ChatInputCommandInteraction): Promise<void> {
    await interaction.deferReply({ ephemeral: true });

    try {
      const session = await createSession(interaction);
      await interaction.editReply(
        `음성 비서를 시작했습니다. 채널: ${session.statusSummary().split("\n")[1]?.replace("음성 채널: ", "") ?? "알 수 없음"}`,
      );
    } catch (error) {
      const message = error instanceof Error ? error.message : "세션 생성에 실패했습니다.";
      await interaction.editReply(message);
    }
  }

  async function handleLeave(interaction: ChatInputCommandInteraction): Promise<void> {
    // Acknowledge before tearing down: Discord expects an initial response
    // within a few seconds and session.destroy() has no such bound.
    await interaction.deferReply({ ephemeral: true });

    const guildId = interaction.guild?.id;
    const session = guildId ? sessions.get(guildId) : undefined;
    if (!guildId || !session) {
      await interaction.editReply("현재 활성화된 음성 세션이 없습니다.");
      return;
    }

    try {
      await session.destroy();
    } finally {
      sessions.delete(guildId);
    }
    await interaction.editReply("음성 세션을 종료했습니다.");
  }

  async function handleStatus(interaction: ChatInputCommandInteraction): Promise<void> {
    const session = interaction.guild ? sessions.get(interaction.guild.id) : undefined;
    if (!session) {
      await interaction.reply({ content: "현재 활성화된 음성 세션이 없습니다.", ephemeral: true });
      return;
    }

    await interaction.reply({
      content: session.statusSummary(),
      ephemeral: true,
    });
  }

  async function handleReset(interaction: ChatInputCommandInteraction): Promise<void> {
    const session = interaction.guild ? sessions.get(interaction.guild.id) : undefined;
    if (!session) {
      await interaction.reply({ content: "현재 활성화된 음성 세션이 없습니다.", ephemeral: true });
      return;
    }

    session.clearConversation();
    await interaction.reply({ content: "대화 문맥과 재생 큐를 초기화했습니다.", ephemeral: true });
  }

  async function handleSay(interaction: ChatInputCommandInteraction): Promise<void> {
    await interaction.deferReply({ ephemeral: true });

    const session = interaction.guild ? sessions.get(interaction.guild.id) : undefined;
    if (!session) {
      await interaction.editReply("먼저 `/join` 으로 음성 세션을 시작해 주세요.");
      return;
    }

    const text = interaction.options.getString("text", true).trim();
    await session.speakText(text);
    await interaction.editReply("읽기 요청을 대기열에 추가했습니다.");
  }

  // Shared teardown promise so that repeated signals (e.g. SIGINT followed by
  // SIGTERM) run the teardown only once.
  let shuttingDown: Promise<void> | null = null;

  async function shutdown(exitCode = 0): Promise<void> {
    if (!shuttingDown) {
      shuttingDown = (async () => {
        logger.info("Shutting down");
        for (const session of sessions.values()) {
          await session.destroy().catch((error) => {
            logger.warn("Session shutdown failed", error);
          });
        }
        sessions.clear();
        await Promise.allSettled([stt.destroy?.(), tts.destroy?.()]);
        await client.destroy();
      })();
    }

    await shuttingDown;
    process.exit(exitCode);
  }

  client.once("ready", async () => {
    logger.info("Discord client ready", client.user?.tag ?? "unknown");
    try {
      await registerCommands(client);
    } catch (error) {
      logger.error("Command registration failed", error);
    }
  });

  client.on("interactionCreate", async (interaction) => {
    if (!interaction.isChatInputCommand()) {
      return;
    }

    try {
      switch (interaction.commandName) {
        case "join":
          await handleJoin(interaction);
          return;
        case "leave":
          await handleLeave(interaction);
          return;
        case "status":
          await handleStatus(interaction);
          return;
        case "reset":
          await handleReset(interaction);
          return;
        case "say":
          await handleSay(interaction);
          return;
        default:
          await interaction.reply({ content: "알 수 없는 명령입니다.", ephemeral: true });
      }
    } catch (error) {
      logger.error("Interaction handler failed", error);
      // Use editReply when the interaction was already acknowledged.
      if (interaction.deferred || interaction.replied) {
        await interaction.editReply("명령 처리 중 오류가 발생했습니다.").catch(() => null);
        return;
      }
      await interaction.reply({ content: "명령 처리 중 오류가 발생했습니다.", ephemeral: true }).catch(() => null);
    }
  });

  process.on("SIGINT", () => {
    void shutdown(0);
  });

  process.on("SIGTERM", () => {
    void shutdown(0);
  });

  await client.login(config.DISCORD_BOT_TOKEN);
}
|
||||
93
src/docker-runtime.ts
Normal file
93
src/docker-runtime.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { constants as fsConstants } from "node:fs";
|
||||
import { access } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import process from "node:process";
|
||||
|
||||
import type { AppConfig } from "./config.js";
|
||||
|
||||
/**
 * Resolves to true when `access(target, F_OK)` succeeds and to false when it
 * fails for any reason; it never rejects.
 */
async function fileExists(target: string): Promise<boolean> {
  return access(target, fsConstants.F_OK).then(
    () => true,
    () => false,
  );
}
|
||||
|
||||
/**
 * Runs `command` with `args` and resolves with everything it wrote to stdout.
 *
 * stdin and stderr are ignored. The promise never rejects.
 *
 * @param command Executable to spawn.
 * @param args Arguments passed to the executable.
 * @returns The captured stdout when the process ends with exit code 0;
 *   `null` when it could not be spawned or ended any other way.
 */
async function captureStdout(command: string, args: string[]): Promise<string | null> {
  return await new Promise<string | null>((resolve) => {
    const child = spawn(command, args, {
      stdio: ["ignore", "pipe", "ignore"],
      windowsHide: true,
    });

    // Keep raw chunks and decode once at the end, so a multi-byte character
    // split across two chunks is not corrupted.
    const chunks: Buffer[] = [];
    child.stdout.on("data", (chunk: Buffer) => {
      chunks.push(chunk);
    });

    child.on("error", () => resolve(null));
    // "close" (unlike "exit") fires only after the stdio streams have ended,
    // so all output has been received by the time we resolve.
    child.on("close", (code) => {
      resolve(code === 0 ? Buffer.concat(chunks).toString() : null);
    });
  });
}
|
||||
|
||||
/**
 * Runs `where docker` through cmd.exe and returns the first reported path
 * that exists on disk, or null when the lookup yields nothing usable.
 */
async function resolveWithWhere(): Promise<string | null> {
  const output = await captureStdout("cmd.exe", ["/d", "/s", "/c", "where docker"]);
  if (!output) {
    return null;
  }

  for (const rawLine of output.split(/\r?\n/)) {
    const candidate = rawLine.trim();
    if (candidate.length === 0) {
      continue;
    }
    if (await fileExists(candidate)) {
      return candidate;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Decides which docker executable to invoke.
 *
 * Order: `config.DOCKER_BIN` when set and present on disk; plain `"docker"`
 * on every platform except win32; on win32 the two hard-coded Docker Desktop
 * install paths, then a `where docker` lookup.
 *
 * @throws Error when nothing is found on win32.
 */
export async function resolveDockerCommand(config: AppConfig): Promise<string> {
  const configured = config.DOCKER_BIN;
  if (configured && await fileExists(configured)) {
    return configured;
  }

  if (process.platform !== "win32") {
    return "docker";
  }

  const wellKnownLocations = [
    "C:\\Program Files\\Docker\\Docker\\resources\\bin\\docker.exe",
    "C:\\Program Files\\Docker\\Docker\\resources\\bin\\docker-cli.exe",
  ];
  for (const location of wellKnownLocations) {
    if (await fileExists(location)) {
      return location;
    }
  }

  const located = await resolveWithWhere();
  if (located) {
    return located;
  }

  const messageParts = [
    "Docker 실행 파일을 찾지 못했습니다.",
    "VSCode를 완전히 다시 열어 PATH를 새로 고치거나,",
    ".env에 DOCKER_BIN=C:\\Program Files\\Docker\\Docker\\resources\\bin\\docker.exe 를 넣어주세요.",
  ];
  throw new Error(messageParts.join(" "));
}
|
||||
424
src/index.ts
424
src/index.ts
@@ -1,31 +1,427 @@
|
||||
import process from "node:process";
|
||||
import { createInterface } from "node:readline";
|
||||
|
||||
import { loadConfig, requireAssistantRuntimeConfig, requireDiscordRuntimeConfig } from "./config.js";
|
||||
import { runDiscordBot } from "./discord-main.js";
|
||||
import { loadConfig } from "./config.js";
|
||||
import { Logger } from "./logger.js";
|
||||
import { printLocalAudioDevices, runLocalAssistant } from "./local-main.js";
|
||||
import { printAudioDevices, spawnLoopbackCapture } from "./audio/capture.js";
|
||||
import { RealtimeSegmenter } from "./audio/realtime-segmenter.js";
|
||||
import { FasterWhisperSttService } from "./services/faster-whisper-stt.js";
|
||||
import { MeloTtsService } from "./services/melo-tts.js";
|
||||
import { OllamaLlmService } from "./services/ollama-llm.js";
|
||||
|
||||
const mode = process.argv[2] ?? "discord";
|
||||
const config = loadConfig();
|
||||
const logger = new Logger(config.LOG_LEVEL);
|
||||
const mode = process.argv[2] ?? "test-stt";
|
||||
|
||||
/**
 * Runs the realtime capture -> segmentation -> STT pipeline, optionally
 * followed by the LLM reply step and TTS playback.
 *
 * Captured PCM chunks are fed to a `RealtimeSegmenter`; each emitted segment
 * is queued and transcribed one at a time. While a TTS reply is playing,
 * incoming chunks are not forwarded to the segmenter. The function installs
 * process-level handlers that tear the services down and exit.
 *
 * @param options.enableLlm Also assess and answer each transcript.
 * @param options.enableTts Also speak each reply (only when TTS_ENABLED).
 */
async function runSttTest(options: { enableLlm: boolean; enableTts: boolean }): Promise<void> {
  const config = loadConfig();
  const logger = new Logger(config.DEBUG ? config.LOG_LEVEL : "error");
  const stt = new FasterWhisperSttService(config, logger);
  const llm = options.enableLlm ? new OllamaLlmService(config, logger) : null;
  let tts = options.enableTts && config.TTS_ENABLED ? new MeloTtsService(config, logger) : null;
  let capture = null as ReturnType<typeof spawnLoopbackCapture> | null;
  let shuttingDown: Promise<void> | null = null;
  let suppressCapture = false;

  // Counters reported by the heartbeat and watchdog logs.
  const stats = {
    chunks: 0,
    bytes: 0,
    peak: 0,
    lastChunkAt: 0,
    lastLevelLogAt: 0,
    speechStarted: false,
    emittedSegments: 0,
  };

  /** Sends `signal` to the capture process if it is still running. */
  const stopCapture = (signal: "SIGTERM" | "SIGKILL"): void => {
    if (capture && !capture.killed && capture.exitCode === null) {
      capture.kill(signal);
    }
  };

  const shutdown = async (exitCode: number, reason: string, error?: unknown): Promise<void> => {
    if (shuttingDown) {
      // A teardown is already in flight; wait for it instead of starting another.
      return await shuttingDown;
    }

    const teardown = async (): Promise<void> => {
      if (error) {
        logger.error(`Shutting down: ${reason}`, error);
      } else {
        logger.info("Shutting down", reason);
      }

      stopCapture("SIGTERM");

      await stt.destroy().catch((destroyError) => {
        logger.warn("STT destroy failed", destroyError);
      });
      if (tts) {
        await tts.destroy().catch((destroyError) => {
          logger.warn("TTS destroy failed", destroyError);
        });
      }
    };

    shuttingDown = teardown();
    await shuttingDown;
    process.exit(exitCode);
  };

  process.once("SIGINT", () => {
    void shutdown(0, "SIGINT");
  });
  process.once("SIGTERM", () => {
    void shutdown(0, "SIGTERM");
  });
  process.once("uncaughtException", (error) => {
    void shutdown(1, "uncaughtException", error);
  });
  process.once("unhandledRejection", (reason) => {
    void shutdown(1, "unhandledRejection", reason);
  });
  process.once("exit", () => {
    stopCapture("SIGKILL");
    void stt.destroy();
    if (tts) {
      void tts.destroy();
    }
  });

  console.log("STT 준비중...");
  await stt.warmup();
  logger.info("STT warmup finished");
  console.log("STT 준비 완료");

  if (llm) {
    console.log("LLM 준비중...");
    await llm.warmup();
    logger.info("LLM warmup finished");
    console.log("LLM 준비 완료");
  }

  if (tts) {
    console.log("TTS 준비중...");
    try {
      await tts.warmup();
      logger.info("TTS warmup finished", {
        image: config.TTS_IMAGE,
        language: config.TTS_LANGUAGE,
        speaker: config.TTS_SPEAKER,
      });
      console.log("TTS 준비 완료");
    } catch (error) {
      // A failed warmup only disables TTS; the rest of the pipeline keeps running.
      logger.warn("TTS warmup failed", error);
      console.log("TTS 비활성화: bun run setup:tts 를 먼저 실행하세요.");
      tts = null;
    }
  }

  type QueuedSegment = { pcm16: Buffer; queuedAt: number; index: number };
  const transcriptionQueue: QueuedSegment[] = [];
  let transcribing = false;
  let nextSegmentIndex = 1;

  /**
   * Console output policy shared by transcripts and replies: without DEBUG
   * the plain line is always printed; with DEBUG the debug line is printed
   * only when DEBUG_TRANSCRIPTS is also set.
   */
  const printLine = (debugLine: string, plainLine: string): void => {
    if (!config.DEBUG) {
      console.log(plainLine);
      return;
    }
    if (config.DEBUG_TRANSCRIPTS) {
      console.log(debugLine);
    }
  };

  /** Plays `reply` through TTS while keeping captured audio away from the segmenter. */
  const speakReply = async (reply: string): Promise<void> => {
    if (!tts) {
      return;
    }

    suppressCapture = true;
    segmenter.reset();
    try {
      await tts.speak(reply);
    } catch (error) {
      logger.warn("TTS playback failed", error);
    } finally {
      suppressCapture = false;
      stats.speechStarted = false;
      stats.peak = 0;
    }
  };

  /** Asks the LLM whether `text` needs an answer and, if so, generates and speaks it. */
  const respondTo = async (index: number, text: string): Promise<void> => {
    if (!llm) {
      return;
    }

    const assessmentStartedAt = Date.now();
    const assessment = await llm.assessReplyNeed(text);
    logger.info("Reply assessment", {
      index,
      should_reply: assessment.shouldReply,
      likely_needs_lookup: assessment.likelyNeedsLookup,
      reason: assessment.reason,
      assessment_ms: Date.now() - assessmentStartedAt,
    });

    if (!assessment.shouldReply) {
      if (config.DEBUG) {
        console.log(`[skip] ${assessment.reason}\n`);
      }
      return;
    }

    const llmStartedAt = Date.now();
    const reply = await llm.generateReply(text, {
      onProgress: (message) => {
        console.log(config.DEBUG ? `[assistant] ${message}` : `답변> ${message}`);
      },
    });
    logger.info("LLM latency", {
      index,
      llm_ms: Date.now() - llmStartedAt,
    });
    logger.info("LLM reply", { index, text: reply });

    printLine(`[assistant] ${reply}\n`, `답변> ${reply}`);
    await speakReply(reply);
  };

  /** Processes the next queued segment unless one is already being processed. */
  const runNext = async (): Promise<void> => {
    if (transcribing) {
      return;
    }
    const next = transcriptionQueue.shift();
    if (!next) {
      return;
    }

    transcribing = true;
    try {
      const startedAt = Date.now();
      const text = await stt.transcribePcm16(next.pcm16);
      logger.info("STT latency", {
        index: next.index,
        wait_ms: startedAt - next.queuedAt,
        transcribe_ms: Date.now() - startedAt,
      });

      if (!text) {
        logger.info("빈 전사 결과");
        return;
      }

      logger.info("Transcript", { index: next.index, text });
      printLine(`\n[text] ${text}\n`, `사용자> ${text}`);
      await respondTo(next.index, text);
    } catch (error) {
      logger.error("STT/LLM failed", error);
    } finally {
      transcribing = false;
      // Drain whatever was queued while this segment was being handled.
      void runNext();
    }
  };

  const segmenter = new RealtimeSegmenter({
    preRollSamples: config.SEGMENT_PREROLL_SAMPLES,
    speechStartThreshold: config.SEGMENT_START_THRESHOLD,
    speechContinueThreshold: config.SEGMENT_CONTINUE_THRESHOLD,
    speechStartFrames: config.SEGMENT_START_FRAMES,
    speechEndFrames: config.SEGMENT_END_FRAMES,
    minSpeechSamples: config.SEGMENT_MIN_SPEECH_SAMPLES,
    maxSpeechSamples: config.SEGMENT_MAX_SPEECH_SAMPLES,
    onLevel: (peak) => {
      stats.peak = Math.max(stats.peak, peak);

      const now = Date.now();
      if (now - stats.lastLevelLogAt < 3000) {
        return;
      }

      // Heartbeat at most every 3 s; the peak is tracked per heartbeat window.
      stats.lastLevelLogAt = now;
      logger.info("Audio input heartbeat", {
        chunks: stats.chunks,
        bytes: stats.bytes,
        peak: stats.peak,
        speech_started: stats.speechStarted,
        emitted_segments: stats.emittedSegments,
      });
      stats.peak = 0;
    },
    onSpeechStart: (peak) => {
      stats.speechStarted = true;
      logger.info("Speech start detected", { peak });
    },
    onSpeechDiscarded: (samples) => {
      logger.info("Discarded short speech segment", { samples });
    },
    onSpeechReady: (samples) => {
      stats.emittedSegments += 1;
      logger.info("Speech segment ready", {
        index: stats.emittedSegments,
        samples,
        ms: Math.round((samples / 16000) * 1000),
      });
    },
    onSegment: (pcm16) => {
      const index = nextSegmentIndex;
      nextSegmentIndex += 1;
      transcriptionQueue.push({
        pcm16,
        queuedAt: Date.now(),
        index,
      });
      logger.info("Queued segment for STT", {
        index,
        queue: transcriptionQueue.length,
        bytes: pcm16.length,
      });
      void runNext();
    },
  });

  capture = spawnLoopbackCapture(config, logger);
  capture.stdout.on("data", (chunk: Buffer) => {
    stats.chunks += 1;
    stats.bytes += chunk.length;
    stats.lastChunkAt = Date.now();
    if (!suppressCapture) {
      segmenter.pushChunk(chunk);
    }
  });
  capture.stderr.on("data", (chunk: Buffer) => {
    const text = chunk.toString().trim();
    if (text) {
      logger.debug("[capture]", text);
    }
  });
  capture.on("error", (error) => {
    void shutdown(1, "capture-error", error);
  });
  capture.on("exit", (code, signal) => {
    logger.warn("capture exited", { code, signal });
    if (!shuttingDown) {
      void shutdown(1, "capture-exit");
    }
  });

  if (config.DEBUG) {
    const banner =
      options.enableLlm && options.enableTts
        ? "실시간 출력장치 STT+LLM+TTS 테스트를 시작합니다. Ctrl+C 로 종료합니다."
        : options.enableLlm
          ? "실시간 출력장치 STT+LLM 테스트를 시작합니다. Ctrl+C 로 종료합니다."
          : "실시간 출력장치 STT 테스트를 시작합니다. Ctrl+C 로 종료합니다.";
    console.log(banner);
    console.log(`source: ${config.AUDIO_SOURCE ?? "unset"}`);
    console.log(`model: ${config.WHISPER_MODEL}`);
    console.log(`language: ${config.WHISPER_LANGUAGE}`);
    console.log(`beam: ${config.WHISPER_BEAM_SIZE}`);
    if (options.enableLlm) {
      console.log(`llm: ${config.OLLAMA_MODEL}`);
    }
    if (options.enableTts) {
      console.log(`tts: ${config.TTS_IMAGE}`);
    }
  }

  // Watchdog: every 5 s, warn when no PCM has arrived yet or the stream stalled.
  setInterval(() => {
    if (shuttingDown) {
      return;
    }

    if (stats.lastChunkAt === 0) {
      logger.warn("아직 캡처 PCM 데이터가 들어오지 않았습니다. AUDIO_SOURCE 가 잘못됐거나 loopback 입력이 아닌 장치일 수 있습니다.");
      return;
    }

    if (Date.now() - stats.lastChunkAt >= 5000) {
      logger.warn("최근 5초 동안 새 PCM chunk 가 들어오지 않았습니다.");
    }
  }, 5000).unref();
}
|
||||
|
||||
/**
 * Interactive console loop for talking to the LLM service.
 *
 * Each non-empty line is sent to `generateReply`; `/reset` clears the
 * conversation and `/exit` closes the prompt, which exits the process.
 */
async function runLlmCli(): Promise<void> {
  const config = loadConfig();
  const logger = new Logger(config.DEBUG ? config.LOG_LEVEL : "error");
  const llm = new OllamaLlmService(config, logger);

  await llm.warmup();

  console.log(`LLM CLI 테스트를 시작합니다. model=${config.OLLAMA_MODEL}`);
  console.log("/exit 로 종료, /reset 으로 대화 초기화");

  const rl = createInterface({
    input: process.stdin,
    output: process.stdout,
    prompt: "you> ",
  });

  /** Sends one user line to the LLM and prints the reply (or the error message). */
  const answer = async (text: string): Promise<void> => {
    try {
      const startedAt = Date.now();
      const reply = await llm.generateReply(text, {
        onProgress: (message) => {
          console.log(`assistant> ${message}`);
        },
      });
      logger.info("LLM latency", {
        llm_ms: Date.now() - startedAt,
      });
      console.log(`assistant> ${reply}`);
    } catch (error) {
      console.error(error instanceof Error ? error.message : String(error));
    }
  };

  rl.prompt();

  rl.on("line", async (line) => {
    const text = line.trim();

    switch (text) {
      case "/exit":
        // No new prompt after closing.
        rl.close();
        return;
      case "/reset":
        llm.resetConversation();
        console.log("assistant> 대화 문맥을 초기화했습니다.");
        break;
      case "":
        break;
      default:
        await answer(text);
    }

    rl.prompt();
  });

  rl.on("close", () => {
    process.exit(0);
  });
}
|
||||
|
||||
/**
 * Speaks one sentence through the TTS service.
 *
 * The sentence is taken from the command-line arguments after the mode
 * (joined with spaces); a built-in sample sentence is used when none is given.
 * The TTS service is always destroyed afterwards, including when warmup or
 * playback fails, so this mode releases the service the same way the
 * streaming test does on shutdown.
 */
async function runTtsTest(): Promise<void> {
  const text = process.argv.slice(3).join(" ").trim() || "안녕하세요. 로컬 티티에스 테스트입니다.";
  const config = loadConfig();
  const logger = new Logger(config.DEBUG ? config.LOG_LEVEL : "error");
  const tts = new MeloTtsService(config, logger);

  try {
    console.log("TTS 준비중...");
    await tts.warmup();
    console.log("TTS 준비 완료");
    console.log(`재생 문장: ${text}`);
    await tts.speak(text);
  } finally {
    // Best-effort cleanup: a failing destroy must not mask the original error.
    await tts.destroy().catch((destroyError) => {
      logger.warn("TTS destroy failed", destroyError);
    });
  }
}
|
||||
|
||||
/**
 * Dispatches to the entry point selected by the module-level `mode`.
 *
 * Every case returns after its handler finishes, so no mode falls through
 * into the next one.
 *
 * @throws Error when `mode` is not one of the supported modes.
 */
async function main(): Promise<void> {
  switch (mode) {
    case "discord":
      await runDiscordBot(requireDiscordRuntimeConfig(config), logger);
      return;
    case "local":
      await runLocalAssistant(requireAssistantRuntimeConfig(config), logger);
      return;
    case "local-devices":
      await printLocalAudioDevices();
      return;
    case "devices":
      await printAudioDevices();
      return;
    case "test-stt":
      await runSttTest({ enableLlm: false, enableTts: false });
      return;
    case "test-sttllm":
      await runSttTest({ enableLlm: true, enableTts: false });
      return;
    case "test-all":
      await runSttTest({ enableLlm: true, enableTts: true });
      return;
    case "test-llm":
      await runLlmCli();
      return;
    case "test-tts":
      await runTtsTest();
      return;
    default:
      // The list names every mode handled above.
      throw new Error(
        `알 수 없는 실행 모드입니다: ${mode}. 사용 가능: discord, local, local-devices, test-stt, test-sttllm, test-all, test-llm, test-tts, devices`,
      );
  }
}
|
||||
|
||||
// Entry point: run the selected mode; on failure log it, print the message
// to stderr and exit with status 1.
void main().catch((error) => {
  logger.error("Fatal startup error", error);

  if (error instanceof Error) {
    console.error(error.message);
  } else {
    console.error(String(error));
  }

  process.exit(1);
});
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import process from "node:process";
|
||||
|
||||
import type { AssistantRuntimeConfig } from "./config.js";
|
||||
import { Logger } from "./logger.js";
|
||||
import { LocalVoiceSession } from "./audio/local-voice-session.js";
|
||||
import { requireFfmpegPath } from "./audio/ffmpeg-path.js";
|
||||
import { LocalFasterWhisperSttService } from "./services/local-stt.js";
|
||||
import { LocalMeloTtsService } from "./services/local-tts.js";
|
||||
import { OllamaLlmService } from "./services/ollama-llm.js";
|
||||
|
||||
/**
 * Prints the audio devices available on this machine.
 *
 * On Windows it runs ffmpeg's DirectShow device listing and prints usage hints;
 * on other platforms it runs `wpctl status` and `wpctl status -n` in sequence.
 *
 * @throws Error when a tool exits with an unexpected code or cannot be spawned.
 */
export async function printLocalAudioDevices(): Promise<void> {
  // Settles once the child exits; only the listed exit codes count as success.
  const waitForExit = (
    child: ReturnType<typeof spawn>,
    tool: string,
    okCodes: readonly number[],
  ): Promise<void> =>
    new Promise<void>((resolve, reject) => {
      child.on("exit", (code) => {
        if (code !== null && okCodes.includes(code)) {
          resolve();
        } else {
          reject(new Error(`${tool} exited with code ${code ?? "null"}`));
        }
      });
      child.on("error", reject);
    });

  if (process.platform !== "win32") {
    const invocations = [
      { label: "wpctl status", args: ["status"] },
      { label: "wpctl status -n", args: ["status", "-n"] },
    ] as const;

    for (const { label, args } of invocations) {
      console.log(`\n=== ${label} ===`);
      await waitForExit(spawn("wpctl", args, { stdio: ["ignore", "inherit", "inherit"] }), "wpctl", [0]);
    }
    return;
  }

  const ffmpegPath = requireFfmpegPath();

  console.log("\n=== ffmpeg dshow audio devices ===");
  // Only stderr is inherited (presumably where ffmpeg prints the list); exit code 1 is tolerated.
  await waitForExit(
    spawn(ffmpegPath, ["-hide_banner", "-list_devices", "true", "-f", "dshow", "-i", "dummy"], {
      stdio: ["ignore", "ignore", "inherit"],
    }),
    "ffmpeg",
    [0, 1],
  );

  console.log("\n위 목록의 오디오 장치 이름을 `LOCAL_AUDIO_SOURCE` 에 그대로 넣으면 됩니다.");
  console.log("Windows 로컬 모드는 현재 출력 장치 직접 선택 대신 시스템 기본 출력 장치를 사용합니다.");
}
|
||||
|
||||
/**
 * Runs the local voice assistant: builds the STT, TTS and LLM services, warms
 * up STT and TTS, then starts a `LocalVoiceSession`.
 *
 * SIGINT and SIGTERM tear down the session and both workers, then exit with code 0.
 *
 * @param config Runtime configuration shared by all services.
 * @param logger Logger handed to the services and used for shutdown warnings.
 */
export async function runLocalAssistant(config: AssistantRuntimeConfig, logger: Logger): Promise<void> {
  const stt = new LocalFasterWhisperSttService(config, logger);
  const tts = new LocalMeloTtsService(config, logger);
  const llm = new OllamaLlmService(config);

  await stt.warmup();
  await tts.warmup();

  const session = new LocalVoiceSession({ config, logger, stt, tts, llm });

  console.log(session.statusSummary());
  console.log("로컬 음성 테스트를 시작합니다. Ctrl+C 로 종료합니다.");

  const onWindows = process.platform === "win32";
  if (onWindows) {
    console.log("Windows 로컬 모드는 현재 시스템 기본 출력 장치로 재생됩니다.");
  }
  if (config.DEBUG_TEXT_EVENTS) {
    console.log("텍스트 로그 출력이 켜져 있습니다.");
  }

  // Best-effort teardown: a failing session shutdown is logged, worker failures are ignored.
  async function stop(exitCode = 0): Promise<void> {
    await session.destroy().catch((error) => {
      logger.warn("Local session shutdown failed", error);
    });
    await Promise.allSettled([stt.destroy?.(), tts.destroy?.()]);
    process.exit(exitCode);
  }

  for (const signalName of ["SIGINT", "SIGTERM"] as const) {
    process.on(signalName, () => {
      void stop(0);
    });
  }

  await session.start();
}
|
||||
20
src/prompt-loader.ts
Normal file
20
src/prompt-loader.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { readFileSync } from "node:fs";
|
||||
import path from "node:path";
|
||||
|
||||
// Prompt contents keyed by file name; filled on first successful load.
const cache = new Map<string, string>();

/**
 * Returns the trimmed contents of `prompts/<name>` under the current working directory.
 *
 * Results are memoized per name, so later edits to the file are not picked up.
 *
 * @param name File name inside the `prompts` directory.
 * @returns The trimmed prompt text.
 * @throws Error when the file is empty after trimming; read errors propagate from `readFileSync`.
 */
export function loadPrompt(name: string): string {
  const hit = cache.get(name);
  if (hit) {
    return hit;
  }

  const promptPath = path.resolve(process.cwd(), "prompts", name);
  const content = readFileSync(promptPath, "utf8").trim();
  if (content.length === 0) {
    throw new Error(`프롬프트 파일이 비어 있습니다: ${promptPath}`);
  }

  cache.set(name, content);
  return content;
}
|
||||
@@ -1,90 +1,203 @@
|
||||
import { existsSync } from "node:fs";
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { spawn } from "node:child_process";
|
||||
import { constants as fsConstants } from "node:fs";
|
||||
import { access } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import process from "node:process";
|
||||
|
||||
import type { AppConfig } from "./config.js";
|
||||
|
||||
export interface PythonLaunch {
|
||||
export interface PythonCommandSpec {
|
||||
command: string;
|
||||
args: string[];
|
||||
source: "venv" | "configured" | "system";
|
||||
viaCmdShell?: boolean;
|
||||
}
|
||||
|
||||
function splitCommandSpec(spec: string): string[] {
|
||||
return spec.match(/(?:[^\s"]+|"[^"]*")+/g)?.map((part) => part.replace(/^"|"$/g, "")) ?? [];
|
||||
/**
 * Decides whether a command has to be launched through `cmd.exe`.
 *
 * Only ever true on Windows, and there only for commands that are not absolute
 * paths or that end in `.bat` / `.cmd` (case-insensitive).
 */
function shouldUseCmdShell(command: string): boolean {
  const onWindows = process.platform === "win32";
  if (!onWindows) {
    return false;
  }
  if (!path.isAbsolute(command)) {
    return true;
  }

  const lowered = command.toLowerCase();
  return [".bat", ".cmd"].some((extension) => lowered.endsWith(extension));
}
|
||||
|
||||
function canRun(command: string, args: string[]): boolean {
|
||||
const result = spawnSync(command, [...args, "--version"], {
|
||||
encoding: "utf8",
|
||||
/**
 * Splits a command string on whitespace while keeping double-quoted runs together.
 *
 * A token that is entirely wrapped in double quotes has that one pair removed.
 *
 * @returns The tokens in order; an empty array when nothing matches.
 */
function splitCommand(command: string): string[] {
  const tokens: string[] = [];
  for (const raw of command.match(/(?:[^\s"]+|"[^"]*")+/g) ?? []) {
    tokens.push(raw.replace(/^"(.*)"$/, "$1"));
  }
  return tokens;
}
|
||||
|
||||
/**
 * Quotes one argument for a `cmd.exe` command line.
 *
 * Values without spaces, tabs, quotes or any of `&()<>^|` are returned
 * unchanged; anything else is wrapped in double quotes with embedded quotes doubled.
 */
function quoteWindowsCmdArg(value: string): string {
  const needsQuoting = /[ \t"&()<>^|]/.test(value);
  return needsQuoting ? `"${value.replace(/"/g, '""')}"` : value;
}
|
||||
|
||||
/** Joins the parts into one space-separated command line, quoting each part where needed. */
function buildWindowsCommandLine(parts: string[]): string {
  const quoted: string[] = [];
  for (const part of parts) {
    quoted.push(quoteWindowsCmdArg(part));
  }
  return quoted.join(" ");
}
|
||||
|
||||
/**
 * Builds the final command and argument list for launching Python.
 *
 * @param spec Resolved Python command.
 * @param extraArgs Arguments appended after the ones already in `spec`.
 * @returns On Windows with `spec.viaCmdShell` set, a `cmd.exe /d /s /c "<line>"`
 *   invocation; otherwise `spec.command` with the combined arguments.
 */
export function buildPythonInvocation(spec: PythonCommandSpec, extraArgs: string[]): PythonCommandSpec {
  const combinedArgs = [...spec.args, ...extraArgs];
  const wrapInCmd = process.platform === "win32" && spec.viaCmdShell;

  if (!wrapInCmd) {
    return { command: spec.command, args: combinedArgs };
  }

  const commandLine = buildWindowsCommandLine([spec.command, ...combinedArgs]);
  return { command: "cmd.exe", args: ["/d", "/s", "/c", commandLine] };
}
|
||||
|
||||
/**
 * Probes whether a command can be executed by running it with `--version`.
 *
 * @param command Executable to probe.
 * @param args Arguments placed before `--version`.
 * @param viaCmdShell When true, the probe is run through `cmd.exe /d /s /c`.
 * @returns `true` when the process exits with code 0; `false` on a non-zero
 *   exit or when the process cannot be spawned.
 */
async function canRun(command: string, args: string[], viaCmdShell = false): Promise<boolean> {
  const invocation = viaCmdShell
    ? {
        command: "cmd.exe",
        args: ["/d", "/s", "/c", buildWindowsCommandLine([command, ...args, "--version"])],
      }
    : {
        command,
        args: [...args, "--version"],
      };

  return await new Promise<boolean>((resolve) => {
    const child = spawn(invocation.command, invocation.args, {
      stdio: ["ignore", "ignore", "ignore"],
      windowsHide: true,
    });

    // A spawn failure (for example a missing executable) counts as "cannot run".
    child.on("error", () => {
      resolve(false);
    });

    child.on("exit", (code) => {
      resolve(code === 0);
    });
  });
}
|
||||
|
||||
export function resolveLocalAiVenvPath(config: AppConfig): string {
|
||||
return path.resolve(process.cwd(), config.LOCAL_AI_VENV_PATH);
|
||||
/**
 * Runs a command and collects everything it writes to stdout.
 *
 * @returns The captured stdout when the process ends with code 0; `null` on a
 *   non-zero exit or when the process cannot be spawned.
 */
async function captureStdout(command: string, args: string[]): Promise<string | null> {
  return await new Promise<string | null>((resolve) => {
    const child = spawn(command, args, {
      stdio: ["ignore", "pipe", "ignore"],
      windowsHide: true,
    });

    let stdout = "";
    child.stdout.on("data", (chunk: Buffer) => {
      stdout += chunk.toString();
    });

    child.on("error", () => {
      resolve(null);
    });

    // "close" fires after the stdio streams have ended, so no trailing output
    // is lost; "exit" can fire while stdout still holds unread data.
    child.on("close", (code) => {
      resolve(code === 0 ? stdout : null);
    });
  });
}
|
||||
|
||||
/** Resolves `config.LOCAL_AI_CACHE_DIR` against the current working directory. */
export function resolveLocalAiCachePath(config: AppConfig): string {
  const { LOCAL_AI_CACHE_DIR: cacheDir } = config;
  return path.resolve(process.cwd(), cacheDir);
}
|
||||
|
||||
/**
 * Returns the path of the Python executable inside the local AI virtualenv:
 * `Scripts/python.exe` on Windows, `bin/python` elsewhere.
 */
export function resolveVenvPythonPath(config: AppConfig): string {
  const venvRoot = resolveLocalAiVenvPath(config);
  if (process.platform === "win32") {
    return path.join(venvRoot, "Scripts", "python.exe");
  }
  return path.join(venvRoot, "bin", "python");
}
|
||||
|
||||
export function resolvePythonLaunch(config: AppConfig, options?: { preferVenv?: boolean }): PythonLaunch {
|
||||
const preferVenv = options?.preferVenv ?? true;
|
||||
const venvPython = resolveVenvPythonPath(config);
|
||||
|
||||
if (preferVenv && existsSync(venvPython)) {
|
||||
return {
|
||||
command: venvPython,
|
||||
args: [],
|
||||
source: "venv",
|
||||
};
|
||||
async function resolveWindowsExecutable(name: string): Promise<string | null> {
|
||||
const stdout = await captureStdout("cmd.exe", ["/d", "/s", "/c", `where ${name}`]);
|
||||
if (!stdout) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const configured = config.LOCAL_AI_PYTHON ? splitCommandSpec(config.LOCAL_AI_PYTHON) : [];
|
||||
if (configured.length > 0 && canRun(configured[0]!, configured.slice(1))) {
|
||||
return {
|
||||
command: configured[0]!,
|
||||
args: configured.slice(1),
|
||||
source: "configured",
|
||||
};
|
||||
const candidates = stdout
|
||||
.split(/\r?\n/)
|
||||
.map((line) => line.trim())
|
||||
.filter((line) => line.length > 0);
|
||||
|
||||
for (const candidate of candidates) {
|
||||
try {
|
||||
await access(candidate, fsConstants.F_OK);
|
||||
return candidate;
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
|
||||
const candidates =
|
||||
process.platform === "win32"
|
||||
? [
|
||||
["py", "-3"],
|
||||
["python"],
|
||||
["python3"],
|
||||
]
|
||||
: [
|
||||
["python3"],
|
||||
["python"],
|
||||
];
|
||||
return null;
|
||||
}
|
||||
|
||||
for (const [command, ...args] of candidates) {
|
||||
if (canRun(command, args)) {
|
||||
/**
 * Reports whether `target` passes an `access` check with `X_OK`.
 *
 * Despite the name, the check requests execute permission rather than mere
 * existence. Any failure of the check yields `false`.
 */
async function fileExists(target: string): Promise<boolean> {
  return access(target, fsConstants.X_OK).then(
    () => true,
    () => false,
  );
}
|
||||
|
||||
/** Resolves the Python command for workers; delegates to `resolveWorkerPythonCommand`. */
export async function resolvePythonCommand(config: AppConfig): Promise<{ command: string; args: string[] }> {
  const spec = await resolveWorkerPythonCommand(config);
  return spec;
}
|
||||
|
||||
/**
 * Resolves the Python interpreter to use, in this order:
 *
 * 1. `config.LOCAL_AI_PYTHON`, split into command and arguments.
 * 2. The virtualenv interpreter, when that file passes `fileExists`.
 * 3. On Windows, plain `python` run through `cmd.exe` (not probed here).
 * 4. Elsewhere, the first of `python3` / `python` that answers `--version`.
 *
 * @throws Error when `LOCAL_AI_PYTHON` yields no command, or when no candidate
 *   interpreter can be run.
 */
export async function resolveBasePythonCommand(config: AppConfig): Promise<PythonCommandSpec> {
  const configured = config.LOCAL_AI_PYTHON?.trim();
  if (configured) {
    const [command, ...args] = splitCommand(configured);
    if (!command) {
      throw new Error("LOCAL_AI_PYTHON 값이 비어 있습니다.");
    }
    return {
      command,
      args,
      source: "system",
      viaCmdShell: shouldUseCmdShell(command),
    };
  }

  const venvPath = resolveVenvPythonPath(config);
  if (await fileExists(venvPath)) {
    return { command: venvPath, args: [] };
  }

  if (process.platform === "win32") {
    return {
      command: "python",
      args: [],
      viaCmdShell: true,
    };
  }

  const unixCandidates = [
    { command: "python3", args: [] as string[] },
    { command: "python", args: [] as string[] },
  ];

  for (const candidate of unixCandidates) {
    if (await canRun(candidate.command, candidate.args)) {
      return candidate;
    }
  }

  throw new Error(
    [
      "Python 실행 파일을 찾지 못했습니다.",
      "1. Python 3.11 이상을 설치",
      "2. 필요하면 `.env` 에 `LOCAL_AI_PYTHON=python` 또는 `LOCAL_AI_PYTHON=py -3` 설정",
      "3. 그 다음 `bun run setup:local-ai` 실행",
    ].join("\n"),
  );
}
|
||||
|
||||
/**
 * Resolves the Python command used to launch workers: the virtualenv
 * interpreter when it passes `fileExists`, otherwise the base command.
 */
export async function resolveWorkerPythonCommand(config: AppConfig): Promise<PythonCommandSpec> {
  const venvPython = resolveVenvPythonPath(config);
  const hasVenv = await fileExists(venvPython);
  return hasVenv ? { command: venvPython, args: [] } : await resolveBasePythonCommand(config);
}
|
||||
|
||||
/**
 * Returns the interpreter path inside the virtualenv at
 * `config.LOCAL_AI_VENV_PATH` (resolved against the current working directory):
 * `Scripts/python.exe` on Windows, `bin/python` elsewhere.
 */
export function resolveVenvPythonPath(config: AppConfig): string {
  const segments = process.platform === "win32" ? ["Scripts", "python.exe"] : ["bin", "python"];
  const venvRoot = path.resolve(process.cwd(), config.LOCAL_AI_VENV_PATH);
  return path.join(venvRoot, ...segments);
}
|
||||
|
||||
/** Resolves a worker script name to its path under `python/` in the current working directory. */
export function resolveWorkerScript(name: string): string {
  const relativeParts = ["python", name];
  return path.resolve(process.cwd(), ...relativeParts);
}
|
||||
|
||||
104
src/services/audio-playback.ts
Normal file
104
src/services/audio-playback.ts
Normal file
@@ -0,0 +1,104 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { rm } from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import process from "node:process";
|
||||
import { randomUUID } from "node:crypto";
|
||||
|
||||
/**
 * Spawns a command with inherited stdout/stderr and waits for it to exit.
 *
 * @param env Optional environment for the child process.
 * @throws Error when the process exits with a non-zero (or null) code; spawn errors are passed through.
 */
async function run(command: string, args: string[], env?: NodeJS.ProcessEnv): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const proc = spawn(command, args, {
      stdio: ["ignore", "inherit", "inherit"],
      windowsHide: true,
      env,
    });

    const onExit = (code: number | null): void => {
      if (code !== 0) {
        reject(new Error(`${command} ${args.join(" ")} exited with code ${code ?? "null"}`));
        return;
      }
      resolve();
    };

    proc.on("error", reject);
    proc.on("exit", onExit);
  });
}
|
||||
|
||||
/**
 * Builds an ffmpeg `atempo` filter chain for a playback rate.
 *
 * Rates above 2 or below 0.5 are split into repeated `atempo=2.0` /
 * `atempo=0.5` stages followed by one stage for the remainder, so every stage
 * stays within the 0.5–2 range.
 *
 * @param rate Playback speed multiplier; must be a finite number greater than 0.
 * @returns Comma-separated filter string, e.g. `atempo=2.0,atempo=1.500` for 3.
 * @throws RangeError when `rate` is not a finite positive number. Without this
 *   check 0, negative values and Infinity would never leave the loops below.
 */
function buildAtempoFilter(rate: number): string {
  if (!Number.isFinite(rate) || rate <= 0) {
    throw new RangeError(`playback rate must be a finite number greater than 0, got ${rate}`);
  }

  const filters: string[] = [];
  let remaining = rate;

  while (remaining > 2) {
    filters.push("atempo=2.0");
    remaining /= 2;
  }

  while (remaining < 0.5) {
    filters.push("atempo=0.5");
    remaining /= 0.5;
  }

  filters.push(`atempo=${remaining.toFixed(3)}`);
  return filters.join(",");
}
|
||||
|
||||
/**
 * Produces a copy of a WAV file re-timed to the given playback rate.
 *
 * @param filePath Source audio file.
 * @param playbackRate Speed multiplier; values within 0.01 of 1 are treated as "no change".
 * @returns `filePath` itself when no change is needed, otherwise the path of a
 *   new temporary file that the caller is responsible for deleting.
 * @throws Error when ffmpeg fails; the partially written temporary file is removed first.
 */
async function applyPlaybackRate(filePath: string, playbackRate: number): Promise<string> {
  if (Math.abs(playbackRate - 1) < 0.01) {
    return filePath;
  }

  const targetPath = path.join(os.tmpdir(), `realtime-voice-bot-tts-${randomUUID()}.wav`);
  const filter = buildAtempoFilter(playbackRate);

  try {
    await run("ffmpeg", [
      "-y",
      "-hide_banner",
      "-loglevel",
      "error",
      "-i",
      filePath,
      "-filter:a",
      filter,
      targetPath,
    ]);
  } catch (error) {
    // The caller never learns this path on failure, so clean it up here.
    await rm(targetPath, { force: true }).catch(() => undefined);
    throw error;
  }

  return targetPath;
}
|
||||
|
||||
/**
 * Plays a WAV file synchronously through the platform's native player.
 *
 * Only Windows is handled: a PowerShell script plays the file with
 * `System.Media.SoundPlayer`, receiving the path via the `TTS_WAV_PATH`
 * environment variable.
 *
 * @throws Error on every platform other than Windows, or when PowerShell fails.
 */
async function playNativeWavFile(filePath: string): Promise<void> {
  if (process.platform !== "win32") {
    throw new Error(`지원하지 않는 플랫폼입니다: ${process.platform}`);
  }

  const script = [
    "$path = $env:TTS_WAV_PATH",
    "$player = New-Object System.Media.SoundPlayer $path",
    "$player.Load()",
    "$player.PlaySync()",
  ].join("; ");

  const powershellArgs = [
    "-NoProfile",
    "-NonInteractive",
    "-ExecutionPolicy",
    "Bypass",
    "-Command",
    script,
  ];

  await run("powershell.exe", powershellArgs, { ...process.env, TTS_WAV_PATH: filePath });
}
|
||||
|
||||
/**
 * Plays a WAV file, optionally re-timed to `playbackRate`.
 *
 * When re-timing produced a separate temporary file, that file is deleted after
 * playback whether or not playback succeeded; deletion errors are ignored.
 */
export async function playWavFile(filePath: string, playbackRate = 1): Promise<void> {
  const playablePath = await applyPlaybackRate(filePath, playbackRate);
  const isTemporaryCopy = playablePath !== filePath;

  try {
    await playNativeWavFile(playablePath);
  } finally {
    if (isTemporaryCopy) {
      await rm(playablePath, { force: true }).catch(() => undefined);
    }
  }
}
|
||||
@@ -1,77 +0,0 @@
|
||||
/** One stored entry of the conversation history. */
export interface ConversationTurn {
  /** Who produced the text. */
  role: "user" | "assistant";
  /** The spoken or generated text. */
  text: string;
  /** Speaker identifier; optional. */
  speakerId?: string;
  /** Speaker display name; optional. */
  speakerName?: string;
  /** Creation timestamp as a number. */
  createdAt: number;
}
|
||||
|
||||
/** A single user utterance together with the identity of its speaker. */
export interface UserUtterance {
  /** Identifier of the speaker. */
  speakerId: string;
  /** Display name of the speaker. */
  speakerName: string;
  /** What was said. */
  text: string;
}
|
||||
|
||||
/**
 * Bounded conversation history that can be rendered into an LLM prompt.
 *
 * At most `maxTurns` entries are kept; the oldest ones are dropped whenever a
 * new turn pushes the history over that limit.
 */
export class ConversationMemory {
  private readonly turns: ConversationTurn[] = [];

  /** @param maxTurns Maximum number of turns retained. */
  constructor(private readonly maxTurns: number) {}

  /** Appends a user turn stamped with the current time, then enforces the limit. */
  addUserTurn(utterance: UserUtterance): void {
    const { text, speakerId, speakerName } = utterance;
    this.turns.push({ role: "user", text, speakerId, speakerName, createdAt: Date.now() });
    this.trim();
  }

  /** Appends an assistant turn stamped with the current time, then enforces the limit. */
  addAssistantTurn(text: string): void {
    this.turns.push({ role: "assistant", text, createdAt: Date.now() });
    this.trim();
  }

  /** Removes every stored turn. */
  clear(): void {
    this.turns.length = 0;
  }

  /** Returns a shallow copy of the stored turns, oldest first. */
  recentTurns(): ConversationTurn[] {
    return this.turns.slice();
  }

  /**
   * Renders the stored history followed by the current utterance.
   *
   * The current utterance is rendered from the argument only; it is not added to the history here.
   */
  buildPrompt(currentUtterance: UserUtterance): string {
    const rendered: string[] = [];
    for (const turn of this.turns.slice(-this.maxTurns)) {
      if (turn.role === "assistant") {
        rendered.push(`[assistant]\n${turn.text}`);
      } else {
        rendered.push(
          `[user speaker_id=${turn.speakerId ?? "unknown"} speaker_name=${turn.speakerName ?? "unknown"}]\n${turn.text}`,
        );
      }
    }

    const joined = rendered.join("\n\n");
    const historyBlock = joined.length > 0 ? joined : "(이전 대화 없음)";
    const speakerHeader = `[user speaker_id=${currentUtterance.speakerId} speaker_name=${currentUtterance.speakerName}]`;

    return `최근 대화:\n${historyBlock}\n\n이번 발화:\n${speakerHeader}\n${currentUtterance.text}`;
  }

  /** Drops the oldest turns until at most `maxTurns` remain. */
  private trim(): void {
    const overflow = this.turns.length - this.maxTurns;
    if (overflow <= 0) {
      return;
    }
    this.turns.splice(0, overflow);
  }
}
|
||||
40
src/services/faster-whisper-stt.ts
Normal file
40
src/services/faster-whisper-stt.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
import type { AppConfig } from "../config.js";
|
||||
import type { Logger } from "../logger.js";
|
||||
import { PythonJsonWorker } from "./python-json-worker.js";
|
||||
|
||||
/** Shape of the STT worker's reply to a `ping` request. */
interface PingResult {
  /** Model reported by the worker. */
  model: string;
  /** Device reported by the worker. */
  device: string;
  /** Compute type reported by the worker. */
  compute_type: string;
}
|
||||
|
||||
/** Shape of the STT worker's reply to a `transcribe` request. */
interface TranscribeResult {
  /** Transcribed text. */
  text: string;
}
|
||||
|
||||
/**
 * Speech-to-text service backed by a `PythonJsonWorker` running
 * `loopback_stt_worker.py`.
 */
export class FasterWhisperSttService {
  private readonly worker: PythonJsonWorker;

  constructor(
    private readonly config: AppConfig,
    private readonly logger: Logger,
  ) {
    this.worker = new PythonJsonWorker(config, logger, "loopback_stt_worker.py", "faster-whisper");
  }

  /** Pings the worker and logs what it reports. */
  async warmup(): Promise<void> {
    const workerInfo = await this.worker.request<PingResult>("ping", {});
    this.logger.info("STT worker ready", workerInfo);
  }

  /**
   * Sends the audio buffer to the worker as base64 and returns the trimmed transcript.
   *
   * @param pcm16 Audio bytes; presumably 16-bit PCM, going by the name.
   */
  async transcribePcm16(pcm16: Buffer): Promise<string> {
    const encoded = pcm16.toString("base64");
    const { text } = await this.worker.request<TranscribeResult>("transcribe", {
      pcm16_base64: encoded,
    });
    return text.trim();
  }

  /** Shuts the worker down. */
  async destroy(): Promise<void> {
    await this.worker.destroy();
  }
}
|
||||
@@ -1,5 +0,0 @@
|
||||
import type { ConversationMemory, UserUtterance } from "./conversation.js";
|
||||
|
||||
/** Contract for a language-model backend that produces assistant replies. */
export interface LlmService {
  /**
   * Generates a reply to an utterance.
   *
   * @param memory Conversation history available to the implementation.
   * @param utterance The utterance to respond to.
   * @returns The reply text.
   */
  generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string>;
}
|
||||
@@ -1,43 +0,0 @@
|
||||
import type { AssistantRuntimeConfig } from "../config.js";
|
||||
import type { Logger } from "../logger.js";
|
||||
import { PythonJsonWorker } from "./python-json-worker.js";
|
||||
import type { SttService } from "./stt.js";
|
||||
|
||||
/** Shape of the local STT worker's reply to a `transcribe` request. */
interface TranscribeResult {
  /** Transcribed text; may be absent. */
  text?: string;
}
|
||||
|
||||
/**
 * `SttService` implementation backed by a `PythonJsonWorker` running
 * `local_stt_worker.py`.
 */
export class LocalFasterWhisperSttService implements SttService {
  private readonly worker: PythonJsonWorker;

  constructor(private readonly config: AssistantRuntimeConfig, logger: Logger) {
    // STT settings passed to the worker (presumably as environment variables — confirm in PythonJsonWorker).
    const workerSettings = {
      LOCAL_STT_MODEL: config.LOCAL_STT_MODEL,
      LOCAL_STT_DEVICE: config.LOCAL_STT_DEVICE,
      LOCAL_STT_COMPUTE_TYPE: config.LOCAL_STT_COMPUTE_TYPE,
      LOCAL_STT_BEAM_SIZE: String(config.LOCAL_STT_BEAM_SIZE),
    };
    this.worker = new PythonJsonWorker(config, logger, "local_stt_worker.py", "local-stt", workerSettings);
  }

  /** Pings the worker. */
  async warmup(): Promise<void> {
    await this.worker.request("ping", {});
  }

  /**
   * Transcribes an audio buffer using the configured default language.
   *
   * @returns The trimmed transcript, or `null` when the buffer is empty or the transcript is blank.
   */
  async transcribePcm16(pcm16MonoAudio: Buffer): Promise<string | null> {
    if (pcm16MonoAudio.byteLength === 0) {
      return null;
    }

    const { text } = await this.worker.request<TranscribeResult>("transcribe", {
      audio_base64: pcm16MonoAudio.toString("base64"),
      language: this.config.BOT_DEFAULT_LANGUAGE,
    });

    const transcript = (text ?? "").trim();
    return transcript === "" ? null : transcript;
  }

  /** Shuts the worker down. */
  async destroy(): Promise<void> {
    await this.worker.destroy();
  }
}
|
||||
@@ -1,94 +0,0 @@
|
||||
import { Readable } from "node:stream";
|
||||
|
||||
import prism from "prism-media";
|
||||
|
||||
import type { AssistantRuntimeConfig } from "../config.js";
|
||||
import type { Logger } from "../logger.js";
|
||||
import { resolveFfmpegPath } from "../audio/ffmpeg-path.js";
|
||||
import { PythonJsonWorker } from "./python-json-worker.js";
|
||||
import type { PreparedSpeechAudio, TtsService } from "./tts.js";
|
||||
|
||||
/** Shape of the local TTS worker's reply to a `synthesize` request. */
interface SynthesizeResult {
  /** Base64-encoded audio; may be absent. */
  wav_base64?: string;
}
|
||||
|
||||
/**
 * `TtsService` implementation backed by a `PythonJsonWorker` running
 * `local_tts_worker.py`, with ffmpeg converting the synthesized audio to raw PCM.
 */
export class LocalMeloTtsService implements TtsService {
  private readonly worker: PythonJsonWorker;

  constructor(config: AssistantRuntimeConfig, logger: Logger) {
    // Publish the resolved ffmpeg location unless FFMPEG_PATH is already set.
    const resolvedFfmpegPath = resolveFfmpegPath();
    if (resolvedFfmpegPath && !process.env.FFMPEG_PATH) {
      process.env.FFMPEG_PATH = resolvedFfmpegPath;
    }

    this.worker = new PythonJsonWorker(config, logger, "local_tts_worker.py", "local-tts", {
      LOCAL_TTS_LANGUAGE: config.LOCAL_TTS_LANGUAGE,
      LOCAL_TTS_SPEAKER: config.LOCAL_TTS_SPEAKER,
      LOCAL_TTS_DEVICE: config.LOCAL_TTS_DEVICE,
      LOCAL_TTS_SPEED: String(config.LOCAL_TTS_SPEED),
    });
  }

  /** Pings the worker. */
  async warmup(): Promise<void> {
    await this.worker.request("ping", {});
  }

  /**
   * Synthesizes `text` and returns it as a stream of s16le, 48 kHz, 2-channel audio.
   *
   * @param text Text to speak.
   * @param signal Optional abort signal. It is forwarded to the worker request;
   *   once aborted, the input and ffmpeg streams are destroyed.
   * @returns The ffmpeg output stream plus a `dispose` callback that destroys
   *   both streams and detaches the abort listener.
   * @throws Error when the worker returns no audio.
   */
  async preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio> {
    const result = await this.worker.request<SynthesizeResult>("synthesize", { text }, signal);

    const wavBase64 = result.wav_base64;
    if (!wavBase64) {
      throw new Error("로컬 TTS가 빈 오디오를 반환했습니다.");
    }

    const input = Readable.from([Buffer.from(wavBase64, "base64")]);
    const ffmpeg = new prism.FFmpeg({
      args: [
        "-analyzeduration",
        "0",
        "-loglevel",
        "0",
        "-i",
        "pipe:0",
        "-f",
        "s16le",
        "-ar",
        "48000",
        "-ac",
        "2",
        "pipe:1",
      ],
    });

    const teardown = (): void => {
      input.destroy();
      ffmpeg.destroy();
    };

    input.pipe(ffmpeg);

    if (signal) {
      if (signal.aborted) {
        // An already-aborted signal never fires "abort" again, so clean up right away.
        teardown();
      } else {
        signal.addEventListener("abort", teardown, { once: true });
      }
    }

    return {
      stream: ffmpeg,
      dispose: () => {
        // Detach so a long-lived signal does not keep the streams reachable.
        signal?.removeEventListener("abort", teardown);
        teardown();
      },
    };
  }

  /** Shuts the worker down. */
  async destroy(): Promise<void> {
    await this.worker.destroy();
  }
}
|
||||
364
src/services/melo-tts.ts
Normal file
364
src/services/melo-tts.ts
Normal file
@@ -0,0 +1,364 @@
|
||||
import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { mkdir, rm } from "node:fs/promises";
|
||||
import { once } from "node:events";
|
||||
import path from "node:path";
|
||||
import { createInterface } from "node:readline";
|
||||
|
||||
import type { AppConfig } from "../config.js";
|
||||
import { resolveDockerCommand } from "../docker-runtime.js";
|
||||
import type { Logger } from "../logger.js";
|
||||
import { playWavFile } from "./audio-playback.js";
|
||||
|
||||
/**
 * Spawns a command and waits for it to exit successfully.
 *
 * When `command` is an absolute path, its directory is prepended to `PATH` for
 * the child. On Windows a non-absolute command is run through a shell.
 *
 * @param stdio How the child's stdout is handled; stderr is always inherited.
 * @throws Error with a Docker-specific hint when `docker` is not found, the
 *   original spawn error otherwise, or an error naming the exit code when it is non-zero.
 */
async function run(command: string, args: string[], stdio: "ignore" | "inherit" = "ignore"): Promise<void> {
  const isAbsoluteCommand = path.isAbsolute(command);
  const env = { ...process.env };

  if (isAbsoluteCommand) {
    const binDir = path.dirname(command);
    env.PATH = [binDir, env.PATH ?? env.Path ?? ""].join(path.delimiter);
  }

  return new Promise<void>((resolve, reject) => {
    const proc = spawn(command, args, {
      stdio: ["ignore", stdio, "inherit"],
      windowsHide: true,
      shell: process.platform === "win32" && !isAbsoluteCommand,
      env,
    });

    proc.on("error", (error) => {
      const dockerMissing = (error as NodeJS.ErrnoException).code === "ENOENT" && command === "docker";
      reject(
        dockerMissing
          ? new Error("Docker를 찾지 못했습니다. Docker Desktop을 설치하고 실행한 뒤 다시 시도하세요.")
          : error,
      );
    });

    proc.on("exit", (code) => {
      if (code !== 0) {
        reject(new Error(`${command} ${args.join(" ")} exited with code ${code ?? "null"}`));
        return;
      }
      resolve();
    });
  });
}
|
||||
|
||||
/** Successful worker reply carrying a result. */
interface RpcSuccess<T> {
  /** Identifier of the request this reply belongs to. */
  id: string;
  /** Result payload. */
  result: T;
}
|
||||
|
||||
/** Failed worker reply carrying an error message. */
interface RpcFailure {
  /** Identifier of the request this reply belongs to. */
  id: string;
  /** Error description. */
  error: string;
}
|
||||
|
||||
/** Any worker reply: either a failure or a success carrying `T`. */
type RpcResponse<T> = RpcFailure | RpcSuccess<T>;
|
||||
|
||||
/** Type guard: a reply is a failure when it has an `error` property. */
function isFailure<T>(value: RpcResponse<T>): value is RpcFailure {
  return Reflect.has(value, "error");
}
|
||||
|
||||
/** Shape of the TTS worker's reply to a `ping` request; all values are reported by the worker. */
interface TtsPingResult {
  language: string;
  speaker: string;
  speaker_id: number;
  device: string;
  speed: number;
  sdp_ratio: number;
  noise_scale: number;
  noise_scale_w: number;
  speaker_count: number;
}
|
||||
|
||||
export class MeloTtsService {
|
||||
private processRef: ChildProcessWithoutNullStreams | null = null;
|
||||
private shuttingDown = false;
|
||||
private warmedUp = false;
|
||||
private readonly pending = new Map<
|
||||
string,
|
||||
{
|
||||
resolve: (value: unknown) => void;
|
||||
reject: (reason?: unknown) => void;
|
||||
}
|
||||
>();
|
||||
private nextId = 1;
|
||||
|
||||
constructor(
|
||||
private readonly config: AppConfig,
|
||||
private readonly logger: Logger,
|
||||
) {}
|
||||
|
||||
/**
 * Prepares the TTS worker once: creates the cache and output directories,
 * checks that Docker and the TTS image are available, starts the worker, pings
 * it and runs one throwaway synthesis. Later calls return immediately.
 */
async warmup(): Promise<void> {
  if (this.warmedUp) {
    return;
  }

  const { TTS_CACHE_DIR, TTS_OUTPUT_DIR, TTS_IMAGE } = this.config;
  await mkdir(path.resolve(process.cwd(), TTS_CACHE_DIR), { recursive: true });
  await mkdir(path.resolve(process.cwd(), TTS_OUTPUT_DIR), { recursive: true });

  // Both commands must succeed before the worker is started.
  const docker = await resolveDockerCommand(this.config);
  await run(docker, ["--version"]);
  await run(docker, ["image", "inspect", TTS_IMAGE]);

  await this.start();
  const pingResult = await this.request<TtsPingResult>("ping", {});
  this.logger.info("TTS worker ready", pingResult);

  // Throwaway synthesis; its output file is removed whether or not it succeeds.
  const warmupFileName = `warmup-${randomUUID()}.wav`;
  const warmupHostPath = path.resolve(process.cwd(), TTS_OUTPUT_DIR, warmupFileName);
  try {
    await this.request("synthesize", {
      text: "안녕하세요. 로컬 티티에스 준비 테스트입니다.",
      output_path: `/work/output/${warmupFileName}`,
    });
  } finally {
    await rm(warmupHostPath, { force: true }).catch(() => undefined);
  }

  this.warmedUp = true;
}
|
||||
|
||||
/**
 * Synthesizes `text` into a temporary WAV file in the output directory, plays
 * it at the configured playback rate and deletes the file afterwards.
 *
 * Text that is empty after normalization is ignored.
 */
async speak(text: string): Promise<void> {
  const sentence = this.normalizeText(text);
  if (sentence === "") {
    return;
  }

  const fileName = `tts-${Date.now()}-${randomUUID()}.wav`;
  const wavPath = path.resolve(process.cwd(), this.config.TTS_OUTPUT_DIR, fileName);

  try {
    await this.synthesizeToFile(sentence, wavPath, fileName);
    await playWavFile(wavPath, this.config.TTS_PLAYBACK_RATE);
  } finally {
    await rm(wavPath, { force: true }).catch(() => undefined);
  }
}
|
||||
|
||||
/**
 * Asks the worker to synthesize `text` into `/work/output/<file name>`.
 *
 * @param targetPath Used only to derive the file name when `fileName` is omitted.
 * @param fileName File name under `/work/output`; defaults to the basename of `targetPath`.
 */
async synthesizeToFile(text: string, targetPath: string, fileName?: string): Promise<void> {
  await this.warmup();

  const outputName = fileName ?? path.basename(targetPath);
  const settings = this.config;

  this.logger.info("Starting MeloTTS synthesis", {
    image: settings.TTS_IMAGE,
    language: settings.TTS_LANGUAGE,
    speaker: settings.TTS_SPEAKER,
    speed: settings.TTS_SPEED,
    playback_rate: settings.TTS_PLAYBACK_RATE,
    sdp_ratio: settings.TTS_SDP_RATIO,
    noise_scale: settings.TTS_NOISE_SCALE,
    noise_scale_w: settings.TTS_NOISE_SCALE_W,
    device: settings.TTS_DEVICE,
  });

  const params = { text, output_path: `/work/output/${outputName}` };
  await this.request("synthesize", params);
}
|
||||
|
||||
/**
 * Stops the worker process: closes its stdin, sends SIGTERM, waits up to
 * 1.5 seconds for it to exit and escalates to SIGKILL if it is still running.
 *
 * Does nothing when no worker is running. The service state is reset even if
 * waiting for the process fails.
 */
async destroy(): Promise<void> {
  const child = this.processRef;
  if (!child) {
    return;
  }

  this.shuttingDown = true;

  // `child.killed` only records that a signal was sent, so termination is
  // detected from the exit code / terminating signal instead.
  const hasExited = (): boolean => child.exitCode !== null || child.signalCode !== null;

  try {
    try {
      child.stdin.end();
    } catch {
      // ignore
    }

    if (!hasExited()) {
      child.kill("SIGTERM");
    }

    if (!hasExited()) {
      let timer: ReturnType<typeof setTimeout> | undefined;
      await Promise.race([
        once(child, "exit").catch(() => null),
        new Promise<null>((resolve) => {
          timer = setTimeout(() => resolve(null), 1500);
        }),
      ]);
      // Do not leave the grace-period timer pending once the wait is over.
      clearTimeout(timer);
    }

    if (!hasExited()) {
      child.kill("SIGKILL");
      await once(child, "exit").catch(() => null);
    }
  } finally {
    this.processRef = null;
    this.shuttingDown = false;
    this.warmedUp = false;
  }
}
|
||||
|
||||
/**
 * Cleans text before synthesis: replaces the characters `` ` * _ # > [ ] ( ) ``
 * with spaces, collapses whitespace, trims, and appends a period unless the
 * result already ends in `.`, `!`, `?` or `…`.
 *
 * @returns The cleaned sentence, or an empty string when nothing is left.
 */
private normalizeText(input: string): string {
  const withoutMarkup = input.replace(/[`*_#>\[\]\(\)]/g, " ");
  const collapsed = withoutMarkup.replace(/\s+/g, " ").trim();

  if (collapsed === "") {
    return "";
  }

  const endsWithPunctuation = /[.!?…]$/.test(collapsed);
  return endsWithPunctuation ? collapsed : `${collapsed}.`;
}
|
||||
|
||||
/**
 * Starts the MeloTTS worker container if it is not already running.
 *
 * The container is started with `docker run --rm -i`, mounts the output and
 * cache directories, receives the TTS settings as environment variables and
 * runs `melo_tts_worker.py`. Stdout lines go to `handleStdoutLine`; stderr is
 * logged as warnings. When the process exits, or fails to spawn, every pending
 * request is rejected.
 *
 * @throws Error when called while the service is shutting down.
 */
private async start(): Promise<void> {
  if (this.processRef) {
    return;
  }
  if (this.shuttingDown) {
    throw new Error("tts worker is shutting down");
  }

  const docker = await resolveDockerCommand(this.config);
  const outputDir = path.resolve(process.cwd(), this.config.TTS_OUTPUT_DIR);
  const cacheDir = path.resolve(process.cwd(), this.config.TTS_CACHE_DIR);

  await mkdir(outputDir, { recursive: true });
  await mkdir(cacheDir, { recursive: true });

  // Another caller may have started the worker while this one was awaiting above.
  if (this.processRef) {
    return;
  }

  const containerEnv = [
    "HF_HOME=/cache/huggingface",
    "HF_HUB_CACHE=/cache/huggingface/hub",
    "TRANSFORMERS_CACHE=/cache/transformers",
    `TTS_LANGUAGE=${this.config.TTS_LANGUAGE}`,
    `TTS_SPEAKER=${this.config.TTS_SPEAKER}`,
    `TTS_DEVICE=${this.config.TTS_DEVICE}`,
    `TTS_SPEED=${this.config.TTS_SPEED}`,
    `TTS_SDP_RATIO=${this.config.TTS_SDP_RATIO}`,
    `TTS_NOISE_SCALE=${this.config.TTS_NOISE_SCALE}`,
    `TTS_NOISE_SCALE_W=${this.config.TTS_NOISE_SCALE_W}`,
  ];

  const args = [
    "run",
    "--rm",
    "-i",
    "-v",
    `${outputDir}:/work/output`,
    "-v",
    `${cacheDir}:/cache`,
    ...containerEnv.flatMap((entry) => ["-e", entry]),
    "--entrypoint",
    "python",
  ];

  if (this.config.TTS_DEVICE !== "cpu") {
    args.push("--gpus", "all");
  }

  args.push(this.config.TTS_IMAGE, "/opt/realtime-voice-bot/melo_tts_worker.py");

  // When docker was resolved to an absolute path, put its directory first on PATH.
  const env = { ...process.env };
  if (path.isAbsolute(docker)) {
    const dockerBinDir = path.dirname(docker);
    const currentPath = env.PATH ?? env.Path ?? "";
    env.PATH = `${dockerBinDir}${path.delimiter}${currentPath}`;
  }

  const child = spawn(docker, args, {
    stdio: ["pipe", "pipe", "pipe"],
    windowsHide: true,
    shell: process.platform === "win32" && !path.isAbsolute(docker),
    env,
  });
  this.processRef = child;

  // Rejects everything still waiting for a reply and forgets this process.
  const failPending = (error: Error): void => {
    for (const entry of this.pending.values()) {
      entry.reject(error);
    }
    this.pending.clear();
    // Only clear the reference if it still points at this process.
    if (this.processRef === child) {
      this.processRef = null;
    }
  };

  const rl = createInterface({
    input: child.stdout,
    crlfDelay: Infinity,
  });

  rl.on("line", (line) => {
    this.handleStdoutLine(line);
  });

  child.stderr.on("data", (chunk: Buffer) => {
    const text = chunk.toString().trim();
    if (text.length > 0) {
      this.logger.warn(`[melotts] ${text}`);
    }
  });

  child.stdin.on("error", (error) => {
    this.logger.debug("melotts stdin error", error);
  });

  // Without a listener an "error" event would be thrown as an uncaught exception.
  child.on("error", (error) => {
    this.logger.warn("melotts worker process error", error);
    // No pid means the process never started; "exit" may not follow in that case.
    if (child.pid === undefined) {
      failPending(error);
    }
  });

  child.on("exit", (code, signal) => {
    failPending(new Error(`melotts worker exited code=${code ?? "null"} signal=${signal ?? "null"}`));
  });
}
|
||||
|
||||
/**
 * Sends one request to the worker over stdin and resolves with its result.
 *
 * Starts the worker first if needed. The request is written as a single JSON
 * line `{ id, method, params }`; the matching response is routed back through
 * the `pending` map by `handleStdoutLine`.
 *
 * @param method Name of the worker method to invoke.
 * @param params Arguments forwarded to the worker.
 * @returns The `result` value of the matching response, cast to `T`.
 * @throws Error when no worker process is available after `start()`.
 */
private async request<T>(method: string, params: Record<string, unknown>): Promise<T> {
  await this.start();

  const worker = this.processRef;
  if (!worker) {
    throw new Error("melotts worker is not running");
  }

  const id = String(this.nextId++);
  const line = `${JSON.stringify({ id, method, params })}\n`;

  // Register the waiter before writing so a fast response cannot be missed.
  const settled = new Promise<T>((resolve, reject) => {
    this.pending.set(id, {
      resolve: (value) => resolve(value as T),
      reject,
    });
  });

  worker.stdin.write(line);
  return await settled;
}
|
||||
|
||||
/**
 * Routes one line of worker stdout to the request that is waiting for it.
 *
 * Blank lines, lines that are not JSON, JSON values that are not objects and
 * responses with an unknown id are ignored (the first three are logged).
 *
 * @param line One raw line read from the worker's stdout.
 */
private handleStdoutLine(line: string): void {
  const trimmed = line.trim();
  if (!trimmed) {
    return;
  }

  let message: RpcResponse<unknown>;
  try {
    message = JSON.parse(trimmed) as RpcResponse<unknown>;
  } catch (error) {
    this.logger.warn("melotts stdout parse failed", error);
    return;
  }

  // JSON.parse also accepts `null`, numbers and strings. Reading `.id` from
  // those would throw inside the readline callback.
  if (typeof message !== "object" || message === null) {
    this.logger.warn("melotts stdout ignored non-object message", trimmed);
    return;
  }

  // Pending ids are stored as strings; coerce so a numeric echo still matches.
  const id = String(message.id);
  const waiter = this.pending.get(id);
  if (!waiter) {
    return;
  }

  this.pending.delete(id);
  if (isFailure(message)) {
    waiter.reject(new Error(message.error));
    return;
  }
  waiter.resolve(message.result);
}
|
||||
}
|
||||
@@ -1,85 +1,550 @@
|
||||
import type { AssistantRuntimeConfig } from "../config.js";
|
||||
import type { ConversationMemory, UserUtterance } from "./conversation.js";
|
||||
import type { LlmService } from "./llm.js";
|
||||
import type { AppConfig } from "../config.js";
|
||||
import type { Logger } from "../logger.js";
|
||||
import { loadPrompt } from "../prompt-loader.js";
|
||||
import { webFetch, webSearch } from "./web-tools.js";
|
||||
|
||||
// Korean system-prompt sentences for the voice assistant, joined into one
// string with single spaces.
const ASSISTANT_INSTRUCTIONS = [
|
||||
"너는 디스코드 음성 채널 또는 로컬 마이크 테스트에서 동작하는 한국어 음성 비서다.",
|
||||
"답변은 짧고 실용적으로 한다.",
|
||||
"기본은 한 문장, 길어도 두 문장을 넘기지 않는다.",
|
||||
"말투는 자연스러운 한국어로 유지한다.",
|
||||
"speaker_id와 speaker_name은 화자 구분용이므로 필요할 때만 자연스럽게 반영한다.",
|
||||
"잘 못 들었거나 의미가 불명확하면 짧게 다시 물어본다.",
|
||||
"목록, 마크다운, 코드블록은 쓰지 않는다.",
|
||||
"생각 과정을 드러내지 말고 최종 답변만 말한다.",
|
||||
].join(" ");
|
||||
/** One conversation turn in the message list posted to the Ollama chat endpoint. */
interface OllamaChatMessage {
|
||||
/** Author of the turn. */
role: "system" | "user" | "assistant";
|
||||
/** Text of the turn. */
content: string;
|
||||
/** Tool invocations attached to an assistant turn; omitted when there are none. */
tool_calls?: OllamaToolCall[];
|
||||
}
|
||||
|
||||
/** Fields read from the JSON body returned by the Ollama chat endpoint; all are optional. */
interface OllamaChatResponse {
|
||||
/** The assistant message, when the request succeeded. */
message?: {
|
||||
/** Reply text. */
content?: string;
|
||||
// NOTE(review): presumably the model's reasoning trace — not read anywhere in the visible code; confirm.
thinking?: string;
|
||||
/** Tool invocations requested by the model. */
tool_calls?: OllamaToolCall[];
|
||||
};
|
||||
/** Error text reported by the server. */
error?: string;
|
||||
}
|
||||
|
||||
function normalizeReply(text: string): string {
|
||||
const strippedThink = text.replace(/<think>[\s\S]*?<\/think>/gi, " ");
|
||||
const compact = strippedThink.replace(/\s+/g, " ").trim();
|
||||
if (compact.length <= 180) {
|
||||
return compact;
|
||||
}
|
||||
|
||||
const sentences = compact.match(/[^.!?]+[.!?]?/g);
|
||||
if (!sentences || sentences.length === 0) {
|
||||
return compact.slice(0, 180).trim();
|
||||
}
|
||||
|
||||
return sentences.slice(0, 2).join(" ").trim().slice(0, 180).trim();
|
||||
/** A tool invocation requested by the model. */
interface OllamaToolCall {
|
||||
type: "function";
|
||||
function: {
|
||||
/** Name of the tool to run; matched against the names in the tool definitions. */
name: string;
|
||||
/** Arguments supplied by the model, keyed by parameter name. */
arguments: Record<string, unknown>;
|
||||
};
|
||||
}
|
||||
|
||||
export class OllamaLlmService implements LlmService {
|
||||
constructor(private readonly config: AssistantRuntimeConfig) {}
|
||||
/** Declaration of one callable tool, sent to the model in the `tools` field of a chat request. */
interface OllamaToolDefinition {
|
||||
type: "function";
|
||||
function: {
|
||||
/** Tool identifier the model uses when calling it. */
name: string;
|
||||
/** Natural-language description of what the tool does. */
description: string;
|
||||
/** Schema-style description of the tool's arguments. */
parameters: {
|
||||
type: "object";
|
||||
/** Names of the arguments that must be supplied. */
required?: string[];
|
||||
/** Per-argument descriptors keyed by argument name. */
properties: Record<string, unknown>;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
async generateReply(memory: ConversationMemory, utterance: UserUtterance): Promise<string> {
|
||||
const url = new URL("/api/chat", this.config.OLLAMA_BASE_URL);
|
||||
const response = await fetch(url, {
|
||||
/** Conversation entry that carries a tool's output back to the model. */
interface OllamaToolResultMessage {
|
||||
role: "tool";
|
||||
/** Name of the tool that produced the content. */
tool_name: string;
|
||||
/** Tool output as a string (the visible tools return JSON text). */
content: string;
|
||||
}
|
||||
|
||||
/** Optional hooks for reply generation. */
interface GenerateReplyOptions {
|
||||
/** Receives a short status message while a tool call is being handled. */
onProgress?: (message: string) => void;
|
||||
}
|
||||
|
||||
/** Verdict on whether an utterance deserves a spoken reply. */
export interface ReplyAssessment {
|
||||
/** True when the assistant should answer the utterance. */
shouldReply: boolean;
|
||||
/** True when answering probably requires a lookup. */
likelyNeedsLookup: boolean;
|
||||
/** Short label explaining the verdict, e.g. "empty", "filler", "too_short", "fallback". */
reason: string;
|
||||
}
|
||||
|
||||
// Prompt texts obtained through loadPrompt() once, when this module is evaluated.
// System prompt for warm-up and normal replies.
const ASSISTANT_PROMPT = loadPrompt("assistant.md");
|
||||
// System prompt for the should-we-reply check.
const REPLY_GATE_PROMPT = loadPrompt("reply-gate.md");
|
||||
// System prompt for rewriting a reply that failed the script-usage check.
const REWRITE_KOREAN_PROMPT = loadPrompt("rewrite-korean.md");
|
||||
|
||||
/**
 * Tools offered to the model, in the order they are sent with chat requests.
 * Built through a small local factory so each entry lists only what differs:
 * name, description, argument descriptors and required argument names.
 */
const TOOL_DEFINITIONS: OllamaToolDefinition[] = (() => {
  const ofType = (type: string, description: string) => ({ type, description });

  const defineTool = (
    name: string,
    description: string,
    properties: Record<string, unknown> = {},
    required?: string[],
  ): OllamaToolDefinition => ({
    type: "function",
    function: {
      name,
      description,
      parameters: {
        type: "object",
        // `required` is emitted only for tools that declare it, before `properties`.
        ...(required ? { required } : {}),
        properties,
      },
    },
  });

  return [
    defineTool(
      "get_current_time",
      "현재 시스템 시간을 Asia/Seoul 기준 ISO 문자열과 사람이 읽기 쉬운 형식으로 반환한다.",
    ),
    defineTool(
      "get_runtime_settings",
      "현재 로컬 LLM 및 STT 실행 설정의 핵심 값만 반환한다.",
    ),
    defineTool(
      "list_project_commands",
      "현재 프로젝트에서 사용 가능한 주요 bun 스크립트 명령 목록을 반환한다.",
    ),
    defineTool(
      "evaluate_math",
      "간단한 산술식을 정확히 계산한다. 숫자, 공백, 소수점, 괄호, + - * / % 만 허용한다.",
      { expression: ofType("string", "예: (11434+12341)*412") },
      ["expression"],
    ),
    defineTool(
      "web_search",
      "웹 검색 결과 제목, URL, 요약을 가져온다. 최신 정보, 뉴스, 사실 확인이 필요할 때만 사용한다.",
      {
        query: ofType("string", "검색어"),
        max_results: ofType("number", "가져올 최대 결과 수. 보통 3~5"),
      },
      ["query"],
    ),
    defineTool(
      "fetch_url",
      "주어진 URL의 페이지 제목과 본문 텍스트를 읽어온다. 검색 결과 상세 확인에 사용한다.",
      {
        url: ofType("string", "http 또는 https URL"),
        max_chars: ofType("number", "본문에서 가져올 최대 글자 수"),
      },
      ["url"],
    ),
  ];
})();
|
||||
|
||||
export class OllamaLlmService {
|
||||
private history: OllamaChatMessage[] = [];
|
||||
|
||||
constructor(
|
||||
private readonly config: AppConfig,
|
||||
private readonly logger: Logger,
|
||||
) {}
|
||||
|
||||
/**
 * Sends a throwaway one-word prompt to the model and logs the model name
 * together with whatever it answered.
 */
async warmup(): Promise<void> {
  const probe: OllamaChatMessage[] = [
    { role: "system", content: ASSISTANT_PROMPT },
    { role: "user", content: "준비 상태 확인입니다. 한 단어로만 답하세요." },
  ];

  const { content } = await this.chat(probe);
  this.logger.info("LLM warmup finished", { model: this.config.OLLAMA_MODEL, reply: content });
}
|
||||
|
||||
/**
 * Decides whether an utterance deserves a reply.
 *
 * Cheap local heuristics run first. Only when they are inconclusive is the
 * model asked (tools disabled) using the reply-gate prompt. If the model's
 * answer cannot be parsed, the utterance is answered and the lookup flag
 * comes from a keyword check.
 *
 * @param userText Transcribed user utterance.
 * @returns The assessment for this utterance.
 */
async assessReplyNeed(userText: string): Promise<ReplyAssessment> {
  const quickVerdict = this.assessReplyNeedHeuristically(userText);
  if (quickVerdict) {
    return quickVerdict;
  }

  const gateMessages: OllamaChatMessage[] = [
    { role: "system", content: REPLY_GATE_PROMPT },
    { role: "user", content: userText },
  ];
  const gateReply = await this.chat(gateMessages, { enableTools: false });

  return (
    this.parseAssessment(gateReply.content) ?? {
      shouldReply: true,
      likelyNeedsLookup: this.mightNeedLookup(userText),
      reason: "fallback",
    }
  );
}
|
||||
|
||||
/**
 * Produces the assistant's reply to one user utterance and records the
 * exchange in the conversation history.
 *
 * @param userText Transcribed user utterance.
 * @param options Optional progress callback forwarded to the tool loop.
 * @returns The final reply text, after the language-repair pass.
 */
async generateReply(userText: string, options?: GenerateReplyOptions): Promise<string> {
  // System prompt, then prior turns, then the new utterance.
  const transcript: Array<OllamaChatMessage | OllamaToolResultMessage> = [];
  transcript.push({ role: "system", content: ASSISTANT_PROMPT });
  transcript.push(...this.history);
  transcript.push({ role: "user", content: userText });

  const draft = await this.runAgentLoop(transcript, options);
  const finalReply = await this.repairReplyLanguageIfNeeded(draft, userText);

  // Only the user text and the final reply are remembered; the intermediate
  // tool messages stay local to `transcript`.
  this.history.push(
    { role: "user", content: userText },
    { role: "assistant", content: finalReply },
  );
  this.trimHistory();

  return finalReply;
}
|
||||
|
||||
/** Forgets all remembered turns by swapping in a fresh, empty history array. */
resetConversation(): void {
  const emptied: OllamaChatMessage[] = [];
  this.history = emptied;
}
|
||||
|
||||
/**
 * Drops the oldest history entries so that at most
 * `MAX_CONVERSATION_TURNS * 2` messages (one user and one assistant message
 * per turn) remain.
 */
private trimHistory(): void {
  // Clamp at zero: a negative limit would otherwise drop the newest messages.
  const maxMessages = Math.max(0, this.config.MAX_CONVERSATION_TURNS) * 2;
  const overflow = this.history.length - maxMessages;
  if (overflow > 0) {
    // Slice from the front by count. `slice(-maxMessages)` keeps everything
    // when the limit is 0, because `slice(-0)` is `slice(0)`.
    this.history = this.history.slice(overflow);
  }
}
|
||||
|
||||
/**
 * Runs the chat/tool loop until the model answers without requesting tools.
 *
 * Each round appends the assistant message to `messages`, runs every
 * requested tool and appends its output as a `tool` message. A tool that
 * throws is reported to the model as `{"error": ...}` instead of aborting the
 * reply, in the same way unknown tool names are reported.
 *
 * @param messages Conversation so far; mutated in place.
 * @param options Optional `onProgress` callback, invoked at most once, for
 *   the first tool call that has a progress message.
 * @returns The text of the first assistant message without tool calls.
 * @throws Error when the model is still requesting tools after 6 rounds.
 */
private async runAgentLoop(
  messages: Array<OllamaChatMessage | OllamaToolResultMessage>,
  options?: GenerateReplyOptions,
): Promise<string> {
  const maxSteps = 6;
  let progressEmitted = false;

  for (let step = 0; step < maxSteps; step += 1) {
    const response = await this.chat(messages, { enableTools: true });
    const toolCalls = response.toolCalls ?? [];

    messages.push({
      role: "assistant",
      content: response.content,
      tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
    });

    if (toolCalls.length === 0) {
      return response.content;
    }

    for (const call of toolCalls) {
      if (!progressEmitted) {
        const progressMessage = this.getProgressMessage(call.function.name);
        if (progressMessage) {
          options?.onProgress?.(progressMessage);
          progressEmitted = true;
        }
      }

      let result: string;
      try {
        result = await this.executeTool(call);
      } catch (error) {
        // Hand the failure to the model so it can retry or answer without
        // the tool.
        const reason = error instanceof Error ? error.message : String(error);
        this.logger.warn("LLM tool call failed", {
          name: call.function.name,
          arguments: call.function.arguments,
          error: reason,
        });
        result = JSON.stringify({ error: reason });
      }

      this.logger.info("LLM tool call", {
        name: call.function.name,
        arguments: call.function.arguments,
        result,
      });
      messages.push({
        role: "tool",
        tool_name: call.function.name,
        content: result,
      });
    }
  }

  throw new Error("도구 호출 루프가 제한 횟수를 넘었습니다.");
}
|
||||
|
||||
private async chat(
|
||||
messages: Array<OllamaChatMessage | OllamaToolResultMessage>,
|
||||
options?: { enableTools: boolean },
|
||||
): Promise<{ content: string; toolCalls: OllamaToolCall[] }> {
|
||||
const response = await fetch(`${this.config.OLLAMA_BASE_URL}/api/chat`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"content-type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: this.config.OLLAMA_MODEL,
|
||||
messages: [
|
||||
{
|
||||
role: "system",
|
||||
content: ASSISTANT_INSTRUCTIONS,
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: memory.buildPrompt(utterance),
|
||||
},
|
||||
],
|
||||
think: false,
|
||||
messages,
|
||||
tools: options?.enableTools ? TOOL_DEFINITIONS : undefined,
|
||||
stream: false,
|
||||
think: false,
|
||||
keep_alive: this.config.OLLAMA_KEEP_ALIVE,
|
||||
options: {
|
||||
num_ctx: this.config.OLLAMA_NUM_CTX,
|
||||
temperature: 0.4,
|
||||
num_predict: 120,
|
||||
},
|
||||
}),
|
||||
});
|
||||
|
||||
const body = (await response.json().catch(() => ({}))) as OllamaChatResponse;
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(body.error ?? `Ollama request failed with status ${response.status}`);
|
||||
const body = await response.text();
|
||||
throw new Error(`Ollama API ${response.status}: ${body}`);
|
||||
}
|
||||
|
||||
const output = body.message?.content?.trim();
|
||||
if (!output) {
|
||||
return "잘 못 들었습니다. 한 번만 다시 말씀해 주세요.";
|
||||
const payload = (await response.json()) as OllamaChatResponse;
|
||||
const content = payload.message?.content?.trim() ?? "";
|
||||
const toolCalls = payload.message?.tool_calls ?? [];
|
||||
|
||||
if (!content && toolCalls.length === 0) {
|
||||
throw new Error("Ollama 응답에 message.content 와 tool_calls 가 모두 없습니다.");
|
||||
}
|
||||
|
||||
return normalizeReply(output);
|
||||
return {
|
||||
content,
|
||||
toolCalls,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Runs the tool named in `call` and returns its output as JSON text.
 *
 * `max_results` is truncated and clamped to 1..5 (default 4); `max_chars` is
 * truncated and clamped to 1000..10000 (default 6000). An unrecognised tool
 * name yields `{"error": "unknown tool: <name>"}` rather than an exception.
 *
 * @param call Tool invocation requested by the model.
 * @returns JSON-serialised tool output.
 */
private async executeTool(call: OllamaToolCall): Promise<string> {
  const { name, arguments: args } = call.function;
  const clampInt = (value: number, low: number, high: number): number =>
    Math.min(high, Math.max(low, Math.trunc(value)));

  if (name === "get_current_time") {
    return JSON.stringify(this.getCurrentTime());
  }
  if (name === "get_runtime_settings") {
    return JSON.stringify(this.getRuntimeSettings());
  }
  if (name === "list_project_commands") {
    return JSON.stringify(this.listProjectCommands());
  }
  if (name === "evaluate_math") {
    const expression = this.getStringArg(args, "expression");
    return JSON.stringify({ expression, result: this.evaluateMath(expression) });
  }
  if (name === "web_search") {
    const query = this.getStringArg(args, "query");
    const limit = clampInt(this.getNumberArg(args, "max_results", 4), 1, 5);
    return JSON.stringify(await webSearch(query, limit));
  }
  if (name === "fetch_url") {
    const target = this.getStringArg(args, "url");
    const maxChars = clampInt(this.getNumberArg(args, "max_chars", 6000), 1000, 10000);
    return JSON.stringify(await webFetch(target, maxChars));
  }

  return JSON.stringify({ error: `unknown tool: ${name}` });
}
|
||||
|
||||
/**
 * Reports the current time for the `get_current_time` tool.
 *
 * @returns `timezone` (always "Asia/Seoul"), `iso` (the instant from
 *   `Date.prototype.toISOString`, which is expressed in UTC) and `local`
 *   (the same instant formatted for ko-KR in the Asia/Seoul time zone).
 */
private getCurrentTime(): { timezone: string; iso: string; local: string } {
  const timezone = "Asia/Seoul";
  const now = new Date();
  const seoulFormatter = new Intl.DateTimeFormat("ko-KR", {
    timeZone: "Asia/Seoul",
    dateStyle: "full",
    timeStyle: "long",
  });

  return {
    timezone,
    iso: now.toISOString(),
    local: seoulFormatter.format(now),
  };
}
|
||||
|
||||
/**
 * Collects the configuration values exposed through the
 * `get_runtime_settings` tool, under snake_case keys.
 *
 * @returns A plain object; `audio_source` is `null` when not configured.
 */
private getRuntimeSettings(): Record<string, unknown> {
  const cfg = this.config;
  const settings: Record<string, unknown> = {};

  settings.ollama_base_url = cfg.OLLAMA_BASE_URL;
  settings.ollama_model = cfg.OLLAMA_MODEL;
  settings.ollama_keep_alive = cfg.OLLAMA_KEEP_ALIVE;
  settings.max_conversation_turns = cfg.MAX_CONVERSATION_TURNS;
  settings.whisper_model = cfg.WHISPER_MODEL;
  settings.whisper_language = cfg.WHISPER_LANGUAGE;
  settings.whisper_device = cfg.WHISPER_DEVICE;
  settings.whisper_compute_type = cfg.WHISPER_COMPUTE_TYPE;
  settings.whisper_beam_size = cfg.WHISPER_BEAM_SIZE;
  settings.audio_source = cfg.AUDIO_SOURCE ?? null;
  settings.debug = cfg.DEBUG;

  return settings;
}
|
||||
|
||||
/**
 * Lists the project's main `bun run` commands for the
 * `list_project_commands` tool.
 *
 * @returns `{ commands }`, each entry a complete command line.
 */
private listProjectCommands(): { commands: string[] } {
  const scripts = [
    "setup",
    "setup:stt",
    "setup:llm",
    "setup:tts",
    "setup:all",
    "devices",
    "test:stt",
    "test:sttllm",
    "test:all",
    "test:llm",
    "test:tts -- \"안녕하세요\"",
  ];

  return { commands: scripts.map((script) => `bun run ${script}`) };
}
|
||||
|
||||
/**
 * Reads a required, non-blank string argument from a tool call.
 *
 * @param args Arguments object supplied by the model. It comes from parsed
 *   JSON, so it is treated as possibly missing despite its declared type.
 * @param name Argument name.
 * @returns The argument with surrounding whitespace removed.
 * @throws Error when the argument is missing, not a string, or blank.
 */
private getStringArg(args: Record<string, unknown>, name: string): string {
  // `?.` keeps a tool call without `arguments` on the validation-error path
  // instead of raising a TypeError.
  const value: unknown = args?.[name];
  const text = typeof value === "string" ? value.trim() : "";
  if (text.length === 0) {
    throw new Error(`도구 인자 ${name} 가 비어 있습니다.`);
  }
  return text;
}
|
||||
|
||||
/**
 * Evaluates a simple arithmetic expression.
 *
 * Only digits, whitespace, `.`, parentheses and `+ - * / %` are accepted.
 *
 * @param expression Arithmetic expression text.
 * @returns The finite numeric result.
 * @throws Error when the expression contains other characters, is malformed,
 *   or does not evaluate to a finite number.
 */
private evaluateMath(expression: string): number {
  if (!/^[0-9+\-*/%().\s]+$/.test(expression)) {
    throw new Error("허용되지 않은 문자가 포함된 산술식입니다.");
  }

  // NOTE(review): this is dynamic code evaluation of model-supplied text. The
  // whitelist above admits no identifier characters, which is what keeps it
  // contained. Any loosening of that pattern must be reviewed together with
  // this call; a small expression parser would remove the dependency.
  let result: unknown;
  try {
    result = Function(`"use strict"; return (${expression});`)();
  } catch {
    // Whitelisted but malformed input ("1+", "2(3)") fails to compile or run.
    // Report it as an invalid expression instead of a raw SyntaxError.
    throw new Error("산술식 계산 결과가 유효하지 않습니다.");
  }

  if (typeof result !== "number" || !Number.isFinite(result)) {
    throw new Error("산술식 계산 결과가 유효하지 않습니다.");
  }
  return result;
}
|
||||
|
||||
/**
 * Reads an optional numeric argument from a tool call.
 *
 * Accepts a finite number or a non-blank numeric string.
 *
 * @param args Arguments object supplied by the model. It comes from parsed
 *   JSON, so it is treated as possibly missing despite its declared type.
 * @param name Argument name.
 * @param fallback Value returned when the argument is absent or unusable.
 * @returns The parsed number, or `fallback`.
 */
private getNumberArg(args: Record<string, unknown>, name: string, fallback: number): number {
  const value: unknown = args?.[name];

  if (typeof value === "number" && Number.isFinite(value)) {
    return value;
  }

  // `Number("")` and `Number("  ")` are 0, so blank strings must be excluded
  // explicitly or they would override the fallback.
  if (typeof value === "string" && value.trim().length > 0) {
    const parsed = Number(value);
    if (Number.isFinite(parsed)) {
      return parsed;
    }
  }

  return fallback;
}
|
||||
|
||||
/**
 * Rewrites a reply with the Korean-rewrite prompt when its script usage looks
 * wrong, and otherwise returns it unchanged.
 *
 * @param reply Reply produced by the tool loop.
 * @param userText The utterance the reply answers; given to the rewrite
 *   prompt as context.
 * @returns The rewritten reply, or the original when no repair is needed or
 *   the rewrite comes back empty.
 */
private async repairReplyLanguageIfNeeded(reply: string, userText: string): Promise<string> {
  if (!this.needsLanguageRepair(reply)) {
    return reply;
  }

  this.logger.warn("Reply language repair triggered", {
    reply,
    analysis: this.analyzeScriptUsage(reply),
  });

  const rewriteRequest: OllamaChatMessage[] = [
    { role: "system", content: REWRITE_KOREAN_PROMPT },
    { role: "user", content: `원문 질문: ${userText}\n기존 답변: ${reply}` },
  ];
  const { content } = await this.chat(rewriteRequest, { enableTools: false });

  const rewritten = content.trim();
  return rewritten.length > 0 ? rewritten : reply;
}
|
||||
|
||||
/**
 * Reports whether a reply should be rewritten: it contains letters that are
 * neither Hangul nor Latin, or it contains Latin letters and no Hangul.
 *
 * @param text Reply text to inspect.
 */
private needsLanguageRepair(text: string): boolean {
  const { hangul, latin, otherLetters } = this.analyzeScriptUsage(text);
  const hasForeignScript = otherLetters > 0;
  const latinOnly = hangul === 0 && latin > 0;
  return hasForeignScript || latinOnly;
}
|
||||
|
||||
/**
 * Counts the letters of a text by script.
 *
 * The text is walked by code point. Characters outside the Unicode Letter
 * category are ignored; every letter is counted as Hangul, Latin, or other.
 *
 * @param text Text to analyse.
 * @returns Letter counts per bucket.
 */
private analyzeScriptUsage(text: string): { hangul: number; latin: number; otherLetters: number } {
  const tally = { hangul: 0, latin: 0, otherLetters: 0 };

  for (const symbol of text) {
    if (!/\p{Letter}/u.test(symbol)) {
      continue;
    }

    if (/\p{Script=Hangul}/u.test(symbol)) {
      tally.hangul += 1;
    } else if (/\p{Script=Latin}/u.test(symbol)) {
      tally.latin += 1;
    } else {
      tally.otherLetters += 1;
    }
  }

  return tally;
}
|
||||
|
||||
/**
 * Returns the spoken progress notice for a tool, if it has one.
 *
 * @param toolName Name of the tool about to run.
 * @returns The notice for the two web tools, otherwise `null`.
 */
private getProgressMessage(toolName: string): string | null {
  const isWebTool = toolName === "web_search" || toolName === "fetch_url";
  return isWebTool ? "검색해볼게요." : null;
}
|
||||
|
||||
/**
 * Extracts a reply assessment from free-form model output.
 *
 * Takes the span from the first `{` to the last `}` and parses it as JSON.
 * Both snake_case and camelCase flag names are accepted, and a flag counts
 * only when it is exactly `true`.
 *
 * @param content Raw model output.
 * @returns The assessment, or `null` when no parseable JSON object is found.
 */
private parseAssessment(content: string): ReplyAssessment | null {
  const jsonSpan = /\{[\s\S]*\}/.exec(content);
  if (jsonSpan === null) {
    return null;
  }

  let fields: Record<string, unknown>;
  try {
    fields = JSON.parse(jsonSpan[0]) as Record<string, unknown>;
  } catch {
    return null;
  }

  const isSet = (snakeKey: string, camelKey: string): boolean =>
    fields[snakeKey] === true || fields[camelKey] === true;
  const reason = typeof fields.reason === "string" ? fields.reason : "parsed";

  return {
    shouldReply: isSet("should_reply", "shouldReply"),
    likelyNeedsLookup: isSet("likely_needs_lookup", "likelyNeedsLookup"),
    reason,
  };
}
|
||||
|
||||
/**
 * Cheap pre-check that rejects utterances not worth sending to the model.
 *
 * @param userText Transcribed user utterance.
 * @returns A "do not reply" assessment for empty input, pure filler sounds,
 *   or text of at most two characters without a question mark. Returns `null`
 *   when the heuristics cannot decide.
 */
private assessReplyNeedHeuristically(userText: string): ReplyAssessment | null {
  const normalized = userText.trim();
  const skip = (reason: string): ReplyAssessment => ({
    shouldReply: false,
    likelyNeedsLookup: false,
    reason,
  });

  if (!normalized) {
    return skip("empty");
  }

  // Whole-utterance interjections and acknowledgements.
  if (/^(아+|어+|음+|으+|흠+|엉+|어어+|음음+|하+|호+|와+|오+|응+|네+|예+|끝\.?)$/u.test(normalized)) {
    return skip("filler");
  }

  // NOTE(review): the original character class listed "?" twice, which is a
  // no-op. It is presumably the full-width question mark (U+FF1F) lost in
  // transcoding, restored here — confirm against the repository.
  if (normalized.length <= 2 && !/[?？]/.test(normalized)) {
    return skip("too_short");
  }

  return null;
}
|
||||
|
||||
/**
 * Keyword check for utterances that probably need fresh information
 * (latest, today, news, search, price, weather, real-time, ...).
 *
 * @param text Transcribed user utterance.
 */
private mightNeedLookup(text: string): boolean {
  const lookupKeywords = /(최신|오늘|최근|뉴스|검색|찾아|알아봐|확인|업데이트|가격|날씨|현재|실시간)/u;
  return lookupKeywords.test(text);
}
|
||||
}
|
||||
|
||||
@@ -1,189 +1,184 @@
|
||||
import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process";
|
||||
import { once } from "node:events";
|
||||
import { createInterface } from "node:readline";
|
||||
import path from "node:path";
|
||||
|
||||
import type { AssistantRuntimeConfig } from "../config.js";
|
||||
import type { AppConfig } from "../config.js";
|
||||
import type { Logger } from "../logger.js";
|
||||
import { resolveLocalAiCachePath, resolvePythonLaunch } from "../python-runtime.js";
|
||||
import { buildPythonInvocation, resolveWorkerPythonCommand, resolveWorkerScript } from "../python-runtime.js";
|
||||
|
||||
interface WorkerRequest {
|
||||
id: number;
|
||||
method: string;
|
||||
params: Record<string, unknown>;
|
||||
interface RpcSuccess<T> {
|
||||
id: string;
|
||||
result: T;
|
||||
}
|
||||
|
||||
interface WorkerResponse {
|
||||
id: number;
|
||||
ok: boolean;
|
||||
result?: unknown;
|
||||
error?: string;
|
||||
interface RpcFailure {
|
||||
id: string;
|
||||
error: string;
|
||||
}
|
||||
|
||||
type RpcResponse<T> = RpcSuccess<T> | RpcFailure;
|
||||
|
||||
/**
 * Type guard that tells a failure response from a success response by the
 * presence of an `error` property (own or inherited).
 */
function isFailure<T>(value: RpcResponse<T>): value is RpcFailure {
  return Reflect.has(value, "error");
}
|
||||
|
||||
export class PythonJsonWorker {
|
||||
private child: ChildProcessWithoutNullStreams | null = null;
|
||||
private nextId = 1;
|
||||
private processRef: ChildProcessWithoutNullStreams | null = null;
|
||||
private shuttingDown = false;
|
||||
private readonly pending = new Map<
|
||||
number,
|
||||
string,
|
||||
{
|
||||
resolve: (value: unknown) => void;
|
||||
reject: (error: Error) => void;
|
||||
reject: (reason?: unknown) => void;
|
||||
}
|
||||
>();
|
||||
private nextId = 1;
|
||||
|
||||
constructor(
|
||||
private readonly config: AssistantRuntimeConfig,
|
||||
private readonly config: AppConfig,
|
||||
private readonly logger: Logger,
|
||||
private readonly scriptName: string,
|
||||
private readonly label: string,
|
||||
private readonly workerEnv: Record<string, string>,
|
||||
private readonly logPrefix: string,
|
||||
) {}
|
||||
|
||||
async request<T>(method: string, params: Record<string, unknown>, signal?: AbortSignal): Promise<T> {
|
||||
const child = this.ensureStarted();
|
||||
const id = this.nextId++;
|
||||
|
||||
return await new Promise<T>((resolve, reject) => {
|
||||
if (signal?.aborted) {
|
||||
reject(new Error(`${this.label} request aborted before start`));
|
||||
async start(): Promise<void> {
|
||||
if (this.processRef) {
|
||||
return;
|
||||
}
|
||||
|
||||
const abortHandler = () => {
|
||||
this.pending.delete(id);
|
||||
reject(new Error(`${this.label} request aborted`));
|
||||
};
|
||||
|
||||
if (signal) {
|
||||
signal.addEventListener("abort", abortHandler, { once: true });
|
||||
if (this.shuttingDown) {
|
||||
throw new Error(`${this.logPrefix} worker is shutting down`);
|
||||
}
|
||||
|
||||
this.pending.set(id, {
|
||||
resolve: (value) => {
|
||||
if (signal) {
|
||||
signal.removeEventListener("abort", abortHandler);
|
||||
}
|
||||
resolve(value as T);
|
||||
},
|
||||
reject: (error) => {
|
||||
if (signal) {
|
||||
signal.removeEventListener("abort", abortHandler);
|
||||
}
|
||||
reject(error);
|
||||
const python = await resolveWorkerPythonCommand(this.config);
|
||||
const scriptPath = resolveWorkerScript(this.scriptName);
|
||||
const invocation = buildPythonInvocation(python, [scriptPath]);
|
||||
|
||||
this.processRef = spawn(invocation.command, invocation.args, {
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
windowsHide: true,
|
||||
env: {
|
||||
...process.env,
|
||||
WHISPER_MODEL: this.config.WHISPER_MODEL,
|
||||
WHISPER_LANGUAGE: this.config.WHISPER_LANGUAGE,
|
||||
WHISPER_DEVICE: this.config.WHISPER_DEVICE,
|
||||
WHISPER_COMPUTE_TYPE: this.config.WHISPER_COMPUTE_TYPE,
|
||||
WHISPER_BEAM_SIZE: String(this.config.WHISPER_BEAM_SIZE),
|
||||
},
|
||||
});
|
||||
|
||||
const message: WorkerRequest = {
|
||||
const rl = createInterface({
|
||||
input: this.processRef.stdout,
|
||||
crlfDelay: Infinity,
|
||||
});
|
||||
|
||||
rl.on("line", (line) => {
|
||||
this.handleStdoutLine(line);
|
||||
});
|
||||
|
||||
this.processRef.stderr.on("data", (chunk: Buffer) => {
|
||||
const text = chunk.toString().trim();
|
||||
if (text.length > 0) {
|
||||
this.logger.warn(`[${this.logPrefix}] ${text}`);
|
||||
}
|
||||
});
|
||||
|
||||
this.processRef.stdin.on("error", (error) => {
|
||||
this.logger.debug(`${this.logPrefix} stdin error`, error);
|
||||
});
|
||||
|
||||
this.processRef.on("exit", (code, signal) => {
|
||||
const error = new Error(`${this.logPrefix} worker exited code=${code ?? "null"} signal=${signal ?? "null"}`);
|
||||
for (const entry of this.pending.values()) {
|
||||
entry.reject(error);
|
||||
}
|
||||
this.pending.clear();
|
||||
this.processRef = null;
|
||||
});
|
||||
}
|
||||
|
||||
async request<T>(method: string, params: Record<string, unknown>): Promise<T> {
|
||||
await this.start();
|
||||
|
||||
if (!this.processRef) {
|
||||
throw new Error(`${this.logPrefix} worker is not running`);
|
||||
}
|
||||
|
||||
const id = String(this.nextId++);
|
||||
const payload = JSON.stringify({
|
||||
id,
|
||||
method,
|
||||
params,
|
||||
};
|
||||
|
||||
child.stdin.write(`${JSON.stringify(message)}\n`);
|
||||
});
|
||||
|
||||
const promise = new Promise<T>((resolve, reject) => {
|
||||
this.pending.set(id, {
|
||||
resolve: (value) => resolve(value as T),
|
||||
reject,
|
||||
});
|
||||
});
|
||||
|
||||
this.processRef.stdin.write(`${payload}\n`);
|
||||
return await promise;
|
||||
}
|
||||
|
||||
async destroy(): Promise<void> {
|
||||
this.rejectAll(new Error(`${this.label} worker terminated`));
|
||||
|
||||
if (!this.child) {
|
||||
if (!this.processRef) {
|
||||
return;
|
||||
}
|
||||
|
||||
const child = this.child;
|
||||
this.child = null;
|
||||
const child = this.processRef;
|
||||
this.shuttingDown = true;
|
||||
|
||||
child.kill("SIGTERM");
|
||||
await new Promise<void>((resolve) => {
|
||||
child.once("exit", () => resolve());
|
||||
setTimeout(resolve, 1_500);
|
||||
});
|
||||
}
|
||||
|
||||
private ensureStarted(): ChildProcessWithoutNullStreams {
|
||||
if (this.child) {
|
||||
return this.child;
|
||||
}
|
||||
|
||||
const launch = resolvePythonLaunch(this.config);
|
||||
const scriptPath = path.resolve(process.cwd(), "python", this.scriptName);
|
||||
const cachePath = resolveLocalAiCachePath(this.config);
|
||||
const recentStderr: string[] = [];
|
||||
|
||||
const child = spawn(launch.command, [...launch.args, scriptPath], {
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
env: {
|
||||
...process.env,
|
||||
HF_HOME: cachePath,
|
||||
TRANSFORMERS_CACHE: cachePath,
|
||||
PYTHONIOENCODING: "utf-8",
|
||||
BOT_DEFAULT_LANGUAGE: this.config.BOT_DEFAULT_LANGUAGE,
|
||||
...this.workerEnv,
|
||||
},
|
||||
});
|
||||
|
||||
createInterface({
|
||||
input: child.stdout,
|
||||
crlfDelay: Number.POSITIVE_INFINITY,
|
||||
}).on("line", (line) => {
|
||||
if (!line.trim()) {
|
||||
return;
|
||||
}
|
||||
|
||||
let payload: WorkerResponse;
|
||||
try {
|
||||
payload = JSON.parse(line) as WorkerResponse;
|
||||
} catch (error) {
|
||||
this.logger.warn(`${this.label} stdout parse failed`, error);
|
||||
child.stdin.end();
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
|
||||
if (!child.killed && child.exitCode === null) {
|
||||
child.kill("SIGTERM");
|
||||
}
|
||||
|
||||
const timedWait = Promise.race([
|
||||
once(child, "exit"),
|
||||
new Promise<null>((resolve) => setTimeout(() => resolve(null), 1500)),
|
||||
]);
|
||||
|
||||
await timedWait;
|
||||
|
||||
if (child.exitCode === null && !child.killed) {
|
||||
child.kill("SIGKILL");
|
||||
await once(child, "exit").catch(() => null);
|
||||
}
|
||||
|
||||
this.processRef = null;
|
||||
this.shuttingDown = false;
|
||||
}
|
||||
|
||||
private handleStdoutLine(line: string): void {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed) {
|
||||
return;
|
||||
}
|
||||
|
||||
const pending = this.pending.get(payload.id);
|
||||
let message: RpcResponse<unknown>;
|
||||
try {
|
||||
message = JSON.parse(trimmed) as RpcResponse<unknown>;
|
||||
} catch (error) {
|
||||
this.logger.warn(`${this.logPrefix} stdout parse failed`, error);
|
||||
return;
|
||||
}
|
||||
|
||||
const pending = this.pending.get(message.id);
|
||||
if (!pending) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.pending.delete(payload.id);
|
||||
if (payload.ok) {
|
||||
pending.resolve(payload.result);
|
||||
this.pending.delete(message.id);
|
||||
if (isFailure(message)) {
|
||||
pending.reject(new Error(message.error));
|
||||
return;
|
||||
}
|
||||
|
||||
pending.reject(new Error(payload.error ?? `${this.label} worker error`));
|
||||
});
|
||||
|
||||
child.stderr.on("data", (chunk: Buffer) => {
|
||||
const text = chunk.toString().trim();
|
||||
if (text.length > 0) {
|
||||
recentStderr.push(text);
|
||||
if (recentStderr.length > 20) {
|
||||
recentStderr.shift();
|
||||
}
|
||||
this.logger.warn(`[${this.label}]`, text);
|
||||
}
|
||||
});
|
||||
|
||||
child.on("exit", (code, signal) => {
|
||||
if (this.child === child) {
|
||||
this.child = null;
|
||||
}
|
||||
|
||||
const detail = recentStderr.length > 0 ? `\n${recentStderr.join("\n")}` : "";
|
||||
this.rejectAll(new Error(`${this.label} worker exited code=${code ?? "null"} signal=${signal ?? "null"}${detail}`));
|
||||
});
|
||||
|
||||
child.on("error", (error) => {
|
||||
this.rejectAll(error as Error);
|
||||
});
|
||||
|
||||
this.child = child;
|
||||
return child;
|
||||
}
|
||||
|
||||
private rejectAll(error: Error): void {
|
||||
const pending = [...this.pending.values()];
|
||||
this.pending.clear();
|
||||
for (const item of pending) {
|
||||
item.reject(error);
|
||||
}
|
||||
pending.resolve(message.result);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
/** Contract for a speech-to-text backend. */
export interface SttService {
|
||||
// Resolves with the transcript or null.
// NOTE(review): presumably expects 16-bit mono PCM, judging by the names — confirm format and sample rate.
transcribePcm16(pcm16MonoAudio: Buffer): Promise<string | null>;
|
||||
// Optional asynchronous teardown hook.
destroy?(): Promise<void>;
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
import type { Readable } from "node:stream";
|
||||
|
||||
/** Audio prepared for playback, together with its cleanup hook. */
export interface PreparedSpeechAudio {
|
||||
/** Readable stream carrying the audio. */
stream: Readable;
|
||||
// NOTE(review): presumably releases resources held for the stream — confirm with implementers.
dispose: () => void;
|
||||
}
|
||||
|
||||
/** Contract for a text-to-speech backend. */
export interface TtsService {
|
||||
// Turns text into playable audio; takes an optional AbortSignal.
preparePlayback(text: string, signal?: AbortSignal): Promise<PreparedSpeechAudio>;
|
||||
// Optional asynchronous teardown hook.
destroy?(): Promise<void>;
|
||||
}
|
||||
105
src/services/web-tools.ts
Normal file
105
src/services/web-tools.ts
Normal file
@@ -0,0 +1,105 @@
|
||||
/** One hit extracted from the search results page. */
interface WebSearchResult {
|
||||
/** Link text with markup removed and whitespace collapsed. */
title: string;
|
||||
/** Value of the result link's href attribute. */
url: string;
|
||||
/** Snippet text with markup removed and whitespace collapsed. */
snippet: string;
|
||||
}
|
||||
|
||||
/** Result of fetching a single page. */
interface WebFetchResult {
|
||||
// Page address. NOTE(review): requested vs. final URL is not visible here — confirm in webFetch.
url: string;
|
||||
/** Page title. */
title: string;
|
||||
/** Page text content. */
content: string;
|
||||
}
|
||||
|
||||
/**
 * Removes markup from an HTML fragment.
 *
 * `<script>`, `<style>` and `<noscript>` elements are dropped together with
 * their content, then every remaining tag is removed. Each removal leaves a
 * single space so adjacent words do not run together.
 *
 * @param html HTML text.
 * @returns The text with tags replaced by spaces (whitespace not collapsed).
 */
function stripTags(html: string): string {
  // Order matters: whole elements first, then any leftover tags.
  const removable = [
    /<script[\s\S]*?<\/script>/gi,
    /<style[\s\S]*?<\/style>/gi,
    /<noscript[\s\S]*?<\/noscript>/gi,
    /<[^>]+>/g,
  ];

  let text = html;
  for (const pattern of removable) {
    text = text.replace(pattern, " ");
  }
  return text;
}
|
||||
|
||||
/**
 * Decodes the handful of HTML entities that commonly appear in titles and
 * snippets: `&lt;`, `&gt;`, `&quot;`, the apostrophe forms (`&#39;`,
 * `&#x27;`, `&apos;`), `&nbsp;` (to a plain space) and `&amp;`.
 *
 * @param text Text that may contain HTML entities.
 * @returns The text with those entities replaced by their characters.
 */
function decodeEntities(text: string): string {
  return (
    text
      .replace(/&lt;/g, "<")
      .replace(/&gt;/g, ">")
      .replace(/&quot;/g, '"')
      .replace(/&#0*39;|&#x0*27;|&apos;/gi, "'")
      .replace(/&nbsp;/g, " ")
      // `&amp;` goes last so that "&amp;lt;" becomes the literal text "&lt;"
      // instead of being decoded a second time into "<".
      .replace(/&amp;/g, "&")
  );
}
|
||||
|
||||
/**
 * Decodes character references, collapses every whitespace run to a single
 * space and trims both ends.
 *
 * @param text Text to clean up.
 * @returns The decoded, single-spaced, trimmed text.
 */
function normalizeWhitespace(text: string): string {
  const decoded = decodeEntities(text);
  const singleSpaced = decoded.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
||||
|
||||
/**
 * Returns the normalized contents of the first `<title>` element.
 *
 * @param html Raw HTML text.
 * @returns The title text, or an empty string when no `<title>` is found.
 */
function extractTitle(html: string): string {
  const titleMatch = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  const rawTitle = titleMatch?.[1] ?? "";
  return normalizeWhitespace(rawTitle);
}
|
||||
|
||||
/**
 * Pulls search results out of a results page by pattern matching.
 *
 * A result is an anchor with `class="result__a"` (its `href` and inner markup
 * give the URL and title) followed by an anchor or div with
 * `class="result__snippet"` (its inner markup gives the snippet). Matches
 * whose URL or title comes out empty are skipped.
 *
 * @param html Raw HTML of the results page.
 * @param maxResults Collection stops once this many results have been kept.
 *   The limit is checked after each push, so at least one result is kept
 *   whenever any valid match exists.
 * @returns The extracted results in document order.
 */
function extractSearchResults(html: string, maxResults: number): WebSearchResult[] {
  const resultPattern =
    /<a[^>]*class="result__a"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>[\s\S]*?(?:<a[^>]*class="result__snippet"[^>]*>|<div[^>]*class="result__snippet"[^>]*>)([\s\S]*?)(?:<\/a>|<\/div>)/gi;
  const collected: WebSearchResult[] = [];

  for (const [, rawUrl, rawTitle, rawSnippet] of html.matchAll(resultPattern)) {
    const url = rawUrl?.trim();
    const title = normalizeWhitespace(stripTags(rawTitle ?? ""));
    const snippet = normalizeWhitespace(stripTags(rawSnippet ?? ""));

    if (url && title) {
      collected.push({ title, url, snippet });
      if (collected.length >= maxResults) {
        break;
      }
    }
  }

  return collected;
}
|
||||
|
||||
/**
 * Runs a query against DuckDuckGo's HTML endpoint and extracts the results.
 *
 * @param query Search terms; URL-encoded into the `q` parameter.
 * @param maxResults Requested number of results, clamped to the range 1–8.
 * @returns The original query together with the extracted results.
 * @throws Error `web search failed: <status>` when the response is not OK.
 */
export async function webSearch(query: string, maxResults = 5): Promise<{ query: string; results: WebSearchResult[] }> {
  // The request is sent with a desktop-browser user-agent string.
  const requestHeaders = {
    "user-agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
  };
  const searchUrl = `https://duckduckgo.com/html/?q=${encodeURIComponent(query)}`;

  const response = await fetch(searchUrl, { headers: requestHeaders });
  if (!response.ok) {
    throw new Error(`web search failed: ${response.status}`);
  }

  const html = await response.text();
  const limit = Math.min(Math.max(maxResults, 1), 8);
  const results = extractSearchResults(html, limit);
  return { query, results };
}
|
||||
|
||||
/**
 * Downloads a page and returns its title and tag-free text.
 *
 * @param url Address to fetch; must start with `http://` or `https://`.
 * @param maxChars Maximum length of the returned content. Values below 500
 *   are raised to 500.
 * @returns The requested URL, the page title and the truncated text content.
 * @throws Error when the URL scheme is not http/https, or
 *   `web fetch failed: <status>` when the response is not OK.
 */
export async function webFetch(url: string, maxChars = 6000): Promise<WebFetchResult> {
  const isHttpUrl = /^https?:\/\//i.test(url);
  if (!isHttpUrl) {
    throw new Error("http 또는 https URL만 허용됩니다.");
  }

  // NOTE(review): only the scheme is validated; the host is not restricted.
  const response = await fetch(url, {
    redirect: "follow",
    headers: {
      "user-agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
    },
  });
  if (!response.ok) {
    throw new Error(`web fetch failed: ${response.status}`);
  }

  const html = await response.text();
  const title = extractTitle(html);
  const charLimit = Math.max(500, maxChars);
  const pageText = normalizeWhitespace(stripTags(html));

  return { url, title, content: pageText.slice(0, charLimit) };
}
|
||||
38
src/setup-llm.ts
Normal file
38
src/setup-llm.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import process from "node:process";
|
||||
import { spawn } from "node:child_process";
|
||||
|
||||
import { loadConfig } from "./config.js";
|
||||
|
||||
/**
 * Spawns a command with inherited stdio and waits for it to finish.
 *
 * On Windows the command is run through a shell.
 *
 * @param command Executable to start.
 * @param args Arguments passed to the executable.
 * @returns Resolves when the process exits with code 0.
 * @throws Rejects with the spawn error, or with an
 *   `... exited with code <code>` error for any other exit code (`null` is
 *   reported when no exit code is available).
 */
async function run(command: string, args: string[]): Promise<void> {
  const useShell = process.platform === "win32";

  await new Promise<void>((resolve, reject) => {
    const proc = spawn(command, args, {
      stdio: "inherit",
      windowsHide: true,
      shell: useShell,
    });

    proc.on("error", reject);
    proc.on("exit", (code) => {
      if (code !== 0) {
        reject(new Error(`${command} ${args.join(" ")} exited with code ${code ?? "null"}`));
        return;
      }
      resolve();
    });
  });
}
|
||||
|
||||
/**
 * Pulls the configured Ollama model by running `ollama pull <model>`.
 *
 * The model name is read from `OLLAMA_MODEL` in the loaded configuration.
 *
 * @throws Rejects when the `ollama` process cannot be started or exits with a
 *   non-zero code.
 */
export async function setupLlm(): Promise<void> {
  const { OLLAMA_MODEL: model } = loadConfig();

  console.log(`Ollama 모델 준비: ${model}`);
  await run("ollama", ["pull", model]);
  console.log("Ollama LLM 환경 준비 완료");
}
|
||||
|
||||
// Run the setup only when this file is the program's entry module; importing
// it elsewhere just exposes `setupLlm`.
if (import.meta.main) {
  void setupLlm().catch((reason: unknown) => {
    // Print only the message (no stack trace) and signal failure to the caller.
    const message = reason instanceof Error ? reason.message : String(reason);
    console.error(message);
    process.exit(1);
  });
}
|
||||
@@ -1,88 +0,0 @@
|
||||
import { existsSync } from "node:fs";
|
||||
import { mkdir } from "node:fs/promises";
|
||||
import { spawn } from "node:child_process";
|
||||
import path from "node:path";
|
||||
|
||||
import { loadConfig } from "./config.js";
|
||||
import { resolveLocalAiCachePath, resolveLocalAiVenvPath, resolvePythonLaunch, resolveVenvPythonPath } from "./python-runtime.js";
|
||||
|
||||
/**
 * Spawns a command with inherited stdio and waits for it to finish.
 *
 * @param command Executable to start.
 * @param args Arguments passed to the executable.
 * @param extraEnv Optional variables layered on top of the current process
 *   environment for the child.
 * @returns Resolves when the process exits with code 0.
 * @throws Rejects with the spawn error, or with an
 *   `... exited with code <code>` error for any other exit code.
 */
async function run(command: string, args: string[], extraEnv?: NodeJS.ProcessEnv): Promise<void> {
  const childEnv = { ...process.env, ...extraEnv };

  await new Promise<void>((resolve, reject) => {
    const proc = spawn(command, args, { stdio: "inherit", env: childEnv });

    proc.on("error", reject);
    proc.on("exit", (code) => {
      if (code !== 0) {
        reject(new Error(`${command} ${args.join(" ")} exited with code ${code ?? "null"}`));
        return;
      }
      resolve();
    });
  });
}
|
||||
|
||||
/**
 * Makes sure `pip` is usable with the given Python interpreter.
 *
 * Probes with `<python> -m pip --version`; if the probe cannot be started or
 * exits non-zero, bootstraps pip via `<python> -m ensurepip --upgrade`.
 *
 * @param pythonBin Python executable to probe.
 * @param env Environment used for the probe and passed on to the bootstrap.
 * @throws Rejects when the `ensurepip` fallback itself fails.
 */
async function ensurePip(pythonBin: string, env: NodeJS.ProcessEnv): Promise<void> {
  const pipProbe = new Promise<void>((resolve, reject) => {
    const probe = spawn(pythonBin, ["-m", "pip", "--version"], { stdio: "ignore", env });

    probe.on("error", reject);
    probe.on("exit", (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error("pip missing"));
      }
    });
  });

  try {
    await pipProbe;
  } catch {
    // Any probe failure is treated as "pip is missing".
    await run(pythonBin, ["-m", "ensurepip", "--upgrade"], env);
  }
}
|
||||
|
||||
/**
 * Prepares the local AI Python environment.
 *
 * Steps: create the cache directory, create the virtual environment when its
 * interpreter is missing, make sure pip is available, upgrade the packaging
 * tools, install `python/requirements.txt`, then print the next steps.
 *
 * Child processes receive `HF_HOME` and `TRANSFORMERS_CACHE` pointing at the
 * cache directory and `PYTHONIOENCODING=utf-8`.
 */
async function main(): Promise<void> {
  const config = loadConfig();
  const venvDir = resolveLocalAiVenvPath(config);
  const venvInterpreter = resolveVenvPythonPath(config);
  const cacheDir = resolveLocalAiCachePath(config);
  const requirementsFile = path.resolve(process.cwd(), "python", "requirements.txt");
  const cacheEnv = {
    HF_HOME: cacheDir,
    TRANSFORMERS_CACHE: cacheDir,
    PYTHONIOENCODING: "utf-8",
  };

  await mkdir(cacheDir, { recursive: true });

  const venvExists = existsSync(venvInterpreter);
  if (!venvExists) {
    // No interpreter inside the venv yet: build it with the base Python.
    const { command, args } = resolvePythonLaunch(config, { preferVenv: false });
    console.log(`기본 Python 확인: ${command} ${args.join(" ")}`.trim());
    console.log(`가상환경 생성: ${venvDir}`);
    await run(command, [...args, "-m", "venv", venvDir], cacheEnv);
  }

  await ensurePip(venvInterpreter, { ...process.env, ...cacheEnv });

  console.log("로컬 AI 의존성 설치를 시작합니다.");
  const pipInvocations = [
    ["install", "--upgrade", "pip", "setuptools", "wheel"],
    ["install", "-r", requirementsFile],
  ];
  for (const pipArgs of pipInvocations) {
    await run(venvInterpreter, ["-m", "pip", ...pipArgs], cacheEnv);
  }

  const closingLines = ["설치가 끝났습니다.", "다음 순서:", "1. bun run devices", "2. bun run start:local"];
  for (const line of closingLines) {
    console.log(line);
  }
}
|
||||
|
||||
// Start the setup; on failure print only the message and exit with status 1.
void main().catch((reason: unknown) => {
  const message = reason instanceof Error ? reason.message : String(reason);
  console.error(message);
  process.exit(1);
});
|
||||
55
src/setup-python.ts
Normal file
55
src/setup-python.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
import process from "node:process";
|
||||
import { mkdir } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { spawn } from "node:child_process";
|
||||
|
||||
import { loadConfig } from "./config.js";
|
||||
import { buildPythonInvocation, resolveBasePythonCommand, resolveVenvPythonPath } from "./python-runtime.js";
|
||||
|
||||
/**
 * Spawns a command in the given directory with inherited stdio and waits for
 * it to finish.
 *
 * @param command Executable to start.
 * @param args Arguments passed to the executable.
 * @param cwd Working directory for the child process.
 * @returns Resolves when the process exits with code 0.
 * @throws Rejects with the spawn error, or with an
 *   `... exited with code <code>` error for any other exit code.
 */
async function run(command: string, args: string[], cwd: string): Promise<void> {
  await new Promise<void>((resolve, reject) => {
    const proc = spawn(command, args, { cwd, stdio: "inherit", windowsHide: true });

    proc.on("error", reject);
    proc.on("exit", (code) => {
      if (code !== 0) {
        reject(new Error(`${command} ${args.join(" ")} exited with code ${code ?? "null"}`));
        return;
      }
      resolve();
    });
  });
}
|
||||
|
||||
/**
 * Creates the Python virtual environment and installs its requirements.
 *
 * The venv location comes from `LOCAL_AI_VENV_PATH` (resolved against the
 * current working directory). Requirements are read from
 * `python/requirements-windows.txt` on Windows and `python/requirements.txt`
 * elsewhere.
 *
 * @throws Rejects when the base Python cannot be resolved or any spawned
 *   command fails.
 */
export async function setupSttPython(): Promise<void> {
  const workDir = process.cwd();
  const config = loadConfig();
  const python = await resolveBasePythonCommand(config);
  const venvRoot = path.resolve(workDir, config.LOCAL_AI_VENV_PATH);

  const requirementsFileName = process.platform === "win32" ? "requirements-windows.txt" : "requirements.txt";
  const requirementsPath = path.resolve(workDir, "python", requirementsFileName);

  // Only the parent directory is created here; `python -m venv` creates the
  // venv directory itself.
  await mkdir(path.dirname(venvRoot), { recursive: true });

  console.log(`가상환경 생성: ${venvRoot}`);
  const { command, args } = buildPythonInvocation(python, ["-m", "venv", venvRoot]);
  await run(command, args, workDir);

  const venvPython = resolveVenvPythonPath(config);
  const pipInvocations = [
    ["install", "--upgrade", "pip", "setuptools", "wheel"],
    ["install", "-r", requirementsPath],
  ];
  for (const pipArgs of pipInvocations) {
    await run(venvPython, ["-m", "pip", ...pipArgs], workDir);
  }

  console.log("Python STT 환경 준비 완료");
}
|
||||
|
||||
// Run the setup only when this file is the program's entry module; importing
// it elsewhere just exposes `setupSttPython`.
if (import.meta.main) {
  void setupSttPython().catch((reason: unknown) => {
    // Print only the message (no stack trace) and signal failure to the caller.
    const message = reason instanceof Error ? reason.message : String(reason);
    console.error(message);
    process.exit(1);
  });
}
|
||||
76
src/setup-tts.ts
Normal file
76
src/setup-tts.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import process from "node:process";
|
||||
import { mkdir } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { spawn } from "node:child_process";
|
||||
|
||||
import { loadConfig } from "./config.js";
|
||||
import { resolveDockerCommand } from "./docker-runtime.js";
|
||||
import { Logger } from "./logger.js";
|
||||
import { MeloTtsService } from "./services/melo-tts.js";
|
||||
|
||||
/**
 * Spawns a command with inherited stdio and waits for it to finish.
 *
 * When `command` is an absolute path, its directory is prepended to the
 * child's `PATH`. On Windows, non-absolute commands are run through a shell.
 *
 * @param command Executable name or absolute path.
 * @param args Arguments passed to the executable.
 * @param cwd Working directory for the child; defaults to the current one.
 * @returns Resolves when the process exits with code 0.
 * @throws Rejects with a Docker-specific message when the bare `docker`
 *   command is not found (ENOENT), with the spawn error otherwise, or with an
 *   `... exited with code <code>` error for any other exit code.
 */
async function run(command: string, args: string[], cwd = process.cwd()): Promise<void> {
  const commandIsAbsolute = path.isAbsolute(command);
  const env = { ...process.env };

  if (commandIsAbsolute) {
    // The copied environment may carry the variable as `PATH` or `Path`.
    const inheritedPath = env.PATH ?? env.Path ?? "";
    env.PATH = [path.dirname(command), inheritedPath].join(path.delimiter);
  }

  await new Promise<void>((resolve, reject) => {
    const proc = spawn(command, args, {
      cwd,
      stdio: "inherit",
      windowsHide: true,
      shell: process.platform === "win32" && !commandIsAbsolute,
      env,
    });

    proc.on("error", (error) => {
      const dockerMissing = (error as NodeJS.ErrnoException).code === "ENOENT" && command === "docker";
      reject(
        dockerMissing
          ? new Error("Docker를 찾지 못했습니다. Docker Desktop을 설치하고 실행한 뒤 다시 시도하세요.")
          : error,
      );
    });

    proc.on("exit", (code) => {
      if (code !== 0) {
        reject(new Error(`${command} ${args.join(" ")} exited with code ${code ?? "null"}`));
        return;
      }
      resolve();
    });
  });
}
|
||||
|
||||
/**
 * Builds the MeloTTS Docker image and warms the TTS service up once.
 *
 * Creates the configured cache and output directories, runs
 * `docker build -t <TTS_IMAGE> docker/melotts`, then constructs a
 * `MeloTtsService`, calls `warmup()` and always calls `destroy()` afterwards.
 * A failure of `destroy()` is ignored.
 *
 * @throws Rejects when Docker cannot be resolved, the build fails, or the
 *   warm-up fails.
 */
export async function setupTts(): Promise<void> {
  const workDir = process.cwd();
  const config = loadConfig();
  // Outside debug mode the logger is limited to the "error" level.
  const logger = new Logger(config.DEBUG ? config.LOG_LEVEL : "error");
  const docker = await resolveDockerCommand(config);

  const dockerContext = path.resolve(workDir, "docker", "melotts");
  const cacheDir = path.resolve(workDir, config.TTS_CACHE_DIR);
  const outputDir = path.resolve(workDir, config.TTS_OUTPUT_DIR);

  for (const directory of [cacheDir, outputDir]) {
    await mkdir(directory, { recursive: true });
  }

  console.log(`MeloTTS Docker 이미지 빌드: ${config.TTS_IMAGE}`);
  await run(docker, ["build", "-t", config.TTS_IMAGE, dockerContext]);

  const tts = new MeloTtsService(config, logger);

  console.log("MeloTTS 모델 워밍업...");
  try {
    await tts.warmup();
  } finally {
    // Best-effort cleanup: a failing destroy must not mask the warm-up result.
    await tts.destroy().catch(() => undefined);
  }

  console.log("로컬 TTS 환경 준비 완료");
}
|
||||
|
||||
// Run the setup only when this file is the program's entry module; importing
// it elsewhere just exposes `setupTts`.
if (import.meta.main) {
  void setupTts().catch((reason: unknown) => {
    // Print only the message (no stack trace) and signal failure to the caller.
    const message = reason instanceof Error ? reason.message : String(reason);
    console.error(message);
    process.exit(1);
  });
}
|
||||
18
src/setup.ts
Normal file
18
src/setup.ts
Normal file
@@ -0,0 +1,18 @@
|
||||
import process from "node:process";
|
||||
|
||||
import { setupLlm } from "./setup-llm.js";
|
||||
import { setupSttPython } from "./setup-python.js";
|
||||
import { setupTts } from "./setup-tts.js";
|
||||
|
||||
/**
 * Runs every setup step in order: Python STT environment, Ollama model, TTS.
 *
 * Steps run strictly one after another; the first failure aborts the rest.
 */
async function main(): Promise<void> {
  const steps = [setupSttPython, setupLlm, setupTts];
  for (const step of steps) {
    await step();
  }
}
|
||||
|
||||
// Run the full setup only when this file is the program's entry module.
if (import.meta.main) {
  void main().catch((reason: unknown) => {
    // Print only the message (no stack trace) and signal failure to the caller.
    const message = reason instanceof Error ? reason.message : String(reason);
    console.error(message);
    process.exit(1);
  });
}
|
||||
Reference in New Issue
Block a user