Add realtime loopback STT prototype

This commit is contained in:
2026-05-02 20:20:54 +09:00
parent 10e0dd75db
commit 5775c4809a
17 changed files with 1034 additions and 0 deletions

16
.env.example Normal file
View File

@@ -0,0 +1,16 @@
LOCAL_AI_VENV_PATH=.local-ai/.venv
# Windows면 보통 python 또는 py -3
LOCAL_AI_PYTHON=
# Windows: ffmpeg dshow 장치 이름
# Linux: pactl list sources short 에서 monitor/source 이름
AUDIO_SOURCE=
WHISPER_MODEL=large-v3-turbo
WHISPER_LANGUAGE=ko
WHISPER_DEVICE=auto
WHISPER_COMPUTE_TYPE=auto
WHISPER_BEAM_SIZE=1
DEBUG_TRANSCRIPTS=true
LOG_LEVEL=info

6
.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
node_modules
dist
.env
.local-ai
*.pyc
__pycache__

View File

@@ -1 +1,54 @@
# realtime_voice_bot # realtime_voice_bot
출력장치로 재생되는 소리를 파일 저장 없이 바로 받아서, 메모리 버퍼에서 발화 구간을 나눈 뒤 `faster-whisper`로 STT 하는 최소 프로토타입입니다.
## 현재 범위
- Node.js + TypeScript 메인 프로세스
- 출력 오디오 실시간 캡처
- 메모리 버퍼 기반 간단한 저지연 발화 분리
- 미리 로드한 `faster-whisper` 워커에 PCM 직접 전달
- 디스크에 WAV 저장 없이 바로 전사
## 빠른 시작
```bash
bun install
bun run setup:python
cp .env.example .env
```
장치 목록 확인:
```bash
bun run devices
```
실행:
```bash
bun run start:loopback
```
## 환경 변수
- `AUDIO_SOURCE`
- Windows: `bun run devices` 에서 보이는 `ffmpeg dshow` 오디오 장치 이름
- Linux: `pactl list sources short` 에서 보이는 monitor/source 이름
- `WHISPER_MODEL`
- 기본값 `large-v3-turbo`
- `WHISPER_LANGUAGE`
- 기본값 `ko`
- `WHISPER_DEVICE`
- `auto`, `cuda`, `cpu`
- `WHISPER_COMPUTE_TYPE`
- `auto`, `float16`, `int8_float16`, `int8`, `float32`
- `WHISPER_BEAM_SIZE`
- 기본값 `1`
## 메모
- 이 버전은 일단 `STT`만 합니다.
- 최소 지연을 위해 파일 저장은 하지 않습니다.
- VAD는 현재 모델 기반이 아니라 진폭 기반 단순 분리입니다.
- Windows에서는 보통 출력 루프백이 가능한 장치나 `Stereo Mix`, 오디오 인터페이스 loopback 채널을 `AUDIO_SOURCE`로 잡아야 합니다.

28
bun.lock Normal file
View File

@@ -0,0 +1,28 @@
{
"lockfileVersion": 1,
"configVersion": 1,
"workspaces": {
"": {
"name": "realtime_voice_bot",
"dependencies": {
"dotenv": "^17.4.2",
"zod": "^4.3.6",
},
"devDependencies": {
"@types/node": "^25.6.0",
"typescript": "^6.0.3",
},
},
},
"packages": {
"@types/node": ["@types/node@25.6.0", "", { "dependencies": { "undici-types": "~7.19.0" } }, "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ=="],
"dotenv": ["dotenv@17.4.2", "", {}, "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw=="],
"typescript": ["typescript@6.0.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw=="],
"undici-types": ["undici-types@7.19.2", "", {}, "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg=="],
"zod": ["zod@4.4.2", "", {}, "sha512-IynmDyxsEsb9RKzO3J9+4SxXnl2FTFSzNBaKKaMV6tsSk0rw9gYw9gs+JFCq/qk2LCZ78KDwyj+Z289TijSkUw=="],
}
}

25
package.json Normal file
View File

@@ -0,0 +1,25 @@
{
"name": "realtime_voice_bot",
"version": "0.1.0",
"private": true,
"type": "module",
"scripts": {
"start:loopback": "bun src/index.ts loopback",
"devices": "bun src/index.ts devices",
"setup:python": "bun src/setup-python.ts",
"check": "tsc --noEmit",
"build": "tsc -p tsconfig.json"
},
"engines": {
"bun": ">=1.3.0",
"node": ">=22.12.0"
},
"dependencies": {
"dotenv": "^17.4.2",
"zod": "^4.3.6"
},
"devDependencies": {
"@types/node": "^25.6.0",
"typescript": "^6.0.3"
}
}

View File

@@ -0,0 +1,133 @@
import base64
import json
import os
import sys
import traceback
from typing import Any
import numpy as np
from faster_whisper import WhisperModel
def resolve_model() -> WhisperModel:
model_name = os.environ.get("WHISPER_MODEL", "large-v3-turbo")
requested_device = os.environ.get("WHISPER_DEVICE", "auto")
requested_compute = os.environ.get("WHISPER_COMPUTE_TYPE", "auto")
attempts: list[tuple[str, str]] = []
if requested_device == "auto":
if requested_compute == "auto":
attempts.extend(
[
("cuda", "float16"),
("cuda", "int8_float16"),
("cpu", "int8"),
("cpu", "float32"),
]
)
else:
attempts.extend(
[
("cuda", requested_compute),
("cpu", requested_compute),
]
)
else:
if requested_compute == "auto":
compute = "float16" if requested_device == "cuda" else "int8"
else:
compute = requested_compute
attempts.append((requested_device, compute))
last_error: Exception | None = None
for device, compute_type in attempts:
try:
model = WhisperModel(model_name, device=device, compute_type=compute_type)
setattr(model, "_resolved_device", device)
setattr(model, "_resolved_compute_type", compute_type)
return model
except Exception as error: # noqa: BLE001
last_error = error
assert last_error is not None
raise last_error
MODEL = resolve_model()
LANGUAGE = os.environ.get("WHISPER_LANGUAGE", "ko")
BEAM_SIZE = int(os.environ.get("WHISPER_BEAM_SIZE", "1"))
def write(payload: dict[str, Any]) -> None:
sys.stdout.write(json.dumps(payload, ensure_ascii=False) + "\n")
sys.stdout.flush()
def transcribe_pcm16_base64(pcm16_base64: str) -> str:
audio_bytes = base64.b64decode(pcm16_base64)
audio = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
segments, _info = MODEL.transcribe(
audio,
language=LANGUAGE,
task="transcribe",
beam_size=BEAM_SIZE,
condition_on_previous_text=False,
vad_filter=False,
without_timestamps=True,
word_timestamps=False,
temperature=0.0,
)
text_parts: list[str] = []
for segment in segments:
if segment.text:
text_parts.append(segment.text.strip())
return " ".join(part for part in text_parts if part).strip()
for raw_line in sys.stdin:
line = raw_line.strip()
if not line:
continue
request = json.loads(line)
request_id = request["id"]
method = request["method"]
params = request.get("params", {})
try:
if method == "ping":
write(
{
"id": request_id,
"result": {
"model": os.environ.get("WHISPER_MODEL", "large-v3-turbo"),
"device": getattr(MODEL, "_resolved_device", "unknown"),
"compute_type": getattr(MODEL, "_resolved_compute_type", "unknown"),
},
}
)
continue
if method == "transcribe":
text = transcribe_pcm16_base64(params["pcm16_base64"])
write(
{
"id": request_id,
"result": {
"text": text,
},
}
)
continue
raise RuntimeError(f"unknown method: {method}")
except Exception as error: # noqa: BLE001
traceback.print_exc(file=sys.stderr)
write(
{
"id": request_id,
"error": f"{type(error).__name__}: {error}",
}
)

1
python/requirements.txt Normal file
View File

@@ -0,0 +1 @@
faster-whisper==1.2.1

138
src/audio/capture.ts Normal file
View File

@@ -0,0 +1,138 @@
import { spawn, type ChildProcessByStdio } from "node:child_process";
import process from "node:process";
import type { Readable } from "node:stream";
import type { AppConfig } from "../config.js";
import type { Logger } from "../logger.js";
export function printAudioDevices(): Promise<void> {
if (process.platform === "win32") {
return new Promise<void>((resolve, reject) => {
const child = spawn("ffmpeg", ["-hide_banner", "-list_devices", "true", "-f", "dshow", "-i", "dummy"], {
stdio: ["ignore", "ignore", "inherit"],
});
child.on("exit", (code) => {
if (code === 0 || code === 1) {
resolve();
return;
}
reject(new Error(`ffmpeg exited with code ${code ?? "null"}`));
});
child.on("error", reject);
});
}
return new Promise<void>((resolve, reject) => {
const pactl = spawn("pactl", ["list", "sources", "short"], {
stdio: ["ignore", "inherit", "inherit"],
});
pactl.on("exit", (code) => {
if (code === 0) {
resolve();
return;
}
const wpctl = spawn("wpctl", ["status", "-n"], {
stdio: ["ignore", "inherit", "inherit"],
});
wpctl.on("exit", (wpctlCode) => {
if (wpctlCode === 0) {
resolve();
return;
}
reject(new Error(`pactl exited with code ${code ?? "null"} and wpctl exited with code ${wpctlCode ?? "null"}`));
});
wpctl.on("error", reject);
});
pactl.on("error", () => {
const wpctl = spawn("wpctl", ["status", "-n"], {
stdio: ["ignore", "inherit", "inherit"],
});
wpctl.on("exit", (code) => {
if (code === 0) {
resolve();
return;
}
reject(new Error(`pactl, wpctl 둘 다 실행할 수 없습니다. code=${code ?? "null"}`));
});
wpctl.on("error", reject);
});
});
}
export function spawnLoopbackCapture(
config: AppConfig,
logger: Logger,
): ChildProcessByStdio<null, Readable, Readable> {
if (!config.AUDIO_SOURCE) {
throw new Error("AUDIO_SOURCE 설정이 필요합니다. 먼저 `bun run devices` 로 장치 이름을 확인하세요.");
}
if (process.platform === "win32") {
const args = [
"-hide_banner",
"-loglevel",
"warning",
"-fflags",
"nobuffer",
"-flags",
"low_delay",
"-f",
"dshow",
"-i",
`audio=${config.AUDIO_SOURCE}`,
"-ac",
"1",
"-ar",
"16000",
"-f",
"s16le",
"pipe:1",
];
logger.info("Starting Windows loopback capture", {
source: config.AUDIO_SOURCE,
backend: "ffmpeg-dshow",
});
return spawn("ffmpeg", args, {
stdio: ["ignore", "pipe", "pipe"],
});
}
if (process.platform === "linux") {
const args = [
"-hide_banner",
"-loglevel",
"warning",
"-fflags",
"nobuffer",
"-flags",
"low_delay",
"-f",
"pulse",
"-i",
config.AUDIO_SOURCE,
"-ac",
"1",
"-ar",
"16000",
"-f",
"s16le",
"pipe:1",
];
logger.info("Starting Linux loopback capture", {
source: config.AUDIO_SOURCE,
backend: "ffmpeg-pulse",
});
return spawn("ffmpeg", args, {
stdio: ["ignore", "pipe", "pipe"],
});
}
throw new Error(`지원하지 않는 플랫폼입니다: ${process.platform}`);
}

View File

@@ -0,0 +1,112 @@
interface RealtimeSegmenterOptions {
onSegment: (pcm16: Buffer) => void;
}
export class RealtimeSegmenter {
private readonly pendingSamples: number[] = [];
private readonly preRoll: number[] = [];
private readonly speech: number[] = [];
private readonly frameSamples = 320;
private readonly preRollSamples = 3200;
private readonly speechStartThreshold = 900;
private readonly speechContinueThreshold = 450;
private readonly speechStartFrames = 2;
private readonly speechEndFrames = 18;
private readonly minSpeechSamples = 6400;
private speechActive = false;
private speechCandidateFrames = 0;
private silenceFrames = 0;
constructor(private readonly options: RealtimeSegmenterOptions) {}
pushChunk(chunk: Buffer): void {
for (let offset = 0; offset + 1 < chunk.length; offset += 2) {
this.pendingSamples.push(chunk.readInt16LE(offset));
}
while (true) {
const frame = takeFrame(this.pendingSamples, this.frameSamples);
if (!frame) {
return;
}
this.processFrame(frame);
}
}
private processFrame(frame: Int16Array): void {
let peak = 0;
for (const sample of frame) {
const abs = Math.abs(sample);
if (abs > peak) {
peak = abs;
}
}
if (!this.speechActive) {
appendWithCap(this.preRoll, frame, this.preRollSamples);
if (peak >= this.speechStartThreshold) {
this.speechCandidateFrames += 1;
} else {
this.speechCandidateFrames = 0;
}
if (this.speechCandidateFrames < this.speechStartFrames) {
return;
}
this.speechActive = true;
this.silenceFrames = 0;
this.speech.splice(0, this.speech.length, ...this.preRoll);
this.preRoll.splice(0, this.preRoll.length);
}
this.speech.push(...frame);
if (peak >= this.speechContinueThreshold) {
this.silenceFrames = 0;
} else {
this.silenceFrames += 1;
}
if (this.silenceFrames < this.speechEndFrames) {
return;
}
const speechPcm = int16ArrayToBuffer(Int16Array.from(this.speech));
this.speechActive = false;
this.speech.splice(0, this.speech.length);
this.silenceFrames = 0;
this.speechCandidateFrames = 0;
if (speechPcm.length < this.minSpeechSamples * 2) {
return;
}
this.options.onSegment(speechPcm);
}
}
function takeFrame(source: number[], size: number): Int16Array | null {
if (source.length < size) {
return null;
}
const samples = source.splice(0, size);
return Int16Array.from(samples);
}
function appendWithCap(target: number[], samples: Int16Array, cap: number): void {
target.push(...samples);
if (target.length > cap) {
target.splice(0, target.length - cap);
}
}
function int16ArrayToBuffer(input: Int16Array): Buffer {
const output = Buffer.allocUnsafe(input.length * 2);
for (let index = 0; index < input.length; index += 1) {
output.writeInt16LE(input[index]!, index * 2);
}
return output;
}

34
src/config.ts Normal file
View File

@@ -0,0 +1,34 @@
import { config as loadDotenv } from "dotenv";
import { z } from "zod";
loadDotenv();
const emptyToUndefined = z.preprocess((value) => {
if (typeof value !== "string") {
return value;
}
const trimmed = value.trim();
return trimmed.length === 0 ? undefined : trimmed;
}, z.string().min(1).optional());
const envSchema = z.object({
LOCAL_AI_VENV_PATH: z.string().min(1).default(".local-ai/.venv"),
LOCAL_AI_PYTHON: emptyToUndefined,
AUDIO_SOURCE: emptyToUndefined,
WHISPER_MODEL: z.string().min(1).default("large-v3-turbo"),
WHISPER_LANGUAGE: z.string().min(1).default("ko"),
WHISPER_DEVICE: z.enum(["auto", "cuda", "cpu"]).default("auto"),
WHISPER_COMPUTE_TYPE: z.string().min(1).default("auto"),
WHISPER_BEAM_SIZE: z.coerce.number().int().min(1).max(8).default(1),
DEBUG_TRANSCRIPTS: z
.string()
.optional()
.transform((value) => value === "true"),
LOG_LEVEL: z.enum(["debug", "info", "warn", "error"]).default("info"),
});
export type AppConfig = z.infer<typeof envSchema>;
export function loadConfig(): AppConfig {
return envSchema.parse(process.env);
}

107
src/index.ts Normal file
View File

@@ -0,0 +1,107 @@
import process from "node:process";
import { loadConfig } from "./config.js";
import { Logger } from "./logger.js";
import { printAudioDevices, spawnLoopbackCapture } from "./audio/capture.js";
import { RealtimeSegmenter } from "./audio/realtime-segmenter.js";
import { FasterWhisperSttService } from "./services/faster-whisper-stt.js";
const mode = process.argv[2] ?? "loopback";
async function runLoopback(): Promise<void> {
const config = loadConfig();
const logger = new Logger(config.LOG_LEVEL);
const stt = new FasterWhisperSttService(config, logger);
await stt.warmup();
const transcriptionQueue: Buffer[] = [];
let transcribing = false;
const runNext = async (): Promise<void> => {
if (transcribing) {
return;
}
const next = transcriptionQueue.shift();
if (!next) {
return;
}
transcribing = true;
try {
const text = await stt.transcribePcm16(next);
if (!text) {
logger.info("빈 전사 결과");
} else {
logger.info("Transcript", text);
if (config.DEBUG_TRANSCRIPTS) {
console.log(`\n[text] ${text}\n`);
}
}
} catch (error) {
logger.warn("STT failed", error);
} finally {
transcribing = false;
void runNext();
}
};
const segmenter = new RealtimeSegmenter({
onSegment: (pcm16) => {
transcriptionQueue.push(pcm16);
void runNext();
},
});
const capture = spawnLoopbackCapture(config, logger);
capture.stdout.on("data", (chunk: Buffer) => {
segmenter.pushChunk(chunk);
});
capture.stderr.on("data", (chunk: Buffer) => {
const text = chunk.toString().trim();
if (text) {
logger.debug("[capture]", text);
}
});
capture.on("exit", (code, signal) => {
logger.warn("capture exited", { code, signal });
});
console.log("실시간 출력장치 STT를 시작합니다. Ctrl+C 로 종료합니다.");
console.log(`source: ${config.AUDIO_SOURCE ?? "unset"}`);
console.log(`model: ${config.WHISPER_MODEL}`);
console.log(`language: ${config.WHISPER_LANGUAGE}`);
const shutdown = async (): Promise<void> => {
if (!capture.killed) {
capture.kill("SIGTERM");
}
await stt.destroy();
process.exit(0);
};
process.on("SIGINT", () => {
void shutdown();
});
process.on("SIGTERM", () => {
void shutdown();
});
}
async function main(): Promise<void> {
switch (mode) {
case "devices":
await printAudioDevices();
return;
case "loopback":
await runLoopback();
return;
default:
throw new Error(`알 수 없는 실행 모드입니다: ${mode}. 사용 가능: loopback, devices`);
}
}
void main().catch((error) => {
console.error(error instanceof Error ? error.message : String(error));
process.exit(1);
});

63
src/logger.ts Normal file
View File

@@ -0,0 +1,63 @@
type LogLevel = "debug" | "info" | "warn" | "error";
const levelOrder: Record<LogLevel, number> = {
debug: 10,
info: 20,
warn: 30,
error: 40,
};
function formatParts(parts: unknown[]): string {
return parts
.map((part) => {
if (part instanceof Error) {
return `${part.name}: ${part.message}`;
}
if (typeof part === "string") {
return part;
}
return JSON.stringify(part);
})
.join(" ");
}
export class Logger {
constructor(private readonly level: LogLevel) {}
private shouldLog(target: LogLevel): boolean {
return levelOrder[target] >= levelOrder[this.level];
}
private write(target: LogLevel, ...parts: unknown[]): void {
if (!this.shouldLog(target)) {
return;
}
const line = `[${new Date().toISOString()}] [${target.toUpperCase()}] ${formatParts(parts)}`;
if (target === "error") {
console.error(line);
return;
}
if (target === "warn") {
console.warn(line);
return;
}
console.log(line);
}
debug(...parts: unknown[]): void {
this.write("debug", ...parts);
}
info(...parts: unknown[]): void {
this.write("info", ...parts);
}
warn(...parts: unknown[]): void {
this.write("warn", ...parts);
}
error(...parts: unknown[]): void {
this.write("error", ...parts);
}
}

63
src/python-runtime.ts Normal file
View File

@@ -0,0 +1,63 @@
import { constants as fsConstants } from "node:fs";
import { access } from "node:fs/promises";
import path from "node:path";
import process from "node:process";
import type { AppConfig } from "./config.js";
function splitCommand(command: string): string[] {
const parts = command.match(/(?:[^\s"]+|"[^"]*")+/g) ?? [];
return parts.map((part) => part.replace(/^"(.*)"$/, "$1"));
}
async function fileExists(target: string): Promise<boolean> {
try {
await access(target, fsConstants.X_OK);
return true;
} catch {
return false;
}
}
export async function resolvePythonCommand(config: AppConfig): Promise<{ command: string; args: string[] }> {
return await resolveWorkerPythonCommand(config);
}
export async function resolveBasePythonCommand(config: AppConfig): Promise<{ command: string; args: string[] }> {
const configured = config.LOCAL_AI_PYTHON?.trim();
if (configured) {
const [command, ...args] = splitCommand(configured);
if (!command) {
throw new Error("LOCAL_AI_PYTHON 값이 비어 있습니다.");
}
return { command, args };
}
const venvPath = resolveVenvPythonPath(config);
if (await fileExists(venvPath)) {
return { command: venvPath, args: [] };
}
return await resolveBasePythonCommand(config);
}
export async function resolveWorkerPythonCommand(config: AppConfig): Promise<{ command: string; args: string[] }> {
const venvPath = resolveVenvPythonPath(config);
if (await fileExists(venvPath)) {
return { command: venvPath, args: [] };
}
return await resolveBasePythonCommand(config);
}
export function resolveVenvPythonPath(config: AppConfig): string {
const root = path.resolve(process.cwd(), config.LOCAL_AI_VENV_PATH);
if (process.platform === "win32") {
return path.join(root, "Scripts", "python.exe");
}
return path.join(root, "bin", "python");
}
export function resolveWorkerScript(name: string): string {
return path.resolve(process.cwd(), "python", name);
}

View File

@@ -0,0 +1,40 @@
import type { AppConfig } from "../config.js";
import type { Logger } from "../logger.js";
import { PythonJsonWorker } from "./python-json-worker.js";
interface PingResult {
model: string;
device: string;
compute_type: string;
}
interface TranscribeResult {
text: string;
}
export class FasterWhisperSttService {
private readonly worker: PythonJsonWorker;
constructor(
private readonly config: AppConfig,
private readonly logger: Logger,
) {
this.worker = new PythonJsonWorker(config, logger, "loopback_stt_worker.py", "faster-whisper");
}
async warmup(): Promise<void> {
const result = await this.worker.request<PingResult>("ping", {});
this.logger.info("STT worker ready", result);
}
async transcribePcm16(pcm16: Buffer): Promise<string> {
const result = await this.worker.request<TranscribeResult>("transcribe", {
pcm16_base64: pcm16.toString("base64"),
});
return result.text.trim();
}
async destroy(): Promise<void> {
await this.worker.destroy();
}
}

View File

@@ -0,0 +1,147 @@
import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process";
import { createInterface } from "node:readline";
import type { AppConfig } from "../config.js";
import type { Logger } from "../logger.js";
import { resolveWorkerPythonCommand, resolveWorkerScript } from "../python-runtime.js";
interface RpcSuccess<T> {
id: string;
result: T;
}
interface RpcFailure {
id: string;
error: string;
}
type RpcResponse<T> = RpcSuccess<T> | RpcFailure;
function isFailure<T>(value: RpcResponse<T>): value is RpcFailure {
return "error" in value;
}
export class PythonJsonWorker {
private processRef: ChildProcessWithoutNullStreams | null = null;
private readonly pending = new Map<
string,
{
resolve: (value: unknown) => void;
reject: (reason?: unknown) => void;
}
>();
private nextId = 1;
constructor(
private readonly config: AppConfig,
private readonly logger: Logger,
private readonly scriptName: string,
private readonly logPrefix: string,
) {}
async start(): Promise<void> {
if (this.processRef) {
return;
}
const { command, args } = await resolveWorkerPythonCommand(this.config);
const scriptPath = resolveWorkerScript(this.scriptName);
this.processRef = spawn(command, [...args, scriptPath], {
stdio: ["pipe", "pipe", "pipe"],
env: {
...process.env,
WHISPER_MODEL: this.config.WHISPER_MODEL,
WHISPER_LANGUAGE: this.config.WHISPER_LANGUAGE,
WHISPER_DEVICE: this.config.WHISPER_DEVICE,
WHISPER_COMPUTE_TYPE: this.config.WHISPER_COMPUTE_TYPE,
WHISPER_BEAM_SIZE: String(this.config.WHISPER_BEAM_SIZE),
},
});
const rl = createInterface({
input: this.processRef.stdout,
crlfDelay: Infinity,
});
rl.on("line", (line) => {
this.handleStdoutLine(line);
});
this.processRef.stderr.on("data", (chunk: Buffer) => {
const text = chunk.toString().trim();
if (text.length > 0) {
this.logger.warn(`[${this.logPrefix}] ${text}`);
}
});
this.processRef.on("exit", (code, signal) => {
const error = new Error(`${this.logPrefix} worker exited code=${code ?? "null"} signal=${signal ?? "null"}`);
for (const entry of this.pending.values()) {
entry.reject(error);
}
this.pending.clear();
this.processRef = null;
});
}
async request<T>(method: string, params: Record<string, unknown>): Promise<T> {
await this.start();
if (!this.processRef) {
throw new Error(`${this.logPrefix} worker is not running`);
}
const id = String(this.nextId++);
const payload = JSON.stringify({
id,
method,
params,
});
const promise = new Promise<T>((resolve, reject) => {
this.pending.set(id, {
resolve: (value) => resolve(value as T),
reject,
});
});
this.processRef.stdin.write(`${payload}\n`);
return await promise;
}
async destroy(): Promise<void> {
if (!this.processRef) {
return;
}
this.processRef.kill("SIGTERM");
this.processRef = null;
}
private handleStdoutLine(line: string): void {
const trimmed = line.trim();
if (!trimmed) {
return;
}
let message: RpcResponse<unknown>;
try {
message = JSON.parse(trimmed) as RpcResponse<unknown>;
} catch (error) {
this.logger.warn(`${this.logPrefix} stdout parse failed`, error);
return;
}
const pending = this.pending.get(message.id);
if (!pending) {
return;
}
this.pending.delete(message.id);
if (isFailure(message)) {
pending.reject(new Error(message.error));
return;
}
pending.resolve(message.result);
}
}

47
src/setup-python.ts Normal file
View File

@@ -0,0 +1,47 @@
import process from "node:process";
import { mkdir } from "node:fs/promises";
import path from "node:path";
import { spawn } from "node:child_process";
import { loadConfig } from "./config.js";
import { resolveBasePythonCommand, resolveVenvPythonPath } from "./python-runtime.js";
async function run(command: string, args: string[], cwd: string): Promise<void> {
await new Promise<void>((resolve, reject) => {
const child = spawn(command, args, {
cwd,
stdio: "inherit",
});
child.on("exit", (code) => {
if (code === 0) {
resolve();
return;
}
reject(new Error(`${command} ${args.join(" ")} exited with code ${code ?? "null"}`));
});
child.on("error", reject);
});
}
async function main(): Promise<void> {
const config = loadConfig();
const { command, args } = await resolveBasePythonCommand(config);
const venvRoot = path.resolve(process.cwd(), config.LOCAL_AI_VENV_PATH);
const requirementsPath = path.resolve(process.cwd(), "python", "requirements.txt");
await mkdir(path.dirname(venvRoot), { recursive: true });
console.log(`가상환경 생성: ${venvRoot}`);
await run(command, [...args, "-m", "venv", venvRoot], process.cwd());
const venvPython = resolveVenvPythonPath(config);
await run(venvPython, ["-m", "pip", "install", "--upgrade", "pip", "setuptools", "wheel"], process.cwd());
await run(venvPython, ["-m", "pip", "install", "-r", requirementsPath], process.cwd());
console.log("Python STT 환경 준비 완료");
}
void main().catch((error) => {
console.error(error instanceof Error ? error.message : String(error));
process.exit(1);
});

21
tsconfig.json Normal file
View File

@@ -0,0 +1,21 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"strict": true,
"noEmit": false,
"rootDir": "src",
"outDir": "dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"skipLibCheck": true,
"resolveJsonModule": true,
"types": [
"node"
]
},
"include": [
"src/**/*.ts"
]
}