// File: realtime_voice_bot/src/config.ts (TypeScript, 65 lines, 2.8 KiB)

import { config as loadDotenv } from "dotenv";
import { z } from "zod";
// Run dotenv's `config()` at module load, before anything below reads `process.env`.
loadDotenv();
/**
 * Schema for an optional string setting.
 *
 * String inputs are trimmed first; a string that is empty after trimming is
 * replaced by `undefined` so it validates as "not provided". Non-string
 * inputs are handed to the inner schema unchanged.
 */
const emptyToUndefined = z.preprocess(
  (raw) => {
    if (typeof raw === "string") {
      const cleaned = raw.trim();
      return cleaned === "" ? undefined : cleaned;
    }
    return raw;
  },
  z.string().min(1).optional(),
);
/**
 * Maps blank (empty or whitespace-only) strings to `undefined` and leaves
 * every other value untouched.
 *
 * Without this, a line such as `TTS_SDP_RATIO=` would bypass `.default(...)`:
 * `z.coerce.number()` coerces `""` to `0`, and `z.string().min(1)` rejects it.
 */
const blankToUndefined = (value: unknown): unknown =>
  typeof value === "string" && value.trim().length === 0 ? undefined : value;

/** Wraps `schema` so that a blank value falls through to its `.default(...)`. */
const orDefault = <T extends z.ZodTypeAny>(schema: T) =>
  z.preprocess(blankToUndefined, schema);

/**
 * Boolean flag read from an optional string, compared after trimming and
 * lower-casing.
 *
 * @param fallback Result when the variable is unset or unrecognised:
 *   with `true`, only `"false"` disables the flag;
 *   with `false`, only `"true"` enables it.
 */
const envFlag = (fallback: boolean) =>
  z
    .string()
    .optional()
    .transform((value) => {
      const normalized = value?.trim().toLowerCase();
      return fallback ? normalized !== "false" : normalized === "true";
    });

/**
 * Validation schema for the environment variables read by this module.
 *
 * Every defaulted field treats a blank value as "unset", and all boolean
 * flags share the same trim + lower-case comparison.
 */
const envSchema = z.object({
  // LOCAL_AI_* / AUDIO_SOURCE / DOCKER_BIN: optional overrides, blank means unset.
  LOCAL_AI_VENV_PATH: orDefault(z.string().min(1).default(".local-ai/.venv")),
  LOCAL_AI_PYTHON: emptyToUndefined,
  AUDIO_SOURCE: emptyToUndefined,
  DOCKER_BIN: emptyToUndefined,

  // TTS_* settings. The flag is on unless explicitly set to "false".
  TTS_ENABLED: envFlag(true),
  TTS_IMAGE: orDefault(z.string().min(1).default("realtime-voice-bot-melotts:v0.1.2")),
  TTS_LANGUAGE: orDefault(z.string().min(1).default("KR")),
  TTS_SPEAKER: orDefault(z.string().min(1).default("KR")),
  TTS_DEVICE: orDefault(z.string().min(1).default("cpu")),
  TTS_SPEED: orDefault(z.coerce.number().min(0.5).max(2).default(1.18)),
  TTS_SDP_RATIO: orDefault(z.coerce.number().min(0).max(1).default(0.22)),
  TTS_NOISE_SCALE: orDefault(z.coerce.number().min(0).max(2).default(0.55)),
  TTS_NOISE_SCALE_W: orDefault(z.coerce.number().min(0).max(2).default(0.75)),
  TTS_CACHE_DIR: orDefault(z.string().min(1).default(".local-ai/tts-cache")),
  TTS_OUTPUT_DIR: orDefault(z.string().min(1).default(".local-ai/tts-output")),

  // Off unless explicitly set to "true".
  DEBUG: envFlag(false),

  // OLLAMA_* settings and conversation length.
  OLLAMA_BASE_URL: orDefault(z.string().min(1).default("http://127.0.0.1:11434")),
  OLLAMA_MODEL: orDefault(z.string().min(1).default("qwen3:8b")),
  OLLAMA_KEEP_ALIVE: orDefault(z.string().min(1).default("5m")),
  MAX_CONVERSATION_TURNS: orDefault(z.coerce.number().int().min(1).max(20).default(6)),

  // WHISPER_* settings.
  WHISPER_MODEL: orDefault(z.string().min(1).default("large-v3-turbo")),
  WHISPER_LANGUAGE: orDefault(z.string().min(1).default("ko")),
  WHISPER_DEVICE: orDefault(z.enum(["auto", "cuda", "cpu"]).default("auto")),
  WHISPER_COMPUTE_TYPE: orDefault(z.string().min(1).default("auto")),
  WHISPER_BEAM_SIZE: orDefault(z.coerce.number().int().min(1).max(8).default(2)),

  // SEGMENT_* settings: integer thresholds, frame counts and sample counts.
  SEGMENT_START_THRESHOLD: orDefault(
    z.coerce.number().int().min(100).max(10000).default(900),
  ),
  SEGMENT_CONTINUE_THRESHOLD: orDefault(
    z.coerce.number().int().min(50).max(10000).default(450),
  ),
  SEGMENT_START_FRAMES: orDefault(z.coerce.number().int().min(1).max(10).default(2)),
  SEGMENT_END_FRAMES: orDefault(z.coerce.number().int().min(4).max(60).default(24)),
  SEGMENT_PREROLL_SAMPLES: orDefault(
    z.coerce.number().int().min(320).max(16000).default(3200),
  ),
  SEGMENT_MIN_SPEECH_SAMPLES: orDefault(
    z.coerce.number().int().min(1600).max(64000).default(7200),
  ),
  SEGMENT_MAX_SPEECH_SAMPLES: orDefault(
    z.coerce.number().int().min(16000).max(320000).default(160000),
  ),

  // Previously compared the raw string; now normalised the same way as DEBUG.
  DEBUG_TRANSCRIPTS: envFlag(false),
  LOG_LEVEL: orDefault(z.enum(["debug", "info", "warn", "error"]).default("info")),
});
/** Parsed configuration type, inferred from `envSchema`. */
export type AppConfig = z.infer<typeof envSchema>;
/**
 * Validates the current `process.env` against `envSchema`.
 *
 * @returns The parsed configuration object.
 * @throws Whatever `envSchema.parse` throws when the environment is invalid.
 */
export function loadConfig(): AppConfig {
  const parsedEnv: AppConfig = envSchema.parse(process.env);
  return parsedEnv;
}