Tune MeloTTS speed and prosody defaults

This commit is contained in:
2026-05-03 18:24:39 +09:00
parent 46a6b926df
commit f5194f55a1
6 changed files with 53 additions and 6 deletions

View File

@@ -24,7 +24,10 @@ const envSchema = z.object({
TTS_LANGUAGE: z.string().min(1).default("KR"),
TTS_SPEAKER: z.string().min(1).default("KR"),
TTS_DEVICE: z.string().min(1).default("cpu"),
TTS_SPEED: z.coerce.number().min(0.5).max(2).default(1),
TTS_SPEED: z.coerce.number().min(0.5).max(2).default(1.18),
TTS_SDP_RATIO: z.coerce.number().min(0).max(1).default(0.22),
TTS_NOISE_SCALE: z.coerce.number().min(0).max(2).default(0.55),
TTS_NOISE_SCALE_W: z.coerce.number().min(0).max(2).default(0.75),
TTS_CACHE_DIR: z.string().min(1).default(".local-ai/tts-cache"),
TTS_OUTPUT_DIR: z.string().min(1).default(".local-ai/tts-output"),
DEBUG: z

View File

@@ -57,7 +57,7 @@ export class MeloTtsService {
}
async speak(text: string): Promise<void> {
const trimmed = text.trim();
const trimmed = this.normalizeText(text);
if (!trimmed) {
return;
}
@@ -113,6 +113,12 @@ export class MeloTtsService {
this.config.TTS_SPEAKER,
"--speed",
String(this.config.TTS_SPEED),
"--sdp-ratio",
String(this.config.TTS_SDP_RATIO),
"--noise-scale",
String(this.config.TTS_NOISE_SCALE),
"--noise-scale-w",
String(this.config.TTS_NOISE_SCALE_W),
"--device",
this.config.TTS_DEVICE,
);
@@ -122,10 +128,30 @@ export class MeloTtsService {
language: this.config.TTS_LANGUAGE,
speaker: this.config.TTS_SPEAKER,
speed: this.config.TTS_SPEED,
sdp_ratio: this.config.TTS_SDP_RATIO,
noise_scale: this.config.TTS_NOISE_SCALE,
noise_scale_w: this.config.TTS_NOISE_SCALE_W,
device: this.config.TTS_DEVICE,
});
const docker = await resolveDockerCommand(this.config);
await run(docker, args, "inherit");
}
/**
 * Cleans raw text before it is handed to the speech synthesizer.
 *
 * Markdown-style markup characters (backtick, `*`, `_`, `#`, `>`, square
 * brackets and parentheses) are replaced with spaces, runs of whitespace are
 * collapsed to a single space, and the result is trimmed. When the cleaned
 * text does not already end in sentence-final punctuation, a period is
 * appended.
 *
 * @param input - Raw text, possibly containing Markdown markup.
 * @returns The cleaned text, or an empty string when nothing remains after
 *   stripping markup and whitespace.
 */
private normalizeText(input: string): string {
  const collapsed = input
    .replace(/[`*_#>\[\]\(\)]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
  if (!collapsed) {
    return "";
  }
  // Accept ASCII and full-width (CJK) sentence terminators, optionally
  // followed by closing quotes, so inputs such as `こんにちは。` or
  // `"Stop."` do not receive a second, redundant terminator.
  if (/[.!?…。！？]["'”’」』]*$/.test(collapsed)) {
    return collapsed;
  }
  return `${collapsed}.`;
}
}