Files
realtime_voice_bot/docker/melotts/melo_tts_cli.py

43 lines
1.3 KiB
Python

import argparse
from pathlib import Path
from melo.api import TTS
def main(argv: "list[str] | None" = None) -> None:
    """Synthesize speech from text with MeloTTS and write it to a file.

    Args:
        argv: Command-line arguments to parse, excluding the program name.
            When ``None`` (the default) argparse reads ``sys.argv[1:]``, so
            a plain ``main()`` call behaves exactly as a script invocation.

    Raises:
        SystemExit: If the arguments are invalid (argparse exits with
            status 2, including a non-positive ``--speed``), or if
            ``--speaker`` is not among the speakers exposed by the loaded
            model.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--text", required=True)
    parser.add_argument("--output", required=True)
    parser.add_argument("--language", default="KR")
    parser.add_argument("--speaker", default="KR")
    parser.add_argument("--speed", type=float, default=1.0)
    parser.add_argument("--sdp-ratio", type=float, default=0.2)
    parser.add_argument("--noise-scale", type=float, default=0.6)
    parser.add_argument("--noise-scale-w", type=float, default=0.8)
    parser.add_argument("--device", default="cpu")
    args = parser.parse_args(argv)

    # Validate the speed before touching the filesystem or building the
    # model. NOTE(review): MeloTTS appears to compute 1 / speed internally,
    # so zero would otherwise surface as a late ZeroDivisionError - confirm
    # against the installed melo version. `not x > 0` also rejects NaN.
    if not args.speed > 0:
        parser.error(f"--speed 는 0보다 커야 합니다: {args.speed}")

    # Create the destination directory before the model is constructed so
    # that path problems are reported first.
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    model = TTS(language=args.language, device=args.device)

    # Map of speaker name -> speaker id provided by the loaded model.
    speaker_ids = model.hps.data.spk2id
    if args.speaker not in speaker_ids:
        # Keep the explicit .keys() call: spk2id may be a mapping-like
        # object that does not support direct iteration.
        supported = ", ".join(sorted(speaker_ids.keys()))
        raise SystemExit(
            f"지원하지 않는 speaker 입니다: {args.speaker}. 사용 가능: {supported}"
        )

    model.tts_to_file(
        args.text,
        speaker_ids[args.speaker],
        str(output_path),
        speed=args.speed,
        sdp_ratio=args.sdp_ratio,
        noise_scale=args.noise_scale,
        noise_scale_w=args.noise_scale_w,
    )
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()