"""Command-line script that synthesizes speech from text with MeloTTS and writes it to a file."""
import argparse
|
|
from pathlib import Path
|
|
|
|
from melo.api import TTS
|
|
|
|
|
|
def _positive_float(raw: str) -> float:
    """Parse *raw* as a finite float strictly greater than zero.

    Intended for use as an argparse ``type=`` callable.

    Raises:
        argparse.ArgumentTypeError: if *raw* is not a number, or is zero,
            negative, NaN, or infinite.
    """
    try:
        value = float(raw)
    except ValueError:
        # Keep the wording argparse itself uses for ``type=float`` failures.
        raise argparse.ArgumentTypeError(f"invalid float value: {raw!r}") from None
    # The chained comparison is False for NaN as well as for out-of-range values.
    if not 0.0 < value < float("inf"):
        raise argparse.ArgumentTypeError(
            f"must be a finite number greater than 0: {raw!r}"
        )
    return value


def main(argv: "list[str] | None" = None) -> None:
    """Parse command-line options, synthesize speech, and write it to a file.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse reads them from ``sys.argv[1:]``.

    Raises:
        SystemExit: on invalid command-line arguments (raised by argparse),
            or when ``--speaker`` is not among the speakers of the loaded
            model.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--text", required=True)
    parser.add_argument("--output", required=True)
    parser.add_argument("--language", default="KR")
    parser.add_argument("--speaker", default="KR")
    # Reject non-positive / non-finite speeds up front instead of letting them
    # reach the synthesis call.
    # NOTE(review): MeloTTS is understood to scale duration by 1 / speed, so a
    # zero speed would fail only after the model has been loaded -- confirm
    # against the installed melo version.
    parser.add_argument("--speed", type=_positive_float, default=1.0)
    parser.add_argument("--sdp-ratio", type=float, default=0.2)
    parser.add_argument("--noise-scale", type=float, default=0.6)
    parser.add_argument("--noise-scale-w", type=float, default=0.8)
    parser.add_argument("--device", default="cpu")
    args = parser.parse_args(argv)

    # Create the destination directory before constructing the model so that
    # an unusable output location is reported before the model is built.
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    model = TTS(language=args.language, device=args.device)
    speaker_ids = model.hps.data.spk2id

    if args.speaker not in speaker_ids:
        supported = ", ".join(sorted(speaker_ids.keys()))
        raise SystemExit(f"지원하지 않는 speaker 입니다: {args.speaker}. 사용 가능: {supported}")

    model.tts_to_file(
        args.text,
        speaker_ids[args.speaker],
        str(output_path),
        speed=args.speed,
        sdp_ratio=args.sdp_ratio,
        noise_scale=args.noise_scale,
        noise_scale_w=args.noise_scale_w,
    )
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|