diff --git a/vall_e/__main__.py b/vall_e/__main__.py index b979b04..8f96d3e 100755 --- a/vall_e/__main__.py +++ b/vall_e/__main__.py @@ -22,7 +22,7 @@ def main(): parser.add_argument("--max-nar-levels", type=int, default=7) parser.add_argument("--ar-temp", type=float, default=0.0) - parser.add_argument("--nar-temp", type=float, default=0.01) + parser.add_argument("--nar-temp", type=float, default=0.0) parser.add_argument("--min-ar-temp", type=float, default=-1.0) parser.add_argument("--min-nar-temp", type=float, default=-1.0) parser.add_argument("--input-prompt-length", type=float, default=3.0) diff --git a/vall_e/inference.py b/vall_e/inference.py index aef6b02..b5817db 100755 --- a/vall_e/inference.py +++ b/vall_e/inference.py @@ -270,6 +270,9 @@ class TTS(): return text_list[0] + # validate settings here + if not references and ar_temp < 0.5: + _logger.warning(f'Audio-promptless inferencing fails with low AR temperatures.') for line in lines: if out_path is None: