diff --git a/vall_e/data.py b/vall_e/data.py index 84da276..7e381d5 100755 --- a/vall_e/data.py +++ b/vall_e/data.py @@ -45,8 +45,7 @@ def get_task_symmap(): "": start + 3, "": start + 4, "": start + 5, - "": start + 6, - "": start + 7, + "": start + 6, } return symmap @@ -320,8 +319,6 @@ class Dataset(_Dataset): if task == "tts-c" and trim_length * 2 >= resps.shape[0]: task = "tts" - task = "tts" - # VALL-E continuous # ignore if target utterance is shorter than prompt duration # to-do: actually do this for the AR only as I don't think the paper trained the NAR for this