diff --git a/vall_e/config.py b/vall_e/config.py index 9657f80..615bc99 100755 --- a/vall_e/config.py +++ b/vall_e/config.py @@ -173,7 +173,8 @@ class Dataset: reencode_device: str = "cpu" # "cpu" is slower but saves memory, cuda throws [rank0]: RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method noise_scale: float = 0.25 # scaling noise value inject_noise_in_prom: bool = False # adds noise to the input prompt waveform to try and vary things - + retokenize_text: bool = False + _frames_per_second: int = 0 # allows setting your own hint @cached_property diff --git a/vall_e/data.py b/vall_e/data.py index 960e1d8..fdb553b 100755 --- a/vall_e/data.py +++ b/vall_e/data.py @@ -1005,6 +1005,9 @@ class Dataset(_Dataset): lang = metadata["language"] if "language" in metadata else None tone = metadata["tone"] if "tone" in metadata else None text_string = metadata["text"] if "text" in metadata else None + + if cfg.dataset.retokenize_text and "phonemes" in metadata: + text = torch.tensor(tokenize( metadata["phonemes"] )).to(self.text_dtype) else: resps, metadata = _load_quants(path, return_metadata=True) text = torch.tensor(tokenize( metadata["phonemes"] )).to(self.text_dtype)