Added an option to retokenize phonemes when loading from HDF5 (to avoid having to rebuild the HDF5 file)

2024-09-21 13:08:01 -05:00 · 2024-09-21 13:08:01 -05:00 · c5e9142863
commit c5e9142863
parent 536c11c4ac
2 changed files with 5 additions and 1 deletion
--- a/vall_e/config.py
+++ b/vall_e/config.py
@@ -173,7 +173,8 @@ class Dataset:
 	reencode_device: str = "cpu" # "cpu" is slower but saves memory, cuda throws [rank0]: RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method
 	noise_scale: float = 0.25 # scaling noise value
 	inject_noise_in_prom: bool = False # adds noise to the input prompt waveform to try and vary things
-	
+	retokenize_text: bool = False
+
 	_frames_per_second: int = 0 # allows setting your own hint

 	@cached_property
--- a/vall_e/data.py
+++ b/vall_e/data.py
@@ -1005,6 +1005,9 @@ class Dataset(_Dataset):
 			lang = metadata["language"] if "language" in metadata else None
 			tone = metadata["tone"] if "tone" in metadata else None
 			text_string = metadata["text"] if "text" in metadata else None
+
+			if cfg.dataset.retokenize_text and "phonemes" in metadata:
+				text = torch.tensor(tokenize( metadata["phonemes"] )).to(self.text_dtype)
 		else:
 			resps, metadata = _load_quants(path, return_metadata=True)
 			text = torch.tensor(tokenize( metadata["phonemes"] )).to(self.text_dtype)