From b4405c98ea16920f5f514a7981d0ee6ae25fc02e Mon Sep 17 00:00:00 2001
From: mrq
Date: Tue, 10 Oct 2023 19:18:24 -0500
Subject: [PATCH] remove double spaces in the text phonemes (might have caused problems.........)

---
Review note: this patch was amended after the original commit, so the
post-image blob id on the "index" line below was not regenerated.
Changes made in review:
  * _cleanup_phones no longer looks at phones[i-1], which wrapped around to
    the last element for the first symbol and could drop a leading space.
  * _get_phones now deduplicates the empty strings produced by split(" ");
    the previous call looked for " ", which split(" ") can never produce.

 vall_e/data.py | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/vall_e/data.py b/vall_e/data.py
index 5c1c6ee..08244c9 100755
--- a/vall_e/data.py
+++ b/vall_e/data.py
@@ -143,9 +143,29 @@ def _get_paths_of_extensions( path, extensions=".qnt.pt", validate=False ):
 def _load_quants(path) -> Tensor:
 	return torch.load(_get_quant_path(path))[0][:, :].t().to(torch.int16)
 
+def _cleanup_phones( phones, targets=(" ",) ):
+	"""
+	Collapse runs of a repeated target symbol down to a single occurrence.
+
+	`phones` is any sequence of symbols (phoneme strings or token ids); only symbols
+	listed in `targets` are ever dropped, and only when they repeat the symbol kept
+	immediately before them. Returns a new list; the input is left untouched.
+	"""
+	cleaned = []
+	for p in phones:
+		# compare with the last kept symbol; indexing phones[i-1] would wrap around to
+		# the final element at i == 0 and wrongly drop a leading target
+		if p in targets and cleaned and p == cleaned[-1]:
+			continue
+		cleaned.append(p)
+	return cleaned
+
 @cache
 def _get_phones(path, language="en"):
 	content = open(_get_phone_path(path), "r", encoding="utf-8").read().split(" ")
+	# split(" ") turns each extra space into an empty string, never into " ", so the
+	# empty strings are what must be deduplicated before they are mapped to " " below
+	content = _cleanup_phones( content, targets=[""] )
 	return [""] + [ " " if not p else p for p in content ] + [""]
 
 def _interleaved_reorder(l, fn):
@@ -333,8 +353,13 @@ class Dataset(_Dataset):
 
 		if cfg.dataset.use_hdf5:
 			key = _get_hdf5_path(path)
-			text = torch.from_numpy(cfg.hdf5[key]["text"][:]).to(self.text_dtype)
-			resps = torch.from_numpy(cfg.hdf5[key]["audio"][:, :]).to(torch.int16)
+			text = cfg.hdf5[key]["text"][:]
+			resps = cfg.hdf5[key]["audio"][:, :]
+
+			text = np.array( _cleanup_phones( text, targets=[ self.phone_symmap[" "] ] ) )
+
+			text = torch.from_numpy(text).to(self.text_dtype)
+			resps = torch.from_numpy(resps).to(torch.int16)
 		else:
 			text = torch.tensor([*map(self.phone_symmap.get, _get_phones(path))]).to(self.text_dtype)
 			resps = _load_quants(path)