From 3195026dbaae35a559cd71ac7eb586bd63890755 Mon Sep 17 00:00:00 2001 From: mrq Date: Wed, 18 Oct 2023 20:38:33 -0500 Subject: [PATCH] fixed issue with the 'add another target audio to artificially create longer sequences' for HDF5 just duplicating the utterance initially sampled --- vall_e/config.py | 2 +- vall_e/data.py | 17 ++++------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/vall_e/config.py b/vall_e/config.py index dcd4c9c..7bce83d 100755 --- a/vall_e/config.py +++ b/vall_e/config.py @@ -134,7 +134,7 @@ class Dataset: phones_range: list[int] = field(default_factory=lambda: [4, 256]) duration_range: list[float] = field(default_factory=lambda: [1.0, 12.0]) - min_utterances: int = 0 + min_utterances: int = 2 random_utterance: float = 1.0 max_prompts: int = 3 diff --git a/vall_e/data.py b/vall_e/data.py index 88dfd85..d400b3d 100755 --- a/vall_e/data.py +++ b/vall_e/data.py @@ -214,15 +214,13 @@ class Dataset(_Dataset): spkr = cfg.get_spkr( data_dir / "dummy" ) spkr_group = cfg.get_spkr_group( data_dir / "dummy" ) - if len(self.paths_by_spkr_name[spkr]) < cfg.dataset.min_utterances: - continue - if spkr_group not in self.spkrs_by_spkr_group: self.spkrs_by_spkr_group[spkr_group] = [] self.spkrs_by_spkr_group[spkr_group].append( spkr ) self.spkr_groups = list(self.spkrs_by_spkr_group.keys()) + self.spkr_samplers = { name: Sampler( [*set(speakers)], keep_all=True ) for name, speakers in self.spkrs_by_spkr_group.items() } if cfg.dataset.sample_type == "path": @@ -385,15 +383,8 @@ class Dataset(_Dataset): spkr_group = self.spkr_groups[index] spkr_group_id = self.spkr_group_symmap[spkr_group] spkr_name = self.spkr_samplers[spkr_group].sample() - if spkr_name in self.spkr_symmap: - spkr_id = self.spkr_symmap[spkr_name] - else: - spkr_id = -1 - try: - path = self.samplers[spkr_name].sample() - except Exception as e: - print( "ERROR", spkr_group, spkr_name ) - raise e + spkr_id = self.spkr_symmap[spkr_name] + path = self.samplers[spkr_name].sample() elif cfg.dataset.sample_type == "speaker": spkr_name = self.spkrs[index] spkr_id = self.spkr_symmap[spkr_name] @@ -432,7 +423,7 @@ class Dataset(_Dataset): sampled_path = random.choice(choices) choices = [*(set(choices) - {sampled_path})] if cfg.dataset.use_hdf5: - key = _get_hdf5_path(path) + key = _get_hdf5_path(sampled_path) txt = cfg.hdf5[key]["text"][:] qnt = cfg.hdf5[key]["audio"][:, :]