This commit is contained in:
mrq 2025-03-15 16:50:21 -05:00
parent 2053580838
commit bee2688dea
2 changed files with 4 additions and 5 deletions

View File

@@ -853,10 +853,9 @@ class Dataset(_Dataset):
self.metadata = _load_dataset_metadata(self.dataset, self.dataset_type, dataset_hash_key=self.dataset_hash_key) self.metadata = _load_dataset_metadata(self.dataset, self.dataset_type, dataset_hash_key=self.dataset_hash_key)
# cull speakers with too few utterances # cull speakers with too few utterances
for speaker in self.metadata.keys(): prune_keys = [ speaker for speaker in self.metadata.keys() if len(self.metadata[speaker]) < cfg.dataset.min_utterances ]
utterances = len(self.metadata[speaker]) for speaker in prune_keys:
if utterances < cfg.dataset.min_utterances: del self.metadata[speaker]
del self.metadata[speaker]
self.paths = [] self.paths = []
self.speakers = list(self.metadata.keys()) self.speakers = list(self.metadata.keys())

View File

@@ -122,7 +122,7 @@ def run_eval(engines, eval_name, dl, args=None):
return return
def process( name, batch, resps_list ): def process( name, batch, resps_list ):
for speaker, path, ref, hyp, prom, task in zip(batch["spkr_name"], batch["path"], batch["resps"], resps_list, batch["proms"], batch["task"]): for speaker, path, ref, hyp, prom, task in zip(batch["speaker_name"], batch["path"], batch["resps"], resps_list, batch["proms"], batch["task"]):
if len(hyp) == 0: if len(hyp) == 0:
continue continue