Validated and fixed the sampling of similar utterances for the prompt (hopefully nothing else is needed)
This commit is contained in:
parent
d31f27119a
commit
536c11c4ac
|
@ -879,8 +879,18 @@ class Dataset(_Dataset):
|
||||||
|
|
||||||
return path, text, resps
|
return path, text, resps
|
||||||
|
|
||||||
def get_similar_utterance(self, spkr_name, reference, offset=0 ):
|
# icky slop
|
||||||
metadata = json_read( cfg.metadata_dir / f"{spkr_name}.json", default={} )
|
def get_similar_utterance(self, path, offset=0 ):
|
||||||
|
reference = path.name
|
||||||
|
|
||||||
|
if cfg.dataset.use_hdf5:
|
||||||
|
root = Path( *path.parts[:-1] )
|
||||||
|
path = Path( *path.parts[2:-1] )
|
||||||
|
else:
|
||||||
|
root = Path( *path.parts[:-1] )
|
||||||
|
path = Path(*path.parts[len(cfg.data_dir.parts):-1])
|
||||||
|
|
||||||
|
metadata = json_read( cfg.metadata_dir / path.with_suffix(".json"), default={} )
|
||||||
|
|
||||||
if reference not in metadata:
|
if reference not in metadata:
|
||||||
return None
|
return None
|
||||||
|
@ -894,8 +904,10 @@ class Dataset(_Dataset):
|
||||||
offset = 0
|
offset = 0
|
||||||
|
|
||||||
metadata_keys = list(metadata.keys())
|
metadata_keys = list(metadata.keys())
|
||||||
name = metadata_keys[reference_metadata["similar"][offset]]
|
index = reference_metadata["similar"][offset]
|
||||||
return name
|
name = metadata_keys[index]
|
||||||
|
|
||||||
|
return root / name
|
||||||
|
|
||||||
def sample_prompts(self, spkr_name, reference, should_trim=True):
|
def sample_prompts(self, spkr_name, reference, should_trim=True):
|
||||||
if not cfg.dataset.prompt_duration_range or cfg.dataset.prompt_duration_range[-1] == 0:
|
if not cfg.dataset.prompt_duration_range or cfg.dataset.prompt_duration_range[-1] == 0:
|
||||||
|
@ -920,7 +932,7 @@ class Dataset(_Dataset):
|
||||||
|
|
||||||
for _ in range(cfg.dataset.max_prompts):
|
for _ in range(cfg.dataset.max_prompts):
|
||||||
if reference is not None and cfg.dataset.prom_sample_similar:
|
if reference is not None and cfg.dataset.prom_sample_similar:
|
||||||
path = self.get_similar_utterance( spkr_name=spkr_name, reference=reference, offset = len(prom_list) )
|
path = self.get_similar_utterance( reference, offset = len(prom_list) )
|
||||||
# yuck
|
# yuck
|
||||||
if not path:
|
if not path:
|
||||||
path = random.choice(choices)
|
path = random.choice(choices)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user