tweaks
This commit is contained in:
parent
276a2342a4
commit
d6a679ca5c
|
@ -749,11 +749,7 @@ def _load_paths_from_metadata(group_name, type="training", validate=False):
|
||||||
metadata = {}
|
metadata = {}
|
||||||
|
|
||||||
if cfg.dataset.use_metadata and metadata_path.exists():
|
if cfg.dataset.use_metadata and metadata_path.exists():
|
||||||
#metadata = json.loads(open( metadata_path, "r", encoding="utf-8" ).read())
|
|
||||||
try:
|
|
||||||
metadata = json_read( metadata_path )
|
metadata = json_read( metadata_path )
|
||||||
except Exception as e:
|
|
||||||
return []
|
|
||||||
|
|
||||||
if len(metadata) == 0:
|
if len(metadata) == 0:
|
||||||
return _fn( data_dir, type if cfg.dataset.use_hdf5 else _get_artifact_extension(), validate )
|
return _fn( data_dir, type if cfg.dataset.use_hdf5 else _get_artifact_extension(), validate )
|
||||||
|
@ -765,8 +761,11 @@ def _load_paths_from_metadata(group_name, type="training", validate=False):
|
||||||
k = key(id, entry)
|
k = key(id, entry)
|
||||||
|
|
||||||
# double check if in HDF5
|
# double check if in HDF5
|
||||||
|
# this might be slow
|
||||||
|
"""
|
||||||
if cfg.dataset.use_hdf5 and k not in cfg.hdf5:
|
if cfg.dataset.use_hdf5 and k not in cfg.hdf5:
|
||||||
return False
|
return False
|
||||||
|
"""
|
||||||
|
|
||||||
# add to duration bucket
|
# add to duration bucket
|
||||||
if type not in _durations_map:
|
if type not in _durations_map:
|
||||||
|
@ -1201,7 +1200,16 @@ class Dataset(_Dataset):
|
||||||
index = reference_metadata["similar"][offset]
|
index = reference_metadata["similar"][offset]
|
||||||
name = metadata_keys[index]
|
name = metadata_keys[index]
|
||||||
|
|
||||||
return root / name
|
path = root / name
|
||||||
|
|
||||||
|
if cfg.dataset.use_hdf5:
|
||||||
|
key = _get_hdf5_path(path)
|
||||||
|
if key not in cfg.hdf5[key]:
|
||||||
|
return None
|
||||||
|
elif not path.exists():
|
||||||
|
return None
|
||||||
|
|
||||||
|
return path
|
||||||
|
|
||||||
def sample_prompts(self, spkr_name, reference, should_trim=True):
|
def sample_prompts(self, spkr_name, reference, should_trim=True):
|
||||||
# return no prompt if explicitly requested for who knows why
|
# return no prompt if explicitly requested for who knows why
|
||||||
|
@ -1243,6 +1251,7 @@ class Dataset(_Dataset):
|
||||||
path = random.choice(choices)
|
path = random.choice(choices)
|
||||||
else:
|
else:
|
||||||
path = random.choice(choices)
|
path = random.choice(choices)
|
||||||
|
|
||||||
if cfg.dataset.use_hdf5:
|
if cfg.dataset.use_hdf5:
|
||||||
key = _get_hdf5_path(path)
|
key = _get_hdf5_path(path)
|
||||||
qnt = torch.from_numpy(cfg.hdf5[key]["audio"][:, :]).to(torch.int16)
|
qnt = torch.from_numpy(cfg.hdf5[key]["audio"][:, :]).to(torch.int16)
|
||||||
|
|
|
@ -246,7 +246,7 @@ def process(
|
||||||
outfolder.mkdir(parents=True, exist_ok=True)
|
outfolder.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
if speaker_id in audio_only:
|
if speaker_id in audio_only:
|
||||||
for filename in sorted(os.listdir(f'./{input_audio}/{group_name}/{speaker_id}/')):
|
for filename in tqdm(sorted(os.listdir(f'./{input_audio}/{group_name}/{speaker_id}/')), desc=f"Processing {speaker_id}"):
|
||||||
inpath = Path(f'./{input_audio}/{group_name}/{speaker_id}/{filename}')
|
inpath = Path(f'./{input_audio}/{group_name}/{speaker_id}/{filename}')
|
||||||
outpath = Path(f'./{output_dataset}/{group_name}/{speaker_id}/{filename}').with_suffix(audio_extension)
|
outpath = Path(f'./{output_dataset}/{group_name}/{speaker_id}/{filename}').with_suffix(audio_extension)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user