diff --git a/README.md b/README.md index 69f153e..b283908 100755 --- a/README.md +++ b/README.md @@ -129,6 +129,7 @@ Some additional flags you can pass are: ## To-Do +* reduce load time for creating / preparing dataloaders. * properly pass in `modules` names to `weight_quantization` and `activation_quantization`. * train and release a model. * extend to multiple languages (VALL-E X) and extend to SpeechX features. diff --git a/vall_e/config.py b/vall_e/config.py index 341e088..fcb3a4d 100755 --- a/vall_e/config.py +++ b/vall_e/config.py @@ -467,7 +467,7 @@ try: # cached_property stopped working... if cfg.dataset.use_hdf5: try: - cfg.hdf5 = h5py.File(f'{cfg.cfg_path}/{cfg.dataset.hdf5_name}', 'r' if cfg.distributed else 'a') + cfg.hdf5 = h5py.File(f'{cfg.cfg_path}/{cfg.dataset.hdf5_name}', 'r' if cfg.distributed else 'a') # to-do, have an easy to set flag that determines if training or creating the dataset except Exception as e: print("Error while opening HDF5 file:", f'{cfg.cfg_path}/{cfg.dataset.hdf5_name}', str(e)) cfg.dataset.use_hdf5 = False diff --git a/vall_e/data.py b/vall_e/data.py index 382c36a..4338250 100755 --- a/vall_e/data.py +++ b/vall_e/data.py @@ -466,9 +466,9 @@ def create_train_val_dataloader(): train_dataset.sample_type = cfg.dataset.sample_type #"speaker" subtrain_dataset = copy.deepcopy(train_dataset) - subtrain_dataset.head_(cfg.evaluation.size) - subtrain_dataset.interleaved_reorder_(cfg.get_spkr) - #subtrain_dataset.training_(False) + if subtrain_dataset.sample_type == "path": + subtrain_dataset.head_(cfg.evaluation.size) + subtrain_dataset.interleaved_reorder_(cfg.get_spkr) train_dl = _create_dataloader(train_dataset, training=True) val_dl = _create_dataloader(val_dataset, training=False) @@ -564,8 +564,19 @@ def create_dataset_hdf5(): hf.close() if __name__ == "__main__": - create_dataset_hdf5() + import argparse + + parser = argparse.ArgumentParser("vall_e.data: create the HDF5 dataset and/or smoke-test the dataloaders.") + parser.add_argument("--create-hdf5", action="store_true") 
+ args = parser.parse_args() + + if args.create_hdf5: + create_dataset_hdf5() train_dl, subtrain_dl, val_dl = create_train_val_dataloader() - sample = train_dl.dataset[0] - print(sample) + print("Training DL:", next(iter(train_dl))) + print("Training DL:", next(iter(train_dl))) + print("Evaluation DL:", next(iter(subtrain_dl))) + print("Evaluation DL:", next(iter(subtrain_dl))) + print("Validation DL:", next(iter(val_dl))) + print("Validation DL:", next(iter(val_dl)))