removed the sampler as it's very misleading

mrq 2023-08-18 14:47:48 -05:00
parent 8e7f900210
commit ced31fd9b7
5 changed files with 46 additions and 71 deletions

View File

@@ -118,6 +118,7 @@ class Dataset:
     hdf5_name: str = "data.h5"
     use_hdf5: bool = False
+    hdf5_flag: str = "a"
     validate: bool = True
     workers: int = 8
     cache: bool = True
@@ -467,7 +468,7 @@ try:
     # cached_property stopped working...
     if cfg.dataset.use_hdf5:
         try:
-            cfg.hdf5 = h5py.File(f'{cfg.cfg_path}/{cfg.dataset.hdf5_name}', 'r' if cfg.distributed else 'a') # to-do, have an easy to set flag that determines if training or creating the dataset
+            cfg.hdf5 = h5py.File(f'{cfg.cfg_path}/{cfg.dataset.hdf5_name}', cfg.dataset.hdf5_flag) # to-do, have an easy to set flag that determines if training or creating the dataset
         except Exception as e:
             print("Error while opening HDF5 file:", f'{cfg.cfg_path}/{cfg.dataset.hdf5_name}', str(e))
             cfg.dataset.use_hdf5 = False
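
The open mode for the dataset HDF5 file is now taken from the new cfg.dataset.hdf5_flag field instead of being inferred from cfg.distributed. A minimal standalone sketch of the two intended modes, assuming nothing beyond h5py itself ("sketch.h5" is a made-up file, not a path from the repo):

    import h5py

    # "a" (the new default) opens read/write and creates the file if missing -- dataset creation
    with h5py.File("sketch.h5", "a") as f:
        f.require_group("metadata")

    # "r" opens the same file read-only -- the safer value once training starts
    with h5py.File("sketch.h5", "r") as f:
        print(list(f.keys()))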

View File

@@ -10,7 +10,6 @@ import random
 import torch
 from .config import cfg
-from .utils.sampler import Sampler
 from collections import defaultdict
 from functools import cache, cached_property
@@ -168,12 +167,6 @@ class Dataset(_Dataset):
                 self.durations[spkr_id] = duration
             else:
                 self.durations[spkr_id] += duration
-        if training and not cfg.distributed and self.sample_type == "path":
-            self.sampler = Sampler(self.paths, [cfg.get_spkr])
-        else:
-            self.sampler = None
     def _get_paths_by_spkr_name(self, extra_paths_by_spkr_name: dict[str, list]):
         ret = defaultdict(list)
         for path in self.paths:
@@ -267,10 +260,7 @@ class Dataset(_Dataset):
             spkr_id = self.spkr_symmap[spkr_name]
             path = random.choice([*set(self.paths_by_spkr_name[spkr_name])])
         else:
-            if self.training and self.sampler is not None:
-                path = self.sampler.sample()
-            else:
-                path = self.paths[index]
+            path = self.paths[index]
             spkr_name = cfg.get_spkr(path)
             spkr_id = self.spkr_symmap[spkr_name]
@@ -299,12 +289,18 @@ class Dataset(_Dataset):
             resps = self.sample_noise()
             resps = extend_audio(resps, proms.shape[0])
             # something to prepend a sr token to the beginning of proms
-        elif task == "tse:
+        elif task == "tse":
             proms = self.sample_prompts(spkr_name, ignore=path) if random.random() < cfg.dataset.random_utterance else resps
             other_speaker = self.sample_speaker(ignore=[spkr_name])
             other_proms = self.sample_prompts(other_speaker, ignore="")
             proms = merge_audio(proms, other_proms)
-            # something to prepend a ns token to the beginning of proms
+            # something to prepend a tse token to the beginning of proms
+        """
+        """
+        # speech editing would require higher quality transcription data (phoneme level/word level) unfortunately
+        # as I need to get a good clean point to trim into
+        elif task == "cse":
+        elif task == "nse":
         """
@@ -595,9 +591,16 @@ if __name__ == "__main__":
         create_dataset_hdf5()
     train_dl, subtrain_dl, val_dl = create_train_val_dataloader()
-    print("Training DL:", next(iter(train_dl)))
-    print("Training DL:", next(iter(train_dl)))
-    print("Evaluation DL:", next(iter(subtrain_dl)))
-    print("Evaluation DL:", next(iter(subtrain_dl)))
-    print("Validation DL:", next(iter(val_dl)))
-    print("Validation DL:", next(iter(val_dl)))
+    samples = {
+        "training": [ next(iter(train_dl)), next(iter(train_dl)) ],
+        "evaluation": [ next(iter(subtrain_dl)), next(iter(subtrain_dl)) ],
+        "validation": [ next(iter(val_dl)), next(iter(val_dl)) ],
+    }
+    for k, v in samples.items():
+        for i in range(len(v)):
+            del v[i]['proms']
+            del v[i]['resps']
+        print(f'{k}:', v)

View File

@@ -35,6 +35,16 @@ class TTS():
             cfg.load_yaml( config )
             cfg.format()
+        """
+        if cfg.trainer.load_state_dict:
+            for model in cfg.models.get():
+                path = cfg.ckpt_dir / model.full_name / "fp32.pth"
+                if model.name.startswith("ar"):
+                    ar_ckpt = path
+                if model.name.startswith("nar"):
+                    nar_ckpt = path
+        """
         if ar_ckpt and nar_ckpt:
             self.ar_ckpt = ar_ckpt
@@ -44,10 +54,16 @@ class TTS():
             for name, model in models.items():
                 if name.startswith("ar"):
                     self.ar = model.to(self.device, dtype=torch.float32)
-                    self.ar.load_state_dict(torch.load(self.ar_ckpt)['module'])
+                    state = torch.load(self.ar_ckpt)
+                    if "module" in state:
+                        state = state['module']
+                    self.ar.load_state_dict(state)
                 elif name.startswith("nar"):
                     self.nar = model.to(self.device, dtype=torch.float32)
-                    self.nar.load_state_dict(torch.load(self.nar_ckpt)['module'])
+                    state = torch.load(self.nar_ckpt)
+                    if "module" in state:
+                        state = state['module']
+                    self.nar.load_state_dict(state)
         else:
             self.load_models()
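
Both checkpoint paths above (and load_engines further down) now tolerate two layouts: a plain state dict, or one nested under a "module" key as some trainer exports produce. A standalone sketch of that pattern; the tiny Linear model and file names are made up for illustration:

    import torch

    def load_flexible_state_dict(model: torch.nn.Module, path: str):
        # same pattern as the diff: unwrap {"module": ...} checkpoints, use plain ones as-is
        state = torch.load(path)
        if "module" in state:
            state = state["module"]
        model.load_state_dict(state)

    model = torch.nn.Linear(4, 4)
    torch.save(model.state_dict(), "plain.pth")
    torch.save({"module": model.state_dict()}, "wrapped.pth")
    load_flexible_state_dict(model, "plain.pth")    # raw state dict
    load_flexible_state_dict(model, "wrapped.pth")  # wrapped export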

View File

@@ -1,48 +0,0 @@
-"""
-A sampler that balances data by key_fns.
-
-MIT License
-
-Copyright (c) 2023 Zhe Niu
-
-niuzhe.nz@outlook.com
-"""
-
-import random
-
-
-class Sampler:
-    def __init__(self, l, key_fns):
-        self.tree = self._build(l, key_fns)
-
-    def _build(self, l, key_fns) -> dict[dict, list]:
-        if not key_fns:
-            return l
-
-        tree = {}
-        key_fn, *key_fns = key_fns
-
-        for x in l:
-            k = key_fn(x)
-            if k in tree:
-                tree[k].append(x)
-            else:
-                tree[k] = [x]
-
-        for k in tree:
-            tree[k] = self._build(tree[k], key_fns)
-
-        return tree
-
-    def _sample(self, tree: dict | list):
-        if isinstance(tree, list):
-            ret = random.choice(tree)
-        else:
-            key = random.choice([*tree.keys()])
-            ret = self._sample(tree[key])
-
-        return ret
-
-    def sample(self):
-        return self._sample(self.tree)
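
For context, the dataset code above constructed this as Sampler(self.paths, [cfg.get_spkr]), so a draw first picks a key (the speaker) uniformly and only then an utterance within it. A standalone sketch of that behaviour with made-up paths (not code from the repo):

    import random
    from collections import Counter, defaultdict

    paths = [f"speaker_a/{i}.qnt.pt" for i in range(99)] + ["speaker_b/0.qnt.pt"]  # hypothetical data
    by_spkr = defaultdict(list)
    for p in paths:
        by_spkr[p.split("/")[0]].append(p)

    def balanced_sample():
        # pick a speaker uniformly, then an utterance within that speaker
        spkr = random.choice([*by_spkr.keys()])
        return random.choice(by_spkr[spkr])

    # speaker_b holds 1% of the paths but is drawn roughly half the time
    print(Counter(balanced_sample().split("/")[0] for _ in range(10_000)))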

View File

@@ -80,7 +80,10 @@ def load_engines():
         if cfg.trainer.load_state_dict:
             load_path = cfg.ckpt_dir / name / "fp32.pth"
-            model.load_state_dict(torch.load(load_path)['module'])
+            state = torch.load(load_path)
+            if "module" in state:
+                state = state["module"]
+            model.load_state_dict(state)
         engines[name] = Engine(
             model=model,