# todo: clean this mess up
from .config import cfg
from .data import create_train_val_dataloader, get_random_prompt, tokenize, fold_inputs, unfold_outputs
from .emb import qnt, g2p
from .utils import setup_logging, to_device, trainer, flatten_dict, do_gc
from .utils.distributed import is_global_leader

import auraloss
import json
import logging
import random
import torch
import torch.nn.functional as F
import traceback
import shutil

from collections import defaultdict
from tqdm import tqdm

import argparse

_logger = logging.getLogger(__name__)

mel_stft_loss = auraloss.freq.MelSTFTLoss(cfg.sample_rate, device="cpu")


def train_feeder(engine, batch):
    with torch.autocast("cuda", dtype=cfg.trainer.dtype, enabled=cfg.trainer.amp):
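        # run the forward pass under autocast so ops use the configured dtype when AMP is enabled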
        batch_size = len(batch["text"])
        engine.current_batch_size = batch_size

        engine(
            text_list=batch["text"],
            proms_list=batch["proms"],
            resps_list=batch["resps"],
            lang_list=batch["lang"],
            tone_list=batch["tone"],
            task_list=batch["task"],
            training=True,
        )
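
        # the engine call above does not return the loss; the model is assumed to stash
        # per-module `loss` / `stats` attributes, which are gathered just below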
        losses = engine.gather_attribute("loss")
        stat = engine.gather_attribute("stats")

        loss = torch.stack([*losses.values()]).sum()

    # skip this batch entirely if the loss went NaN, rather than stepping the optimizer with it
    if torch.isnan(loss).any():
        return

    stats = {}
    stats |= {k: v.item() for k, v in losses.items()}
    stats |= {k: v.item() for k, v in stat.items()}

    engine.tokens_processed += sum([text.shape[0] for text in batch["text"]])
    engine.tokens_processed += sum([resps.shape[0] for resps in batch["resps"]])

    return loss, stats


@torch.inference_mode()
def run_eval(engines, eval_name, dl, args=None):
    stats = defaultdict(list)
    stats['loss'] = []

    if cfg.evaluation.size == 0:
        return

    def process(name, batch, resps_list):
        for speaker, path, ref, hyp, prom, task in zip(batch["spkr_name"], batch["path"], batch["resps"], resps_list, batch["proms"], batch["task"]):
            if len(hyp) == 0:
                continue

            filename = f'{speaker}_{path.parts[-1]}'

            if task != "tts":
                filename = f"{filename}_{task}"

            # flatten prom
            if not isinstance(prom, torch.Tensor) and prom is not None:
                prom = torch.concat([p for p in prom if isinstance(p, torch.Tensor)])

            # to-do: refine the output dir to be saner
            ref_path = (cfg.log_dir / str(engines.global_step) / "ref" / filename).with_suffix(".wav")
            hyp_path = (cfg.log_dir / str(engines.global_step) / name / eval_name / filename).with_suffix(".wav")
            prom_path = (cfg.log_dir / str(engines.global_step) / name / "prom" / filename).with_suffix(".wav")

            hyp_path.parent.mkdir(parents=True, exist_ok=True)
            ref_path.parent.mkdir(parents=True, exist_ok=True)
            prom_path.parent.mkdir(parents=True, exist_ok=True)

            hyp_audio, sr = qnt.decode_to_file(hyp, hyp_path)
            if ref is not None:
                ref_audio, sr = qnt.decode_to_file(ref, ref_path)
            if prom is not None:
                prom_audio, sr = qnt.decode_to_file(prom, prom_path)

            # naive loss calculation
            # to-do: find a better way to calculate this / a better metric
            if ref is not None:
                min_length = min(ref_audio.shape[-1], hyp_audio.shape[-1])
                ref_audio = ref_audio[..., 0:min_length]
                hyp_audio = hyp_audio[..., 0:min_length]
                stats['loss'].append(mel_stft_loss(hyp_audio[None, :, :], ref_audio[None, :, :]).item())

    processed = 0
    while processed < cfg.evaluation.size:
        # directly randomly sample
        if eval_name == "subtrain":
            # sample from the dataset
            # to-do: derive from current iteration
            samples = [to_device(dl.dataset[random.randint(0, len(dl.dataset) - 1)], cfg.device) for _ in range(cfg.evaluation.batch_size)]
            # collate manually
            batch = {k: [s[k] for s in samples] for k in samples[0]}
        else:
            batch = to_device(next(iter(dl)), cfg.device)
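            # note: iter(dl) builds a fresh iterator every pass, so this relies on the
            # dataloader shuffling to serve a different batch each time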

        # limit to the eval batch size in the event we somehow have a weird dataloader
        for key in batch.keys():
            batch[key] = batch[key][:cfg.evaluation.batch_size]

        batch_size = len(batch["text"])

        # to-do: eval for text tasks
        has_stt = False
        for i, task in enumerate(batch["task"]):
            # easier to just change it to a tts task than drop stt tasks from the batch
            if task == "stt":
                # has_stt = True
                batch["task"][i] = "tts"
                batch["proms"][i] = batch["resps"][i][:75 * 3, :]
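                # (75 * 3 frames ≈ the first three seconds of the target, assuming EnCodec's 75 frames per second)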

        # random prompts requested
        if args and args.eval_random_text_prompts and eval_name == "subtrain":
            for i, _ in enumerate(batch["text"]):
                batch["text"][i] = get_random_prompt(tokenized=True).to(device=cfg.device)
                batch["resps"][i] = None

        processed += batch_size

        for name in engines:
            engine = engines[name]

            base_kwargs = dict(
                text_list=batch["text"],
                proms_list=batch["proms"],
                lang_list=batch["lang"],
                task_list=batch["task"],
                training=False,
            )

            if engine.hyper_config.experimental.hf:
                resps_list = engine(**base_kwargs)
            elif "len" in engine.hyper_config.capabilities:
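                # two-stage decode: the model first predicts output durations (len_list),
                # then a second call decodes the responses against those lengths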
                kwargs = base_kwargs | cfg.evaluation.kwargs
                max_steps = kwargs.pop("max_steps", 500)

                if "denoise_start" in kwargs:
                    len_list = [resp.shape[0] for resp in batch["resps"]]
                    kwargs["resps_list"] = [resp[:, :1] for resp in batch["resps"]]
                else:
                    len_list = engine(max_steps=5, **kwargs)
                    # clamp the predicted durations so decoding cannot run past max_steps
                    len_list = [min(l, max_steps) for l in len_list]

                kwargs = base_kwargs | cfg.evaluation.kwargs
                resps_list = engine(**kwargs, len_list=len_list)
            else:
                if "ar" in engine.hyper_config.capabilities:
                    kwargs = base_kwargs | cfg.evaluation.kwargs
                    resps_list = engine(**kwargs)
                else:
                    resps_list = [resp[:, 0] for resp in batch["resps"]]

                if "nar" in engine.hyper_config.capabilities:
                    kwargs = base_kwargs | cfg.evaluation.kwargs
                    resps_list = engine(**kwargs, resps_list=resps_list)

            process(name, batch, resps_list)

            # to-do: evaluate why this path is so slow (note: has_stt is currently never set, so this is disabled)
            if has_stt:
                max_steps = max([text.shape[0] for text in batch["text"]])

                kwargs["text_list"] = None
                kwargs["task_list"] = ["stt" for _ in range(batch_size)]
                kwargs["proms_list"] = [["stt"] for _ in range(batch_size)]
                kwargs["resps_list"] = batch["resps"]

                text_list = engine(**kwargs, max_steps=max_steps, sampling_temperature=0.0)
                text_list = [cfg.tokenizer.decode(text) for text in text_list]

                _logger.info(f"Validation Metrics (STT): {text_list}")

    stats = {k: sum(v) / len(v) for k, v in stats.items() if v}

    engines_stats = {
        f'{name}.{eval_name}': stats,
        "it": engines.global_step,
    }
    #engines_stats['epoch'] = iteration * cfg.hyperparameters.gradient_accumulation_steps / len(dl)

    _logger.info(f"Validation Metrics: {json.dumps(engines_stats)}.")


def train():
    parser = argparse.ArgumentParser("VALL-E TTS")
    parser.add_argument("--eval", action="store_true", default=None)
    parser.add_argument("--eval-random-text-prompts", action="store_true", default=None)
    #parser.add_argument("--eval-random-audio-prompts", action="store_true", default=None)
    args, unknown = parser.parse_known_args()

    # create log folder
    setup_logging(cfg.log_dir)
    # copy the config yaml as a backup, from the global leader only
    if cfg.yaml_path is not None and is_global_leader():
        shutil.copy(cfg.yaml_path, cfg.log_dir / "config.yaml")

    # create dataloaders
    train_dl, val_dl = create_train_val_dataloader()

    # evaluation routine, invoked by the trainer
    def eval_fn(engines):
        do_gc()
        engines.eval()

        # wrapped in a try block because it's sometimes prone to breaking
        try:
            run_eval(engines, "subtrain", train_dl, args)
            run_eval(engines, "val", val_dl, args)
        except Exception as e:
            _logger.warning(f"Error occurred while performing eval: {str(e)}")
            _logger.warning(traceback.format_exc())

        engines.train()
        qnt.unload_model()
        do_gc()

    # unload EnCodec if it's already loaded
    qnt.unload_model()

    # run only the evaluation if requested
    if args.eval:
        return eval_fn(engines=trainer.load_engines())

    """
    # start the web UI
    if cfg.trainer.load_webui:
        from .webui import start
        start(lock=False)
    """

    # pre-training config validation
    if cfg.model.experimental.layerskip and cfg.trainer.weight_dtype == "float16":
        _logger.warning("Training with LayerSkip enabled with float16 may fry the model if the loss scale gets too small (<=8K) or the de facto batch size grows too large (>512 samples).")

    # train
    trainer.train(
        train_dl=train_dl,
        train_feeder=train_feeder,
        eval_fn=eval_fn,
    )


if __name__ == "__main__":
    # to-do: for DDP, spawn multiprocess instead of requiring `torchrun --nnodes=1 --nproc-per-node=4 -m vall_e.train yaml="./data/config.yaml"`
    train()
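
    # for reference, a single-process launch would look like (config path illustrative):
    #   python -m vall_e.train yaml="./data/config.yaml"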