# vall-e/vall_e/inference.py
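# High-level text-to-speech inference wrapper: loads the configured engines from a YAML
# config and exposes TTS.inference() to synthesize speech from text plus reference audio prompts.
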
import torch
import torchaudio
import soundfile
import time

from torch import Tensor
from einops import rearrange
from pathlib import Path

from .emb import g2p, qnt
from .emb.qnt import trim, trim_random, unload_model
from .utils import to_device, set_seed, wrapper as ml

from .config import cfg, Config
from .models import get_models
from .engines import load_engines, deepspeed_available
from .data import get_phone_symmap, get_lang_symmap, _load_quants, _cleanup_phones, tokenize

if deepspeed_available:
	import deepspeed

class TTS():
	def __init__( self, config=None, device=None, amp=None, dtype=None ):
		self.loading = True

		self.load_config( config=config, device=device, amp=amp, dtype=dtype )
		self.load_model()

		self.loading = False

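	# load the YAML config (if provided) and resolve device / dtype / AMP settings, with explicit arguments taking precedence over the config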
	def load_config( self, config=None, device=None, amp=None, dtype=None ):
		if config:
			print("Loading YAML:", config)
			cfg.load_yaml( config )

		try:
			cfg.format( training=False )
			cfg.dataset.use_hdf5 = False # could use cfg.load_hdf5(), but why would it ever need to be loaded for inferencing
		except Exception as e:
			print("Error while parsing config YAML:")
			raise e # throw an error because I'm tired of silent errors messing things up for me

		if amp is None:
			amp = cfg.inference.amp
		if dtype is None or dtype == "auto":
			dtype = cfg.inference.weight_dtype
		if device is None:
			device = cfg.device

		cfg.device = device
		cfg.mode = "inferencing"
		cfg.trainer.backend = cfg.inference.backend
		cfg.trainer.weight_dtype = dtype
		cfg.inference.weight_dtype = dtype

		self.device = device
		self.dtype = cfg.inference.dtype
		self.amp = amp

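	# (re)load the inference engines, then move them to the target device and dtype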
	def load_model( self ):
		load_engines.cache_clear()
		unload_model()

		self.engines = load_engines(training=False)
		for name, engine in self.engines.items():
			if self.dtype != torch.int8:
				engine.to(self.device, dtype=self.dtype if not self.amp else torch.float32)

		self.engines.eval()
		self.symmap = get_phone_symmap()
		print("Loaded model")

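	# phonemize text via g2p and map the phonemes to token ids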
	def encode_text( self, text, language="en" ):
		# already a tensor, return it
		if isinstance( text, Tensor ):
			return text

		content = g2p.encode(text, language=language)
		tokens = tokenize( content )

		return torch.tensor( tokens )

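	# map a language code to its token id (falls back to 0 for unknown languages)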
	def encode_lang( self, language ):
		symmap = get_lang_symmap()
		id = 0
		if language in symmap:
			id = symmap[language]

		return torch.tensor([ id ])

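	# encode one or more reference audio files into a single prompt of quantized codes, optionally trimmed to a fixed length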
	def encode_audio( self, paths, trim_length=0.0 ):
		# already a tensor, return it
		if isinstance( paths, Tensor ):
			return paths

		# split string into paths
		if isinstance( paths, str ):
			paths = [ Path(p) for p in paths.split(";") ]

		# merge inputs
		proms = []
		for path in paths:
			prom = qnt.encode_from_file(path)
			if hasattr( prom, "codes" ):
				prom = prom.codes
			prom = prom[0][:, :].t().to(torch.int16)

			proms.append( prom )

		res = torch.cat(proms)

		if trim_length:
			res = trim( res, int( cfg.dataset.frames_per_second * trim_length ) )

		return res

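	# synthesize speech for each line of `text`, conditioned on the reference prompt, and concatenate the resulting waveforms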
	@torch.inference_mode()
	def inference(
		self,
		text,
		references,
		language="en",
		#
		max_ar_steps=6 * cfg.dataset.frames_per_second,
		max_nar_levels=7,
		#
		input_prompt_length=0.0,
		#
		ar_temp=0.95,
		nar_temp=0.5,
		#
		min_ar_temp=0.95,
		min_nar_temp=0.5,
		#
		top_p=1.0,
		top_k=0,
		#
		repetition_penalty=1.0,
		repetition_penalty_decay=0.0,
		length_penalty=0.0,
		#
		beam_width=0,
		#
		mirostat_tau=0,
		mirostat_eta=0.1,
		#
		dry_multiplier=0.0,
		dry_base=1.75,
		dry_allowed_length=2,

		seed = None,
		out_path=None,
		tqdm=True,
	):
		lines = text.split("\n")

		wavs = []
		sr = None

		model_ar = None
		model_len = None
		model_nar = None

		for name, engine in self.engines.items():
			if "ar" in engine.hyper_config.capabilities:
				model_ar = engine.module
			if "len" in engine.hyper_config.capabilities:
				model_len = engine.module
			if "nar" in engine.hyper_config.capabilities:
				model_nar = engine.module

		set_seed(seed)

		for line in lines:
			if out_path is None:
				output_dir = Path("./data/results/")
				if not output_dir.exists():
					output_dir.mkdir(parents=True, exist_ok=True)
				out_path = output_dir / f"{time.time()}.wav"

			prom = self.encode_audio( references, trim_length=input_prompt_length ) if references else None
			phns = self.encode_text( line, language=language )
			lang = self.encode_lang( language )

			prom = to_device(prom, device=self.device, dtype=torch.int16)
			phns = to_device(phns, device=self.device, dtype=torch.uint8 if len(self.symmap) < 256 else torch.int16)
			lang = to_device(lang, device=self.device, dtype=torch.uint8)

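			# the AR model drafts the first codebook level; the NAR model then fills in the remaining levels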
			# to-do: add in case for experimental.hf model
			with torch.autocast("cuda", dtype=self.dtype, enabled=self.amp):
				if model_ar is not None:
					resps_list = model_ar(
						text_list=[phns], proms_list=[prom], lang_list=[lang], max_steps=max_ar_steps,
						sampling_temperature=ar_temp,
						sampling_min_temperature=min_ar_temp,
						sampling_top_p=top_p, sampling_top_k=top_k,
						sampling_repetition_penalty=repetition_penalty, sampling_repetition_penalty_decay=repetition_penalty_decay,
						sampling_length_penalty=length_penalty,
						sampling_beam_width=beam_width,
						sampling_mirostat_tau=mirostat_tau,
						sampling_mirostat_eta=mirostat_eta,
						sampling_dry_multiplier=dry_multiplier,
						sampling_dry_base=dry_base,
						sampling_dry_allowed_length=dry_allowed_length,
						disable_tqdm=not tqdm,
					)
					resps_list = model_nar(
						text_list=[phns], proms_list=[prom], lang_list=[lang], resps_list=resps_list,
						max_levels=max_nar_levels,
						sampling_temperature=nar_temp,
						sampling_min_temperature=min_nar_temp,
						sampling_top_p=top_p, sampling_top_k=top_k,
						sampling_repetition_penalty=repetition_penalty, sampling_repetition_penalty_decay=repetition_penalty_decay,
						disable_tqdm=not tqdm,
					)
				elif model_len is not None:
					len_list = model_len( text_list=[phns], proms_list=[prom], max_steps=10, disable_tqdm=not tqdm ) # don't need more than that
					resps_list = model_nar( text_list=[phns], proms_list=[prom], len_list=len_list,
						max_levels=max_nar_levels,
						sampling_temperature=nar_temp,
						sampling_min_temperature=min_nar_temp,
						sampling_top_p=top_p, sampling_top_k=top_k,
						sampling_repetition_penalty=repetition_penalty, sampling_repetition_penalty_decay=repetition_penalty_decay,
						disable_tqdm=not tqdm,
					)
				else:
					raise Exception("No loaded model has an AR or len capability")
			wav, sr = qnt.decode_to_file(resps_list[0], out_path, device=self.device)
			wavs.append(wav)

		return (torch.concat(wavs, dim=-1), sr)
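
# --- minimal usage sketch (illustrative only; the config and audio paths below are assumptions, not files shipped with this module) ---
if __name__ == "__main__":
	# hypothetical YAML config describing a trained checkpoint
	tts = TTS( config="./config.yaml" )
	# hypothetical reference clip used as the speaker prompt
	wav, sr = tts.inference(
		"Hello world.",
		references="./reference.wav",
		out_path="./output.wav",
	)
	print( f"Synthesized {wav.shape[-1] / sr:.2f} seconds of audio to ./output.wav" )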