limit eval size because the training batch size seems to be used for the eval dataloader, somehow (bandaid)

This commit is contained in:
mrq 2024-06-29 09:11:28 -05:00
parent 591d3ac848
commit dd40463803
6 changed files with 31 additions and 14 deletions

View File

@ -23,8 +23,7 @@ I've tested this repo under Python versions `3.10.9`, `3.11.3`, and `3.12.3`.
## Pre-Trained Model
> [!NOTE]
> Pre-Trained weights aren't up to par as a pure zero-shot model at the moment, but are fine for finetuning / LoRAs.
> [!NOTE] Pre-Trained weights aren't up to par as a pure zero-shot model at the moment, but are fine for finetuning / LoRAs.
My pre-trained weights can be acquired from [here](https://huggingface.co/ecker/vall-e).

View File

@ -1,4 +1,3 @@
experimental: False # should probably expand this into a dict of experimental flags
sample_rate: 24_000 # 44_000 for dac
audio_backend: "vocos" # or dac
@ -131,7 +130,9 @@ dataset:
max_resps: 1
p_resp_append: 0.25
sample_type: path # speaker
sample_type: path # path | speaker | group
sample_order: duration # shuffle | duration
sample_max_duration_batch: 0 # used when above = duration, 120 seconds per batch at 12GiB of VRAM works
tasks_list: [ "tts" ] # , [ "tts", "tts-c", "ns", "sr", "tse", "cse", "nse", "tts"]

View File

@ -547,7 +547,7 @@ class Dataset(_Dataset):
if self.sampler_type == "path":
if self.sampler_order == "duration" and cfg.dataset.sample_max_duration_batch > 0:
self.sampler = BatchedOrderedSampler( self.duration_buckets, cfg.dataset.sample_max_duration_batch, cfg.hyperparameters.batch_size if training else cfg.evaluation.batch_size )
self.sampler = BatchedOrderedSampler( self.duration_buckets, cfg.dataset.sample_max_duration_batch, cfg.hyperparameters.batch_size if self.training else cfg.evaluation.batch_size )
else:
self.sampler = OrderedSampler( len(self) )
self.samplers = {}

View File

@ -159,13 +159,11 @@ def load_engines(training=True):
for k in erase:
del state[k]
# resize text embedding
if "text_emb.weight" in state and model.config.text_tokens != state["text_emb.weight"].shape[0]:
state["text_emb.weight"] = state["text_emb.weight"][:model.config.text_tokens]
# resize text embedding
if "rvq_l_emb.weight" in state and model.config.resp_levels != state["rvq_l_emb.weight"].shape[0]:
state["rvq_l_emb.weight"] = state["rvq_l_emb.weight"][:model.config.resp_levels]
# resize embeddings
if "text_emb.weight" in state:
state["text_emb.weight"] = ml.resize_weight( state["text_emb.weight"], model.config.text_tokens )
if "rvq_l_emb.weight" in state:
state["rvq_l_emb.weight"] = ml.resize_weight( state["rvq_l_emb.weight"], model.config.resp_levels )
model.load_state_dict(state, strict=cfg.trainer.strict_loading)

View File

@ -30,7 +30,7 @@ def train_feeder(engine, batch):
with torch.autocast("cuda", dtype=cfg.trainer.dtype, enabled=cfg.trainer.amp):
batch_size = len(batch["text"])
engine.current_batch_size = batch_size
if engine.hyper_config.experimental:
if cfg.model.interleave:
quant_levels = 0
@ -116,7 +116,12 @@ def run_eval(engines, eval_name, dl):
processed = 0
while processed < cfg.evaluation.size:
batch: dict = to_device(next(iter(dl)), cfg.device)
batch = to_device(next(iter(dl)), cfg.device)
# limit to eval batch size in the event we somehow have a weird dataloader
for key in batch.keys():
batch[key] = batch[key][:cfg.evaluation.batch_size]
processed += len(batch["text"])
for name in engines:

View File

@ -212,6 +212,20 @@ def replace_attention( model, klass, target, mode="math", verbose=False ):
return model
# trim/expand a tensor (for example, in a state dict)
def resize_weight( weight, target ):
	"""Resize a weight tensor along dim 0 to exactly `target` rows.

	Used when loading a state dict whose embedding tables differ in size
	from the current model config (e.g. `text_emb.weight`, `rvq_l_emb.weight`).

	* target < rows: trim, keeping the first `target` rows.
	* target > rows: pad with uniform-random rows matching the original's
	  device/dtype (random init for the new entries; they get trained later).
	* target == rows: return the input tensor unchanged.
	"""
	rows = weight.shape[0]
	# trim
	if target < rows:
		return weight[:target]
	# expand: one vectorized rand + cat instead of re-stacking every
	# existing row through a Python list (avoids O(rows) per-row ops)
	if target > rows:
		padding = torch.rand(
			target - rows, *weight.shape[1:],
			device=weight.device, dtype=weight.dtype,
		)
		return torch.cat( [ weight, padding ] )
	return weight
# https://github.com/konstmish/prodigy
try:
from prodigyopt import Prodigy