limit eval size because the training batch size seems to be used for the eval dataloader, somehow (bandaid)
This commit is contained in:
parent 591d3ac848
commit dd40463803
```diff
@@ -23,8 +23,7 @@ I've tested this repo under Python versions `3.10.9`, `3.11.3`, and `3.12.3`.
 ## Pre-Trained Model
 
-> [!NOTE]
-> Pre-Trained weights aren't up to par as a pure zero-shot model at the moment, but are fine for finetuning / LoRAs.
+> [!NOTE] Pre-Trained weights aren't up to par as a pure zero-shot model at the moment, but are fine for finetuning / LoRAs.
 
 My pre-trained weights can be acquired from [here](https://huggingface.co/ecker/vall-e).
```
```diff
@@ -1,4 +1,3 @@
-experimental: False # should probably expand this into a dict of experimental flags
 sample_rate: 24_000 # 44_000 for dac
 audio_backend: "vocos" # or dac
```
```diff
@@ -131,7 +130,9 @@ dataset:
   max_resps: 1
   p_resp_append: 0.25
 
-  sample_type: path # speaker
+  sample_type: path # path | speaker | group
+  sample_order: duration # shuffle | duration
+  sample_max_duration_batch: 0 # used when above = duration, 120 seconds per batch at 12GiB of VRAM works
 
   tasks_list: [ "tts" ] # , [ "tts", "tts-c", "ns", "sr", "tse", "cse", "nse", "tts"]
```
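For context, the new `sample_order: duration` / `sample_max_duration_batch` pair switches from fixed-count batches to batches capped by total audio seconds. A minimal sketch of that idea, with illustrative names rather than the repo's `BatchedOrderedSampler` internals:

```python
# Hypothetical sketch of duration-based batching: pack duration-sorted samples
# into batches until a per-batch duration budget (e.g. 120 seconds) is hit.
def duration_batches(durations, max_seconds=120.0):
    """durations: list of (index, seconds) pairs, e.g. from a duration bucket."""
    batch, total = [], 0.0
    for idx, seconds in sorted(durations, key=lambda pair: pair[1]):
        if batch and total + seconds > max_seconds:
            yield batch
            batch, total = [], 0.0
        batch.append(idx)
        total += seconds

    if batch:
        yield batch

# three short clips fit one 120s budget; the long clip gets its own batch
print(list(duration_batches([(0, 30), (1, 45), (2, 40), (3, 110)])))
# [[0, 2, 1], [3]]
```

Packing by duration keeps per-batch padding (and therefore VRAM) roughly constant even when clip lengths vary widely, which is presumably why a 120-second budget maps cleanly onto a 12GiB card.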
```diff
@@ -547,7 +547,7 @@ class Dataset(_Dataset):
         if self.sampler_type == "path":
             if self.sampler_order == "duration" and cfg.dataset.sample_max_duration_batch > 0:
-                self.sampler = BatchedOrderedSampler( self.duration_buckets, cfg.dataset.sample_max_duration_batch, cfg.hyperparameters.batch_size if training else cfg.evaluation.batch_size )
+                self.sampler = BatchedOrderedSampler( self.duration_buckets, cfg.dataset.sample_max_duration_batch, cfg.hyperparameters.batch_size if self.training else cfg.evaluation.batch_size )
             else:
                 self.sampler = OrderedSampler( len(self) )
             self.samplers = {}
```
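The one-word fix above (`training` → `self.training`) is the heart of the commit: the bare name evidently resolved to something truthy in an enclosing scope, so evaluation datasets were built with `cfg.hyperparameters.batch_size`. A self-contained sketch of that scoping pitfall, with hypothetical names rather than the repo's actual ones:

```python
# Hypothetical reproduction: a bare name that shadows the per-instance flag
# makes the conditional always pick the training-side value.
training = True  # e.g. some module-level or closed-over name

class Dataset:
    def __init__(self, training_flag=False):
        self.training = training_flag
        # buggy: `training` finds the module-level name above, not the flag
        self.batch_size = 16 if training else 4
        # fixed: `self.training` reads this instance's flag
        self.fixed_batch_size = 16 if self.training else 4

eval_ds = Dataset(training_flag=False)
print(eval_ds.batch_size)        # 16 -- eval loader wrongly gets the train size
print(eval_ds.fixed_batch_size)  # 4
```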
```diff
@@ -159,13 +159,11 @@ def load_engines(training=True):
         for k in erase:
             del state[k]
 
-        # resize text embedding
-        if "text_emb.weight" in state and model.config.text_tokens != state["text_emb.weight"].shape[0]:
-            state["text_emb.weight"] = state["text_emb.weight"][:model.config.text_tokens]
-
-        # resize text embedding
-        if "rvq_l_emb.weight" in state and model.config.resp_levels != state["rvq_l_emb.weight"].shape[0]:
-            state["rvq_l_emb.weight"] = state["rvq_l_emb.weight"][:model.config.resp_levels]
+        # resize embeddings
+        if "text_emb.weight" in state:
+            state["text_emb.weight"] = ml.resize_weight( state["text_emb.weight"], model.config.text_tokens )
+        if "rvq_l_emb.weight" in state:
+            state["rvq_l_emb.weight"] = ml.resize_weight( state["rvq_l_emb.weight"], model.config.resp_levels )
 
         model.load_state_dict(state, strict=cfg.trainer.strict_loading)
```
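Worth noting what changed semantically here: the old slicing-only code could shrink a checkpoint's embedding table but silently did nothing when the config asked for more rows, whereas `ml.resize_weight` (added further down) handles both directions. A small contrast, using `torch.cat` to mirror the grow path:

```python
# Illustrative contrast of the two resize strategies on a checkpoint row count.
import torch

old_rows, new_rows = 100, 120
weight = torch.randn(old_rows, 8)

# old approach: plain slicing can only shrink; growing was a silent no-op
shrunk_only = weight[:new_rows]        # still 100 rows when new_rows > 100
print(shrunk_only.shape)               # torch.Size([100, 8])

# new approach (mirroring ml.resize_weight): pad with random rows to grow
padded = torch.cat([weight, torch.rand(new_rows - old_rows, 8)])
print(padded.shape)                    # torch.Size([120, 8])
```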
```diff
@@ -30,7 +30,7 @@ def train_feeder(engine, batch):
     with torch.autocast("cuda", dtype=cfg.trainer.dtype, enabled=cfg.trainer.amp):
         batch_size = len(batch["text"])
-
+        engine.current_batch_size = batch_size
 
         if engine.hyper_config.experimental:
             if cfg.model.interleave:
                 quant_levels = 0
```
```diff
@@ -116,7 +116,12 @@ def run_eval(engines, eval_name, dl):
     processed = 0
     while processed < cfg.evaluation.size:
-        batch: dict = to_device(next(iter(dl)), cfg.device)
+        batch = to_device(next(iter(dl)), cfg.device)
+
+        # limit to eval batch size in the event we somehow have a weird dataloader
+        for key in batch.keys():
+            batch[key] = batch[key][:cfg.evaluation.batch_size]
+
         processed += len(batch["text"])
 
         for name in engines:
```
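The loop above is the titular bandaid: even if the dataloader hands back a training-sized batch, every field is truncated to `cfg.evaluation.batch_size` before use. A sketch of the same idea, assuming batch fields are lists or tensors batched along dim 0:

```python
# Minimal sketch: truncate every field of an oversized batch dict to the
# intended evaluation batch size.
import torch

def limit_batch(batch: dict, eval_batch_size: int) -> dict:
    # slicing works uniformly for lists and for tensors batched on dim 0
    return {key: value[:eval_batch_size] for key, value in batch.items()}

oversized = {
    "text": ["a", "b", "c", "d"],   # e.g. a training batch size of 4
    "resps": torch.zeros(4, 10),
}
limited = limit_batch(oversized, eval_batch_size=2)
print(len(limited["text"]), limited["resps"].shape)  # 2 torch.Size([2, 10])
```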
```diff
@@ -212,6 +212,20 @@ def replace_attention( model, klass, target, mode="math", verbose=False ):
     return model
 
+# trim/expand a tensor (for example, in a state dict)
+def resize_weight( weight, target ):
+    # trim
+    if target < weight.shape[0]:
+        return weight[:target]
+    # expand
+    if target > weight.shape[0]:
+        return torch.stack(
+            [ x for x in weight ] +
+            [ torch.rand( weight[0].shape ).to(device=weight[0].device, dtype=weight[0].dtype) for _ in range( target - weight.shape[0] ) ]
+        )
+
+    return weight
+
 # https://github.com/konstmish/prodigy
 try:
     from prodigyopt import Prodigy
```
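A quick usage sketch of the new `resize_weight` helper: trimming slices rows off the end, expanding appends uniform-random rows (which will only become meaningful after finetuning), and a matching target is a no-op. The function body is copied from the hunk above so the sketch runs standalone:

```python
import torch

# copied from the hunk above so this example is self-contained
def resize_weight(weight, target):
    if target < weight.shape[0]:   # trim
        return weight[:target]
    if target > weight.shape[0]:   # expand with uniform-random rows
        return torch.stack(
            [x for x in weight] +
            [torch.rand(weight[0].shape).to(device=weight[0].device, dtype=weight[0].dtype)
             for _ in range(target - weight.shape[0])]
        )
    return weight

emb = torch.randn(256, 1024)             # e.g. a text-token embedding table
print(resize_weight(emb, 200).shape)     # torch.Size([200, 1024])
print(resize_weight(emb, 300).shape)     # torch.Size([300, 1024])
assert torch.equal(resize_weight(emb, 300)[:256], emb)  # old rows carried over
```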