This commit is contained in:
mrq 2024-05-18 12:02:56 -05:00
parent 59ef9461f8
commit 74e531d391
2 changed files with 3 additions and 2 deletions

View File

@@ -144,7 +144,7 @@ for dataset_name in sorted(os.listdir(f'./{input_audio}/')):
i = i + 1
outpath = Path(f'./{output_dataset}/{dataset_name}/{speaker_id}/{fname}_{id}.{extension}')
-text = metadata[filename]["text"]
+text = segment["text"]
if len(text) == 0:
continue

View File

@@ -285,6 +285,7 @@ def encode(wav: Tensor, sr: int = cfg.sample_rate, device="cuda", levels=cfg.mod
wav = convert_audio(wav, sr, model.sample_rate, model.channels)
wav = wav.to(device)
with torch.autocast("cuda", dtype=cfg.inference.dtype, enabled=cfg.inference.amp):
encoded_frames = model.encode(wav)
qnt = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1) # (b q t)