ugh

2024-05-18 12:02:56 -05:00 · 2024-05-18 12:02:56 -05:00 · 74e531d391
commit 74e531d391
parent 59ef9461f8
2 changed files with 3 additions and 2 deletions
--- a/scripts/process_dataset.py
+++ b/scripts/process_dataset.py
@@ -144,7 +144,7 @@ for dataset_name in sorted(os.listdir(f'./{input_audio}/')):
 					i = i + 1

 					outpath = Path(f'./{output_dataset}/{dataset_name}/{speaker_id}/{fname}_{id}.{extension}')
-					text = metadata[filename]["text"]
+					text = segment["text"]

 					if len(text) == 0:
 						continue
--- a/vall_e/emb/qnt.py
+++ b/vall_e/emb/qnt.py
@@ -285,7 +285,8 @@ def encode(wav: Tensor, sr: int = cfg.sample_rate, device="cuda", levels=cfg.mod
 	wav = convert_audio(wav, sr, model.sample_rate, model.channels)
 	wav = wav.to(device)

-	encoded_frames = model.encode(wav)
+	with torch.autocast("cuda", dtype=cfg.inference.dtype, enabled=cfg.inference.amp):
+		encoded_frames = model.encode(wav)
 	qnt = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1)  # (b q t)

 	return qnt