From 8f86cf0e4e5be32673b9983130f05303275e5c5c Mon Sep 17 00:00:00 2001 From: mrq Date: Sun, 16 Feb 2025 11:34:05 -0600 Subject: [PATCH] possible logic optimization so I don't spend another 15 minutes simply iterating back to the point I was at in vall_e.emb.process --- vall_e/emb/process.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/vall_e/emb/process.py b/vall_e/emb/process.py index 1272ff8..d22592d 100644 --- a/vall_e/emb/process.py +++ b/vall_e/emb/process.py @@ -322,13 +322,12 @@ def process( if len(text) == 0 or outpath.exists(): continue - # audio not already loaded, load it - if waveform is None: - waveform, sample_rate = load_audio( inpath, dtype=dtype ) - - if max_duration and waveform.shape[-1] / sample_rate > max_duration: - continue + if max_duration: + info = torchaudio.info( inpath ) + if info.num_frames / info.sample_rate > max_duration: + continue + waveform, sample_rate = load_audio( inpath, dtype=dtype ) jobs.append(( outpath, waveform, sample_rate, text, language )) else: i = 0 @@ -351,15 +350,18 @@ def process( if len(text) == 0 or outpath.exists(): continue + start = (segment['start']-0.05) + end = (segment['end']+0.5) + + if max_duration and end - start > max_duration: + continue + # audio not already loaded, load it if waveform is None: waveform, sample_rate = load_audio( inpath, dtype=dtype ) - start = int((segment['start']-0.05) * sample_rate) - end = int((segment['end']+0.5) * sample_rate) - - if max_duration and (end - start) / sample_rate > max_duration: - continue + start = int(start * sample_rate) + end = int(end * sample_rate) if not presliced: if start < 0: