Possible logic optimization: check max_duration before loading the audio, so I don't spend another 15 minutes simply iterating back to the point I was at in vall_e.emb.process

2025-02-16 11:34:05 -06:00 · 2025-02-16 11:34:05 -06:00 · 8f86cf0e4e
commit 8f86cf0e4e
parent 0dc49ef4d5
1 changed file with 13 additions and 11 deletions
--- a/vall_e/emb/process.py
+++ b/vall_e/emb/process.py
@@ -322,13 +322,12 @@ def process(
 					if len(text) == 0 or outpath.exists():
 						continue

-					# audio not already loaded, load it
-					if waveform is None:
-						waveform, sample_rate = load_audio( inpath, dtype=dtype )
-
-					if max_duration and waveform.shape[-1] / sample_rate > max_duration:
-						continue
+					if max_duration:
+						info = torchaudio.info( inpath )
+						if info.num_frames / info.sample_rate > max_duration:
+							continue

+					waveform, sample_rate = load_audio( inpath, dtype=dtype )
 					jobs.append(( outpath, waveform, sample_rate, text, language ))
 				else:
 					i = 0
@@ -351,15 +350,18 @@ def process(
 						if len(text) == 0 or outpath.exists():
 							continue

+						start = (segment['start']-0.05)
+						end = (segment['end']+0.5)
+						
+						if max_duration and end - start > max_duration:
+							continue
+
 						# audio not already loaded, load it
 						if waveform is None:
 							waveform, sample_rate = load_audio( inpath, dtype=dtype )

-						start = int((segment['start']-0.05) * sample_rate)
-						end = int((segment['end']+0.5) * sample_rate)
-
-						if max_duration and (end - start) / sample_rate > max_duration:
-							continue
+						start = int(start * sample_rate)
+						end = int(end * sample_rate)

 						if not presliced:
 							if start < 0: