when the sanitizer thingy works in testing but it doesn't outside of testing, and you have to retranscribe for the fourth time today

2023-03-23 02:37:44 +00:00 · 2023-03-23 02:37:44 +00:00 · a6daf289bc
commit a6daf289bc
parent 86589fff91
1 changed file with 2 additions and 0 deletions
--- a/src/utils.py
+++ b/src/utils.py
@@ -1294,6 +1294,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non
 				continue
 			results[basename] = result
 		"""
 		try:
 			sanitized = whisper_sanitize(results[basename])
 			if len(sanitized['segments']) > 0 and len(sanitized['segments']) != len(results[basename]['segments']):
@@ -1303,6 +1304,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non
 		except Exception as e:
 			print("Failed to sanitize:", basename, e)
 			pass
 		"""
 		waveform, sample_rate = torchaudio.load(file)
 		# resample to the input rate, since it'll get resampled for training anyways