From a6daf289bcf2c9f66fbbb75e4dac5ca2b46c1fd8 Mon Sep 17 00:00:00 2001 From: mrq Date: Thu, 23 Mar 2023 02:37:44 +0000 Subject: [PATCH] when the sanitizer thingy works in testing but it doesn't outside of testing, and you have to retranscribe for the fourth time today --- src/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils.py b/src/utils.py index 43cb78d..668d696 100755 --- a/src/utils.py +++ b/src/utils.py @@ -1294,6 +1294,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non continue results[basename] = result + """ try: sanitized = whisper_sanitize(results[basename]) if len(sanitized['segments']) > 0 and len(sanitized['segments']) != len(results[basename]['segments']): @@ -1303,6 +1304,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non except Exception as e: print("Failed to sanitize:", basename, e) pass + """ waveform, sample_rate = torchaudio.load(file) # resample to the input rate, since it'll get resampled for training anyways