The Whisper sanitizer works in testing but fails outside of testing, forcing a retranscription for the fourth time today, so the sanitization step in transcribe_dataset is commented out for now.
This commit is contained in:
parent
86589fff91
commit
a6daf289bc
|
@@ -1294,6 +1294,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non
|
||||||
continue
|
continue
|
||||||
results[basename] = result
|
results[basename] = result
|
||||||
|
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
sanitized = whisper_sanitize(results[basename])
|
sanitized = whisper_sanitize(results[basename])
|
||||||
if len(sanitized['segments']) > 0 and len(sanitized['segments']) != len(results[basename]['segments']):
|
if len(sanitized['segments']) > 0 and len(sanitized['segments']) != len(results[basename]['segments']):
|
||||||
|
@@ -1303,6 +1304,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Failed to sanitize:", basename, e)
|
print("Failed to sanitize:", basename, e)
|
||||||
pass
|
pass
|
||||||
|
"""
|
||||||
|
|
||||||
waveform, sample_rate = torchaudio.load(file)
|
waveform, sample_rate = torchaudio.load(file)
|
||||||
# resample to the input rate, since it'll get resampled for training anyways
|
# resample to the input rate, since it'll get resampled for training anyways
|
||||||
|
|
Loading…
Reference in New Issue
Block a user