diff --git a/src/utils.py b/src/utils.py index f5c712a..1cda2c1 100755 --- a/src/utils.py +++ b/src/utils.py @@ -1271,11 +1271,13 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non for file in enumerate_progress(files, desc="Iterating through voice files", progress=progress): basename = os.path.basename(file) + modified = False if basename in results and skip_existings: print(f"Skipping already parsed file: {basename}") else: try: result = whisper_transcribe(file, language=language) + modified = True except Exception as e: print("Failed to transcribe:", file) continue @@ -1283,8 +1285,9 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non try: sanitized = whisper_sanitize(results[basename]) - if len(sanitized['segments']) > 0 and len(sanitized['segments'] != results[basename]['segments']): + if len(sanitized['segments']) > 0 and len(sanitized['segments']) != len(results[basename]['segments']): results[basename] = sanitized + modified = True print("Segments sanizited: ", basename) except Exception as e: print("Failed to sanitize:", basename, e) @@ -1298,8 +1301,9 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non waveform = waveform[:1] torchaudio.save(f"{indir}/audio/{basename}", waveform, sample_rate, encoding="PCM_S", bits_per_sample=16) - with open(infile, 'w', encoding="utf-8") as f: - f.write(json.dumps(results, indent='\t')) + if modified: + with open(infile, 'w', encoding="utf-8") as f: + f.write(json.dumps(results, indent='\t')) do_gc() diff --git a/src/webui.py b/src/webui.py index 6cb87b0..ea072ea 100755 --- a/src/webui.py +++ b/src/webui.py @@ -443,8 +443,8 @@ def setup_gradio(): DATASET_SETTINGS['voice'] = gr.Dropdown( choices=voice_list, label="Dataset Source", type="value", value=voice_list[0] if len(voice_list) > 0 else "" ) with gr.Row(): DATASET_SETTINGS['language'] = gr.Textbox(label="Language", value="en") - DATASET_SETTINGS['validation_text_length'] = gr.Number(label="Validation Text Length Threshold", value=12, precision=0) - DATASET_SETTINGS['validation_audio_length'] = gr.Number(label="Validation Audio Length Threshold", value=1 ) + DATASET_SETTINGS['validation_text_length'] = gr.Number(label="Validation Text Length Threshold", value=12, precision=0, visible=args.tts_backend=="tortoise") + DATASET_SETTINGS['validation_audio_length'] = gr.Number(label="Validation Audio Length Threshold", value=1, visible=args.tts_backend=="tortoise" ) with gr.Row(): DATASET_SETTINGS['skip'] = gr.Checkbox(label="Skip Existing", value=False) DATASET_SETTINGS['slice'] = gr.Checkbox(label="Slice Segments", value=False)