From dea2fa9caf40245943813b6799cce923e47bc2d9 Mon Sep 17 00:00:00 2001 From: mrq Date: Sat, 11 Mar 2023 21:34:29 +0000 Subject: [PATCH] added fields to offset start/end slices to apply in bulk when slicing --- src/utils.py | 5 +++++ src/webui.py | 11 ++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/utils.py b/src/utils.py index 8fe8fc6..883ffd9 100755 --- a/src/utils.py +++ b/src/utils.py @@ -1122,6 +1122,11 @@ def slice_dataset( voice, start_offset=0, end_offset=0 ): start = int((segment['start'] + start_offset) * sampling_rate) end = int((segment['end'] + end_offset) * sampling_rate) + if start < 0: + start = 0 + if end >= waveform.shape[-1]: + end = waveform.shape[-1] - 1 + sliced = waveform[:, start:end] file = filename.replace(".wav", f"_{pad(segment['id'], 4)}.wav") diff --git a/src/webui.py b/src/webui.py index d5bbbc0..6e6cc14 100755 --- a/src/webui.py +++ b/src/webui.py @@ -182,14 +182,14 @@ def read_generate_settings_proxy(file, saveAs='.temp'): gr.update(visible=j is not None), ) -def prepare_dataset_proxy( voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, progress=gr.Progress(track_tqdm=False) ): +def prepare_dataset_proxy( voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=False) ): messages = [] message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress ) messages.append(message) if slice_audio: - message = slice_dataset( voice ) + message = slice_dataset( voice, start_offset=slice_start_offset, end_offset=slice_end_offset ) messages.append(message) message = prepare_dataset( voice, use_segments=slice_audio, text_length=validation_text_length, audio_length=validation_audio_length ) @@ -421,6 +421,9 @@ def setup_gradio(): with gr.Row(): DATASET_SETTINGS['skip'] = gr.Checkbox(label="Skip Already Transcribed", value=False) DATASET_SETTINGS['slice'] = gr.Checkbox(label="Slice Segments", value=False) + with gr.Row(): + DATASET_SETTINGS['slice_start_offset'] = gr.Number(label="Slice Start Offset", value=0) + DATASET_SETTINGS['slice_end_offset'] = gr.Number(label="Slice End Offset", value=0) transcribe_button = gr.Button(value="Transcribe and Process") @@ -759,7 +762,9 @@ def setup_gradio(): slice_dataset_button.click( slice_dataset, inputs=[ - DATASET_SETTINGS['voice'] + DATASET_SETTINGS['voice'], + DATASET_SETTINGS['slice_start_offset'], + DATASET_SETTINGS['slice_end_offset'], ], outputs=prepare_dataset_output )