Added fields for start/end slice offsets, applied in bulk to every segment when slicing

This commit is contained in:
mrq 2023-03-11 21:34:29 +00:00
parent 89bb3d4419
commit dea2fa9caf
2 changed files with 13 additions and 3 deletions

View File

@ -1122,6 +1122,11 @@ def slice_dataset( voice, start_offset=0, end_offset=0 ):
start = int((segment['start'] + start_offset) * sampling_rate) start = int((segment['start'] + start_offset) * sampling_rate)
end = int((segment['end'] + end_offset) * sampling_rate) end = int((segment['end'] + end_offset) * sampling_rate)
if start < 0:
start = 0
if end >= waveform.shape[-1]:
end = waveform.shape[-1] - 1
sliced = waveform[:, start:end] sliced = waveform[:, start:end]
file = filename.replace(".wav", f"_{pad(segment['id'], 4)}.wav") file = filename.replace(".wav", f"_{pad(segment['id'], 4)}.wav")

View File

@ -182,14 +182,14 @@ def read_generate_settings_proxy(file, saveAs='.temp'):
gr.update(visible=j is not None), gr.update(visible=j is not None),
) )
def prepare_dataset_proxy( voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, progress=gr.Progress(track_tqdm=False) ): def prepare_dataset_proxy( voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=False) ):
messages = [] messages = []
message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress ) message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
messages.append(message) messages.append(message)
if slice_audio: if slice_audio:
message = slice_dataset( voice ) message = slice_dataset( voice, start_offset=slice_start_offset, end_offset=slice_end_offset )
messages.append(message) messages.append(message)
message = prepare_dataset( voice, use_segments=slice_audio, text_length=validation_text_length, audio_length=validation_audio_length ) message = prepare_dataset( voice, use_segments=slice_audio, text_length=validation_text_length, audio_length=validation_audio_length )
@ -421,6 +421,9 @@ def setup_gradio():
with gr.Row(): with gr.Row():
DATASET_SETTINGS['skip'] = gr.Checkbox(label="Skip Already Transcribed", value=False) DATASET_SETTINGS['skip'] = gr.Checkbox(label="Skip Already Transcribed", value=False)
DATASET_SETTINGS['slice'] = gr.Checkbox(label="Slice Segments", value=False) DATASET_SETTINGS['slice'] = gr.Checkbox(label="Slice Segments", value=False)
with gr.Row():
DATASET_SETTINGS['slice_start_offset'] = gr.Number(label="Slice Start Offset", value=0)
DATASET_SETTINGS['slice_end_offset'] = gr.Number(label="Slice End Offset", value=0)
transcribe_button = gr.Button(value="Transcribe and Process") transcribe_button = gr.Button(value="Transcribe and Process")
@ -759,7 +762,9 @@ def setup_gradio():
slice_dataset_button.click( slice_dataset_button.click(
slice_dataset, slice_dataset,
inputs=[ inputs=[
DATASET_SETTINGS['voice'] DATASET_SETTINGS['voice'],
DATASET_SETTINGS['slice_start_offset'],
DATASET_SETTINGS['slice_end_offset'],
], ],
outputs=prepare_dataset_output outputs=prepare_dataset_output
) )