DL-Art-School/codes/scripts/audio/split_on_silence.py
James Betker 73b930c0f6 Add diffusion_dvae
Increase split_on_silence interval
2021-09-09 16:22:05 -06:00

43 lines
1.5 KiB
Python

import argparse
import logging
import os
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
from pydub.silence import split_on_silence
from data.util import find_audio_files
from tqdm import tqdm
# Uses pydub to process a directory of audio files, splitting them into clips at points where it detects a small amount
# of silence.
def main():
    """Split every audio file under ``--path`` into clips at silent gaps.

    Each input file is decoded with pydub and cut with ``split_on_silence``
    (a gap counts as silence when it lasts at least 400 ms below -40 dBFS;
    50 ms of silence is kept on each side of a clip). Clips whose duration
    falls outside ``[--min_duration, --max_duration]`` seconds are discarded;
    the rest are exported as 22050 Hz mono WAV files named by their chunk
    index inside a per-input directory under ``--out``.

    Files that pydub cannot decode are reported and skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', required=True,
                        help='Directory that is searched for audio files.')
    parser.add_argument('--out', required=True,
                        help='Directory that receives one sub-directory of clips per input file.')
    parser.add_argument('--min_duration', type=float, default=5,
                        help='Clips shorter than this many seconds are discarded.')
    parser.add_argument('--max_duration', type=float, default=20,
                        help='Clips longer than this many seconds are discarded.')
    args = parser.parse_args()
    minimum_duration = args.min_duration
    maximum_duration = args.max_duration

    files = find_audio_files(args.path, include_nonwav=True)
    for file_index, wav_file in enumerate(tqdm(files)):
        print(f"Processing {wav_file}..")
        outdir = _clip_output_dir(args.out, file_index, wav_file)
        os.makedirs(outdir, exist_ok=True)
        try:
            speech = AudioSegment.from_file(wav_file)
        except CouldntDecodeError as err:
            # Best effort: one unreadable file must not abort the whole run.
            print(err)
            continue

        chunks = split_on_silence(speech, min_silence_len=400, silence_thresh=-40,
                                  seek_step=100, keep_silence=50)
        # The index of the chunk in the *unfiltered* list names the file, so
        # gaps in the numbering show where clips were rejected.
        for i, chunk in enumerate(chunks):
            if not minimum_duration <= chunk.duration_seconds <= maximum_duration:
                continue
            # "-ar 22050 -ac 1" asks ffmpeg for 22050 Hz, single-channel output.
            chunk.export(os.path.join(outdir, f"{i:05d}.wav"), format='wav',
                         parameters=["-ar", "22050", "-ac", "1"])


def _clip_output_dir(out_root, index, audio_path):
    """Return the directory under *out_root* that receives clips of *audio_path*.

    The folder is named ``<index>_<file stem>`` with dots removed and
    surrounding whitespace stripped. Only the folder name is sanitised:
    *out_root* is used verbatim, so roots such as ``./out`` or ``../clips``
    are not rewritten into a different location.
    """
    # splitext handles extensions of any length (.wav, .flac, .opus, ...).
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    folder = f'{index}_{stem}'.replace('.', '').strip()
    return os.path.join(out_root, folder)
# Run the splitter only when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == '__main__':
    main()