forked from mrq/DL-Art-School
More scripts for splitting and formatting audio
commit ed6eae407f
parent 909754cc27
codes/scripts/audio/spleeter_split_voice_and_background.py (new file, 62 lines)
@@ -0,0 +1,62 @@
from scipy.io import wavfile
from spleeter.separator import Separator
from tqdm import tqdm

from data.util import find_audio_files
import os
import os.path as osp
from spleeter.audio.adapter import AudioAdapter
import numpy as np


# Uses spleeter to divide audio clips into one of two bins:
# 1. Audio has little to no background noise, saved to "output_dir"
# 2. Audio has a lot of background noise, bg noise split off and saved to "output_dir_bg"
if __name__ == '__main__':
    src_dir = 'F:\\split\\books1'
    output_dir = 'F:\\split\\cleaned\\books1'
    output_dir_bg = 'F:\\split\\background-noise\\books1'
    output_sample_rate = 22050

    os.makedirs(output_dir_bg, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)

    audio_loader = AudioAdapter.default()
    separator = Separator('spleeter:2stems')
    files = find_audio_files(src_dir, include_nonwav=True)
    for e, file in enumerate(tqdm(files)):
        # Build a filesystem-safe output name from the file's path relative to src_dir.
        file_basis = osp.relpath(file, src_dir)\
            .replace('/', '_')\
            .replace('\\', '_')\
            .replace('.', '_')\
            .replace(' ', '_')\
            .replace('!', '_')\
            .replace(',', '_')
        if len(file_basis) > 100:
            file_basis = file_basis[:100]
        try:
            wave, sample_rate = audio_loader.load(file, sample_rate=output_sample_rate)
        except Exception:
            print(f"Error with {file}")
            continue

        sep = separator.separate(wave)
        vocals = sep['vocals']
        bg = sep['accompaniment']
        vmax = np.abs(vocals).mean()
        bmax = np.abs(bg).mean()

        # Only send the clip to the "good" sample dir if vocal energy sufficiently dominates the
        # background energy; heavily noisy clips go to the background dir, and anything in between is skipped.
        ratio = vmax / (bmax + .0000001)
        if ratio >= 25:  # These values were derived empirically.
            od = output_dir
            out_wave = wave
        elif ratio <= 1:
            od = output_dir_bg
            out_wave = bg
        else:
            continue  # Ambiguous clips are discarded rather than written with stale values.

        # Strip out channels.
        if len(out_wave.shape) > 1:
            out_wave = out_wave[:, 0]  # Just use the first channel.

        wavfile.write(osp.join(od, f'{e}_{file_basis}.wav'), output_sample_rate, out_wave)
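The 25 and 1 cutoffs above are empirical, so it can be worth printing the vocal-to-background energy ratio for a few representative clips before committing to a full run. Below is a minimal sketch that reuses the same spleeter calls as the script; the clip path is a hypothetical placeholder.

# Minimal sketch: print the vocal/background energy ratio for one clip so the empirical
# cutoffs (>= 25 keeps the clean clip, <= 1 keeps the background) can be sanity-checked.
import numpy as np
from spleeter.separator import Separator
from spleeter.audio.adapter import AudioAdapter

clip = 'F:\\split\\books1\\sample.wav'  # hypothetical path
audio_loader = AudioAdapter.default()
separator = Separator('spleeter:2stems')

wave, sample_rate = audio_loader.load(clip, sample_rate=22050)
sep = separator.separate(wave)
vmax = np.abs(sep['vocals']).mean()
bmax = np.abs(sep['accompaniment']).mean()
print(f"vocal/background ratio: {vmax / (bmax + 1e-7):.2f}")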
codes/scripts/audio/split_on_silence.py (new file, 42 lines)
@@ -0,0 +1,42 @@
import argparse
import logging
import os
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
from pydub.silence import split_on_silence
from data.util import find_audio_files
from tqdm import tqdm


# Uses pydub to process a directory of audio files, splitting them into clips at points
# where it detects a small amount of silence.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--path')
    parser.add_argument('--out')
    args = parser.parse_args()
    minimum_duration = 5
    maximum_duration = 20
    files = find_audio_files(args.path, include_nonwav=True)
    for e, wav_file in enumerate(tqdm(files)):
        if e < 4197:
            continue  # Hard-coded resume point: skip files handled in a previous run.
        print(f"Processing {wav_file}..")
        outdir = os.path.join(args.out, f'{e}_{os.path.basename(wav_file[:-4])}').replace('.', '').strip()
        os.makedirs(outdir, exist_ok=True)

        try:
            speech = AudioSegment.from_file(wav_file)
        except CouldntDecodeError as err:
            print(err)
            continue
        chunks = split_on_silence(speech, min_silence_len=300, silence_thresh=-40,
                                  seek_step=100, keep_silence=50)

        # Export only chunks whose duration falls within [minimum_duration, maximum_duration] seconds.
        for i in range(0, len(chunks)):
            if chunks[i].duration_seconds < minimum_duration or chunks[i].duration_seconds > maximum_duration:
                continue
            chunks[i].export(f"{outdir}/{i:05d}.wav", format='wav', parameters=["-ar", "22050", "-ac", "1"])


if __name__ == '__main__':
    main()
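The silence-detection settings (min_silence_len=300, silence_thresh=-40 dBFS) depend heavily on how the source material was recorded, so a dry run on a single file is a cheap way to confirm that most chunks land in the 5 to 20 second window before processing a whole directory. Below is a minimal sketch using the same pydub parameters; the input path is a hypothetical placeholder.

# Minimal sketch: report chunk durations for one file under the same pydub settings
# used by split_on_silence.py, so the silence threshold can be tuned before a batch run.
from pydub import AudioSegment
from pydub.silence import split_on_silence

speech = AudioSegment.from_file('sample.mp3')  # hypothetical input
chunks = split_on_silence(speech, min_silence_len=300, silence_thresh=-40,
                          seek_step=100, keep_silence=50)
durations = [c.duration_seconds for c in chunks]
kept = [d for d in durations if 5 <= d <= 20]
if durations:
    print(f"{len(chunks)} chunks; {len(kept)} within 5-20s; "
          f"shortest {min(durations):.1f}s, longest {max(durations):.1f}s")
else:
    print("No chunks found; the silence threshold may be too aggressive.")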
@@ -54,7 +54,7 @@ if __name__ == "__main__":
     torch.backends.cudnn.benchmark = True
     want_metrics = False
     parser = argparse.ArgumentParser()
-    parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_stop_pred_dataset.yml')
+    parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_lrdvae_audio_clips.yml')
     opt = option.parse(parser.parse_args().opt, is_train=False)
     opt = option.dict_to_nonedict(opt)
     utils.util.loaded_options = opt
@@ -44,7 +44,7 @@ if __name__ == "__main__":
     torch.backends.cudnn.benchmark = True
     want_metrics = False
     parser = argparse.ArgumentParser()
-    parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_gpt_asr_mozcv.yml')
+    parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_gpt_asr_mass.yml')
     opt = option.parse(parser.parse_args().opt, is_train=False)
     opt = option.dict_to_nonedict(opt)
     utils.util.loaded_options = opt