forked from mrq/DL-Art-School
Make spleeter filter simpler (and hopefully much faster)
This commit is contained in:
parent
359e9e27a7
commit
97ea329a59
|
@ -12,13 +12,12 @@ from scripts.audio.preparation.spleeter_dataset import SpleeterDataset
|
|||
|
||||
def main():
|
||||
src_dir = 'F:\\split\\joe_rogan'
|
||||
output_dir = 'F:\\split\\cleaned\\joe_rogan'
|
||||
output_dir_bg = 'F:\\split\\background-noise\\joe_rogan'
|
||||
output_sample_rate=22050
|
||||
batch_size=16
|
||||
|
||||
dl = DataLoader(SpleeterDataset(src_dir, output_sample_rate, skip=batch_size*33000), batch_size=batch_size, shuffle=False, num_workers=1, pin_memory=True)
|
||||
separator = Separator('pretrained_models/2stems', input_sr=output_sample_rate)
|
||||
unacceptable_files = open('unacceptable.txt', 'a')
|
||||
for batch in tqdm(dl):
|
||||
waves = batch['wave']
|
||||
paths = batch['path']
|
||||
|
@ -26,7 +25,6 @@ def main():
|
|||
|
||||
sep = separator.separate(waves)
|
||||
for j in range(sep['vocals'].shape[0]):
|
||||
wave = waves[j].cpu().numpy()[:durations[j]]
|
||||
vocals = sep['vocals'][j][:durations[j]]
|
||||
bg = sep['accompaniment'][j][:durations[j]]
|
||||
vmax = np.abs(vocals[output_sample_rate:-output_sample_rate]).mean()
|
||||
|
@ -34,27 +32,10 @@ def main():
|
|||
|
||||
# Only output to the "good" sample dir if the ratio of background noise to vocal noise is high enough.
|
||||
ratio = vmax / (bmax+.0000001)
|
||||
if ratio >= 4: # These values were derived empirically
|
||||
od = output_dir
|
||||
out_sound = wave
|
||||
elif ratio <= 2:
|
||||
od = output_dir_bg
|
||||
out_sound = bg
|
||||
else:
|
||||
print(f"Reject {paths[j]}: {ratio}")
|
||||
continue
|
||||
|
||||
# Strip out channels.
|
||||
if len(out_sound.shape) > 1:
|
||||
out_sound = out_sound[:, 0] # Just use the first channel.
|
||||
|
||||
# Compile an output path.
|
||||
path = paths[j]
|
||||
reld = str(os.path.relpath(os.path.dirname(path), src_dir)).strip()
|
||||
os.makedirs(os.path.join(od, reld), exist_ok=True)
|
||||
output_path = os.path.join(od, reld, os.path.basename(path))
|
||||
|
||||
wavfile.write(output_path, output_sample_rate, out_sound)
|
||||
if ratio < 4: # These values were derived empirically
|
||||
unacceptable_files.write(f'{paths[j]}\n')
|
||||
unacceptable_files.flush()
|
||||
unacceptable_files.close()
|
||||
|
||||
|
||||
# Uses torch spleeter to divide audio clips into one of two bins:
|
||||
|
|
Loading…
Reference in New Issue
Block a user