forked from mrq/DL-Art-School
Improvements to splitter
This commit is contained in:
parent
742f9b4010
commit
76e2c497f7
|
@ -29,4 +29,4 @@ class Separator:
|
||||||
return {
|
return {
|
||||||
'vocals': res[0].cpu().numpy(),
|
'vocals': res[0].cpu().numpy(),
|
||||||
'accompaniment': res[1].cpu().numpy()
|
'accompaniment': res[1].cpu().numpy()
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,7 @@ def main():
|
||||||
src_dir = 'F:\\split\\podcast-dump0'
|
src_dir = 'F:\\split\\podcast-dump0'
|
||||||
output_dir = 'F:\\tmp\\out'
|
output_dir = 'F:\\tmp\\out'
|
||||||
output_dir_bg = 'F:\\tmp\\bg'
|
output_dir_bg = 'F:\\tmp\\bg'
|
||||||
|
output_dir_reject = 'F:\\tmp\\rejected'
|
||||||
output_sample_rate=22050
|
output_sample_rate=22050
|
||||||
batch_size=24
|
batch_size=24
|
||||||
|
|
||||||
|
@ -28,34 +29,34 @@ def main():
|
||||||
|
|
||||||
sep = separator.separate(waves)
|
sep = separator.separate(waves)
|
||||||
for j in range(sep['vocals'].shape[0]):
|
for j in range(sep['vocals'].shape[0]):
|
||||||
vocals = sep['vocals'][j]
|
wave = waves[j].cpu().numpy()[:durations[j]]
|
||||||
bg = sep['accompaniment'][j]
|
vocals = sep['vocals'][j][:durations[j]]
|
||||||
vmax = np.abs(vocals).mean()
|
bg = sep['accompaniment'][j][:durations[j]]
|
||||||
bmax = np.abs(bg).mean()
|
vmax = np.abs(vocals[output_sample_rate:-output_sample_rate]).mean()
|
||||||
|
bmax = np.abs(bg[output_sample_rate:-output_sample_rate]).mean()
|
||||||
|
|
||||||
# Only output to the "good" sample dir if the ratio of vocal level to background level (mean absolute amplitude) is high enough.
|
# Only output to the "good" sample dir if the ratio of vocal level to background level (mean absolute amplitude) is high enough.
|
||||||
ratio = vmax / (bmax+.0000001)
|
ratio = vmax / (bmax+.0000001)
|
||||||
if ratio >= 25: # These values were derived empirically
|
if ratio >= 4: # These values were derived empirically
|
||||||
od = output_dir
|
od = output_dir
|
||||||
out_sound = waves[j].cpu().numpy()
|
out_sound = wave
|
||||||
elif ratio <= 1:
|
elif ratio <= 2:
|
||||||
od = output_dir_bg
|
od = output_dir_bg
|
||||||
out_sound = bg
|
out_sound = bg
|
||||||
else:
|
else:
|
||||||
continue
|
print(f"Reject {paths[j]}: {ratio}")
|
||||||
|
od = output_dir_reject
|
||||||
|
out_sound = wave
|
||||||
|
|
||||||
# Strip out channels.
|
# Strip out channels.
|
||||||
if len(out_sound.shape) > 1:
|
if len(out_sound.shape) > 1:
|
||||||
out_sound = out_sound[:, 0] # Just use the first channel.
|
out_sound = out_sound[:, 0] # Just use the first channel.
|
||||||
# Resize to true duration
|
|
||||||
out_sound = out_sound[:durations[j]]
|
|
||||||
|
|
||||||
# Compile an output path.
|
# Compile an output path.
|
||||||
path = paths[j]
|
path = paths[j]
|
||||||
reld = os.path.relpath(os.path.dirname(path), src_dir)
|
reld = str(os.path.relpath(os.path.dirname(path), src_dir)).strip()
|
||||||
os.makedirs(os.path.join(od, reld), exist_ok=True)
|
os.makedirs(os.path.join(od, reld), exist_ok=True)
|
||||||
relp = os.path.relpath(path, src_dir)
|
output_path = os.path.join(od, reld, os.path.basename(path))
|
||||||
output_path = os.path.join(od, relp)
|
|
||||||
|
|
||||||
wavfile.write(output_path, output_sample_rate, out_sound)
|
wavfile.write(output_path, output_sample_rate, out_sound)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user