diff --git a/codes/data/audio/wavfile_dataset.py b/codes/data/audio/wavfile_dataset.py
index d8c73415..f77ac7a5 100644
--- a/codes/data/audio/wavfile_dataset.py
+++ b/codes/data/audio/wavfile_dataset.py
@@ -15,13 +15,17 @@ from utils.util import opt_get
 class WavfileDataset(torch.utils.data.Dataset):
 
     def __init__(self, opt):
         self.path = os.path.dirname(opt['path'])
-        cache_path = os.path.join(self.path, 'cache.pth')
+        cache_path = opt_get(opt, ['cache_path'], os.path.join(self.path, 'cache.pth')) # Default derives from self.path while it is still a single directory string; will fail when multiple paths are specified, so 'cache_path' must be given explicitly in that case.
+        if not isinstance(self.path, list):
+            self.path = [self.path]
         if os.path.exists(cache_path):
             self.audiopaths = torch.load(cache_path)
         else:
             print("Building cache..")
-            self.audiopaths = find_files_of_type('img', opt['path'], qualifier=is_wav_file)[0]
+            self.audiopaths = []
+            for p in self.path:
+                self.audiopaths.extend(find_files_of_type('img', p, qualifier=is_wav_file)[0])
             torch.save(self.audiopaths, cache_path)
 
         # Parse options
diff --git a/codes/scripts/audio/random_mp3_splitter.py b/codes/scripts/audio/random_mp3_splitter.py
index 9170f1b6..01f52638 100644
--- a/codes/scripts/audio/random_mp3_splitter.py
+++ b/codes/scripts/audio/random_mp3_splitter.py
@@ -23,7 +23,7 @@ if __name__ == '__main__':
     separator = Separator('spleeter:2stems')
     files = find_audio_files(src_dir, include_nonwav=True)
     for e, file in enumerate(tqdm(files)):
-        if e < 575:
+        if e < 3055:
             continue
         file_basis = osp.relpath(file, src_dir)\
             .replace('/', '_')\
@@ -56,7 +56,7 @@ if __name__ == '__main__':
         bmax = np.abs(bg).mean()
 
         # Only output to the "good" sample dir if the ratio of background noise to vocal noise is high enough.
-        ratio = vmax / bmax
+        ratio = vmax / (bmax+.0000001)
         if ratio >= 25:  # These values were derived empirically
             od = output_dir
             os = clip