forked from mrq/DL-Art-School
data and prep improvements
This commit is contained in:
parent
1e87b934db
commit
896accb71f
|
@ -9,6 +9,7 @@ from utils.util import opt_get
|
||||||
|
|
||||||
def create_dataloader(dataset, dataset_opt, opt=None, sampler=None, collate_fn=None, shuffle=True):
|
def create_dataloader(dataset, dataset_opt, opt=None, sampler=None, collate_fn=None, shuffle=True):
|
||||||
phase = dataset_opt['phase']
|
phase = dataset_opt['phase']
|
||||||
|
pin_memory = opt_get(dataset_opt, ['pin_memory'], True)
|
||||||
if phase == 'train':
|
if phase == 'train':
|
||||||
if opt_get(opt, ['dist'], False):
|
if opt_get(opt, ['dist'], False):
|
||||||
world_size = torch.distributed.get_world_size()
|
world_size = torch.distributed.get_world_size()
|
||||||
|
@ -20,11 +21,11 @@ def create_dataloader(dataset, dataset_opt, opt=None, sampler=None, collate_fn=N
|
||||||
batch_size = dataset_opt['batch_size']
|
batch_size = dataset_opt['batch_size']
|
||||||
return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
|
return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
|
||||||
num_workers=num_workers, sampler=sampler, drop_last=True,
|
num_workers=num_workers, sampler=sampler, drop_last=True,
|
||||||
pin_memory=True, collate_fn=collate_fn)
|
pin_memory=pin_memory, collate_fn=collate_fn)
|
||||||
else:
|
else:
|
||||||
batch_size = dataset_opt['batch_size'] or 1
|
batch_size = dataset_opt['batch_size'] or 1
|
||||||
return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=0,
|
return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=0,
|
||||||
pin_memory=True, collate_fn=collate_fn)
|
pin_memory=pin_memory, collate_fn=collate_fn)
|
||||||
|
|
||||||
|
|
||||||
def create_dataset(dataset_opt, return_collate=False):
|
def create_dataset(dataset_opt, return_collate=False):
|
||||||
|
|
|
@ -4,6 +4,7 @@ import os
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
from multiprocessing.pool import ThreadPool
|
from multiprocessing.pool import ThreadPool
|
||||||
|
from random import shuffle
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
|
@ -53,57 +54,61 @@ class AudioFolderDataset(torch.utils.data.Dataset):
|
||||||
def process_folder(folder, output_path, base_path, progress_file, max_files):
|
def process_folder(folder, output_path, base_path, progress_file, max_files):
|
||||||
classifier = load_model_from_config(args.classifier_model_opt, model_name='classifier', also_load_savepoint=True).cuda().eval()
|
classifier = load_model_from_config(args.classifier_model_opt, model_name='classifier', also_load_savepoint=True).cuda().eval()
|
||||||
dataset = AudioFolderDataset(folder, sampling_rate=22050, pad_to=600000)
|
dataset = AudioFolderDataset(folder, sampling_rate=22050, pad_to=600000)
|
||||||
|
if len(dataset) == 0:
|
||||||
|
return
|
||||||
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2, pin_memory=True)
|
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2, pin_memory=True)
|
||||||
spec_injector = MelSpectrogramInjector({'in': 'clip', 'out': 'mel'}, {})
|
spec_injector = MelSpectrogramInjector({'in': 'clip', 'out': 'mel'}, {})
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
total_count = 0
|
total_count = 0
|
||||||
for batch in tqdm(dataloader):
|
for batch in tqdm(dataloader):
|
||||||
max_len = max(batch['samples'])
|
try:
|
||||||
clips = batch['clip'][:, :max_len].cuda()
|
max_len = max(batch['samples'])
|
||||||
paths = batch['path']
|
clips = batch['clip'][:, :max_len].cuda()
|
||||||
mels = spec_injector({'clip': clips})['mel']
|
paths = batch['path']
|
||||||
padding = ceil_multiple(mels.shape[-1], 16)
|
mels = spec_injector({'clip': clips})['mel']
|
||||||
mels = F.pad(mels, (0, padding))
|
|
||||||
|
|
||||||
def get_spec_mags(clip):
|
def get_spec_mags(clip):
|
||||||
stft = torch.stft(clip, n_fft=22000, hop_length=1024, return_complex=True)
|
stft = torch.stft(clip, n_fft=22000, hop_length=1024, return_complex=True)
|
||||||
stft = stft[0, -2000:, :]
|
stft = stft[0, -2000:, :]
|
||||||
return (stft.real ** 2 + stft.imag ** 2).sqrt()
|
return (stft.real ** 2 + stft.imag ** 2).sqrt()
|
||||||
no_hifreq_data = get_spec_mags(clips).mean(dim=1) < .15
|
no_hifreq_data = get_spec_mags(clips).mean(dim=1) < .15
|
||||||
if torch.all(no_hifreq_data):
|
if torch.all(no_hifreq_data):
|
||||||
continue
|
|
||||||
|
|
||||||
labels = torch.argmax(classifier(mels), dim=-1)
|
|
||||||
|
|
||||||
for b in range(clips.shape[0]):
|
|
||||||
if no_hifreq_data[b]:
|
|
||||||
continue
|
continue
|
||||||
if labels[b] != 0:
|
|
||||||
continue
|
labels = torch.argmax(classifier(mels), dim=-1)
|
||||||
dirpath = paths[b].replace(os.path.basename(paths[b]), "")
|
|
||||||
path = os.path.relpath(dirpath, base_path)
|
for b in range(clips.shape[0]):
|
||||||
opath = os.path.join(output_path, path)
|
if no_hifreq_data[b]:
|
||||||
os.makedirs(opath, exist_ok=True)
|
continue
|
||||||
shutil.copy(paths[b], opath)
|
if labels[b] != 0:
|
||||||
total_count += 1
|
continue
|
||||||
|
dirpath = paths[b].replace(os.path.basename(paths[b]), "")
|
||||||
|
path = os.path.relpath(dirpath, base_path)
|
||||||
|
opath = os.path.join(output_path, path)
|
||||||
|
os.makedirs(opath, exist_ok=True)
|
||||||
|
shutil.copy(paths[b], opath)
|
||||||
|
total_count += 1
|
||||||
|
if total_count >= max_files:
|
||||||
|
break
|
||||||
if total_count >= max_files:
|
if total_count >= max_files:
|
||||||
break
|
break
|
||||||
if total_count >= max_files:
|
except:
|
||||||
break
|
print("Exception encountered. Will ignore and continue. Exception info follows.")
|
||||||
|
print(sys.exc_info())
|
||||||
|
|
||||||
with open(progress_file, 'a', encoding='utf-8') as pf:
|
with open(progress_file, 'a', encoding='utf-8') as pf:
|
||||||
pf.write(output_path + "\n")
|
pf.write(folder + "\n")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('-path', type=str, help='Path to search for split files (should be the direct output of phase 1)',
|
parser.add_argument('-path', type=str, help='Path to search for split files (should be the direct output of phase 1)',
|
||||||
default='Y:\\split\\big_podcast')
|
default='Y:\\split\\big_podcast')
|
||||||
parser.add_argument('-progress_file', type=str, help='Place to store all folders that have already been processed', default='Y:\\split\\big_podcast\\already_processed.txt')
|
parser.add_argument('-progress_file', type=str, help='Place to store all folders that have already been processed', default='Y:\\filtered\\big_podcast\\already_processed.txt')
|
||||||
parser.add_argument('-output_path', type=str, help='Path where sampled&filtered files are sent', default='Y:\\filtered\\big_podcast')
|
parser.add_argument('-output_path', type=str, help='Path where sampled&filtered files are sent', default='Y:\\filtered\\big_podcast')
|
||||||
parser.add_argument('-num_threads', type=int, help='Number of concurrent workers processing files.', default=1)
|
parser.add_argument('-num_threads', type=int, help='Number of concurrent workers processing files.', default=6)
|
||||||
parser.add_argument('-max_samples_per_folder', type=int, help='Maximum number of clips that can be extracted from each folder.', default=2000)
|
parser.add_argument('-max_samples_per_folder', type=int, help='Maximum number of clips that can be extracted from each folder.', default=1000)
|
||||||
parser.add_argument('-classifier_model_opt', type=str, help='Train/test options file that configures the model used to classify the audio clips.',
|
parser.add_argument('-classifier_model_opt', type=str, help='Train/test options file that configures the model used to classify the audio clips.',
|
||||||
default='../options/test_noisy_audio_clips_classifier.yml')
|
default='../options/test_noisy_audio_clips_classifier.yml')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
@ -114,12 +119,13 @@ if __name__ == '__main__':
|
||||||
fullpath = os.path.join(args.path, cast_dir)
|
fullpath = os.path.join(args.path, cast_dir)
|
||||||
if os.path.isdir(fullpath):
|
if os.path.isdir(fullpath):
|
||||||
all_split_files.append(fullpath)
|
all_split_files.append(fullpath)
|
||||||
|
shuffle(all_split_files)
|
||||||
all_split_files = set(all_split_files)
|
all_split_files = set(all_split_files)
|
||||||
|
|
||||||
# Load the already processed files, if present, and get the set difference.
|
# Load the already processed files, if present, and get the set difference.
|
||||||
if os.path.exists(args.progress_file):
|
if os.path.exists(args.progress_file):
|
||||||
with open(args.progress_file, 'r', encoding='utf-8') as pf:
|
with open(args.progress_file, 'r', encoding='utf-8') as pf:
|
||||||
processed = set(pf.readlines())
|
processed = set([l.strip() for l in pf.readlines()])
|
||||||
orig_len = len(all_split_files)
|
orig_len = len(all_split_files)
|
||||||
all_split_files = all_split_files - processed
|
all_split_files = all_split_files - processed
|
||||||
print(f'All folders: {orig_len}, processed files: {len(processed)}; {len(all_split_files)/orig_len}% of files remain to be processed.')
|
print(f'All folders: {orig_len}, processed files: {len(processed)}; {len(all_split_files)/orig_len}% of files remain to be processed.')
|
||||||
|
|
|
@ -18,5 +18,5 @@ if __name__ == '__main__':
|
||||||
'''
|
'''
|
||||||
|
|
||||||
# Build tokenizer vocab
|
# Build tokenizer vocab
|
||||||
mapping = tacotron_symbol_mapping()
|
#mapping = tacotron_symbol_mapping()
|
||||||
print(json.dumps(mapping))
|
#print(json.dumps(mapping))
|
Loading…
Reference in New Issue
Block a user