forked from mrq/DL-Art-School
Constrain wav_aug a bit more
This commit is contained in:
parent
f126040da2
commit
62c7570512
|
@ -23,7 +23,9 @@ class WavAugmentor:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def augment(self, wav, sample_rate):
|
def augment(self, wav, sample_rate):
|
||||||
speed_effect = ['speed', rdstr(.7, 1)]
|
speed_effect = ['speed', rdstr(.8, 1)]
|
||||||
|
'''
|
||||||
|
Band effects are disabled until I can audit them better.
|
||||||
band_effects = [
|
band_effects = [
|
||||||
['reverb', '-w'],
|
['reverb', '-w'],
|
||||||
['reverb'],
|
['reverb'],
|
||||||
|
@ -39,15 +41,16 @@ class WavAugmentor:
|
||||||
['sinc', '3k-4k']
|
['sinc', '3k-4k']
|
||||||
]
|
]
|
||||||
band_effect = random.choice(band_effects)
|
band_effect = random.choice(band_effects)
|
||||||
|
'''
|
||||||
volume_effects = [
|
volume_effects = [
|
||||||
['loudness', rdi(10,-2)],
|
['loudness', rdi(10,-2)],
|
||||||
['overdrive', rdi(20,0), rdi(20,0)],
|
['overdrive', rdi(20,0), rdi(20,0)],
|
||||||
]
|
]
|
||||||
vol_effect = random.choice(volume_effects)
|
vol_effect = random.choice(volume_effects)
|
||||||
effects = [speed_effect, band_effect, vol_effect]
|
effects = [speed_effect, vol_effect]
|
||||||
out, sr = torchaudio.sox_effects.apply_effects_tensor(wav, sample_rate, effects)
|
out, sr = torchaudio.sox_effects.apply_effects_tensor(wav, sample_rate, effects)
|
||||||
# Add a variable amount of noise
|
# Add a variable amount of noise
|
||||||
out = out + torch.rand_like(out) * random.random() * .05
|
out = out + torch.rand_like(out) * random.random() * .03
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -51,7 +51,7 @@ if __name__ == "__main__":
|
||||||
torch.backends.cudnn.benchmark = True
|
torch.backends.cudnn.benchmark = True
|
||||||
want_metrics = False
|
want_metrics = False
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_vqvae_audio_lj.yml')
|
parser.add_argument('-opt', type=str, help='Path to options YAML file.', default='../options/test_gpt_tts_lj.yml')
|
||||||
opt = option.parse(parser.parse_args().opt, is_train=False)
|
opt = option.parse(parser.parse_args().opt, is_train=False)
|
||||||
opt = option.dict_to_nonedict(opt)
|
opt = option.dict_to_nonedict(opt)
|
||||||
utils.util.loaded_options = opt
|
utils.util.loaded_options = opt
|
||||||
|
|
|
@ -20,12 +20,13 @@ if __name__ == '__main__':
|
||||||
clip = clip[:,0]
|
clip = clip[:,0]
|
||||||
clip = clip[:window].unsqueeze(0)
|
clip = clip[:window].unsqueeze(0)
|
||||||
clip = clip / 32768.0 # Normalize
|
clip = clip / 32768.0 # Normalize
|
||||||
|
clip = clip + torch.rand_like(clip) * .03 # Noise (this is how the model was trained)
|
||||||
assert sr == 24000
|
assert sr == 24000
|
||||||
clips.append(clip)
|
clips.append(clip)
|
||||||
clips = torch.stack(clips, dim=0)
|
clips = torch.stack(clips, dim=0)
|
||||||
|
|
||||||
resnet = resnet34()
|
resnet = resnet34()
|
||||||
sd = torch.load('../experiments/train_byol_audio_clips/models/66000_generator.pth')
|
sd = torch.load('../experiments/train_byol_audio_clips/models/57000_generator.pth')
|
||||||
sd = extract_byol_model_from_state_dict(sd)
|
sd = extract_byol_model_from_state_dict(sd)
|
||||||
resnet.load_state_dict(sd)
|
resnet.load_state_dict(sd)
|
||||||
embedding = resnet(clips, return_pool=True)
|
embedding = resnet(clips, return_pool=True)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user