# Source file: DL-Art-School/codes/data/audio/wav_aug.py
# (Repository web-view listing: 63 lines, 1.8 KiB, Python.)

import random
import torch
import torchaudio.sox_effects
# (Web-view history timestamp for the import below: 2022-03-16 03:43:00 +00:00)
from models.audio.tts.tacotron2.taco_utils import load_wav_to_torch
def rdstr(l=0, h=1):
    """Return a random float between ``l`` and ``h``, formatted as a string.

    The value is ``l + (h - l) * random.random()``: it starts at ``l`` and,
    barring floating-point rounding, stays below ``h``. The result is a string
    because it is used directly as an argument in sox effect lists.

    Raises:
        AssertionError: if ``h`` is not strictly greater than ``l``.
    """
    assert h > l
    # random.uniform(a, b) evaluates a + (b - a) * random(), i.e. the same draw.
    return str(random.uniform(l, h))
def rdi(e, s=0):
    """Return a random integer from the closed range ``[s, e]`` as a string.

    Note the argument order: the upper bound ``e`` comes first, and the lower
    bound ``s`` is optional and defaults to 0.
    """
    picked = random.randint(s, e)
    return str(picked)
class WavAugmentor:
    """Applies randomized sox-based augmentation to a waveform tensor.

    Each call to :meth:`augment` slows the audio by a random factor, applies
    one randomly chosen volume-type effect, and then mixes in a small random
    amount of noise.
    """

    def __init__(self):
        pass

    def augment(self, wav, sample_rate):
        """Return a randomly augmented version of ``wav``.

        Args:
            wav: Waveform tensor passed straight to
                ``torchaudio.sox_effects.apply_effects_tensor``.
                NOTE(review): torchaudio documents this as a 2D tensor;
                confirm the layout callers actually supply.
            sample_rate: Sample rate of ``wav``. The returned audio is
                resampled back to this same rate.

        Returns:
            The augmented waveform tensor. Because the speed factor is drawn
            from 0.8 up to (but not including) 1, the output is generally
            longer than the input.
        """
        speed_effect = ['speed', rdstr(.8, 1)]
        # sox's `speed` effect only rewrites the stream's sample rate. Since
        # this method returns just the samples (the resulting rate is not
        # handed back to the caller), a `rate` effect must follow it to
        # resample to the original rate; otherwise the samples come back
        # unchanged and the speed augmentation is silently lost.
        restore_rate_effect = ['rate', str(sample_rate)]

        # Band effects are disabled until I can audit them better.
        # band_effects = [
        #     ['reverb', '-w'],
        #     ['reverb'],
        #     ['band', rdi(8000, 3000), rdi(1000, 100)],
        #     ['bandpass', rdi(8000, 3000), rdi(1000, 100)],
        #     ['bass', rdi(20,-20)],
        #     ['treble', rdi(20,-20)],
        #     ['dither'],
        #     ['equalizer', rdi(3000, 100), rdi(1000, 100), rdi(10, -10)],
        #     ['hilbert'],
        #     ['sinc', '3k'],
        #     ['sinc', '-4k'],
        #     ['sinc', '3k-4k']
        # ]
        # band_effect = random.choice(band_effects)

        volume_effects = [
            ['loudness', rdi(10, -2)],
            ['overdrive', rdi(20, 0), rdi(20, 0)],
        ]
        vol_effect = random.choice(volume_effects)

        effects = [speed_effect, restore_rate_effect, vol_effect]
        out, _ = torchaudio.sox_effects.apply_effects_tensor(wav, sample_rate, effects)

        # Add a variable amount of noise: uniform samples in [0, 1) scaled by
        # a random factor below 0.03.
        out = out + torch.rand_like(out) * random.random() * .03
        return out
if __name__ == '__main__':
    # Manual smoke test: write ten independently augmented variants of a local
    # clip to out0.wav ... out9.wav for listening.
    demo_rate = 24000
    waveform, _ = load_wav_to_torch('obama1.wav')
    # Scale by 1/32768 -- presumably maps 16-bit PCM sample values into
    # [-1, 1); confirm against what load_wav_to_torch returns.
    waveform = waveform / 32768.0
    augmentor = WavAugmentor()
    for j in range(10):
        augmented = augmentor.augment(waveform, demo_rate)
        torchaudio.save(f'out{j}.wav', augmented, demo_rate)