86fd3ad7fd
These two are tested, full support for training to come.
93 lines
2.5 KiB
Python
93 lines
2.5 KiB
Python
import torch
|
|
import numpy as np
|
|
from scipy.signal import get_window
|
|
import librosa.util as librosa_util
|
|
|
|
|
|
def window_sumsquare(window, n_frames, hop_length=200, win_length=800,
                     n_fft=800, dtype=np.float32, norm=None):
    """
    # from librosa 0.6
    Compute the sum-square envelope of a window function at a given hop length.

    This is used to estimate modulation effects induced by windowing
    observations in short-time fourier transforms.

    Parameters
    ----------
    window : string, tuple, number, callable, or list-like
        Window specification, as in `get_window`

    n_frames : int > 0
        The number of analysis frames

    hop_length : int > 0
        The number of samples to advance between frames

    win_length : int > 0 or None [optional]
        The length of the window function. Defaults to 800; pass `None`
        to make it match `n_fft`.

    n_fft : int > 0
        The length of each analysis frame.

    dtype : np.dtype
        The data type of the output

    norm : [optional]
        Normalization mode, forwarded unchanged to `librosa.util.normalize`
        before the window is squared.

    Returns
    -------
    wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
        The sum-squared envelope of the window function
    """
    if win_length is None:
        win_length = n_fft

    n = n_fft + hop_length * (n_frames - 1)
    x = np.zeros(n, dtype=dtype)

    # Compute the squared window at the desired length
    win_sq = get_window(window, win_length, fftbins=True)
    win_sq = librosa_util.normalize(win_sq, norm=norm)**2
    # `size` is passed by keyword: newer librosa releases reject it as a
    # positional argument, while older releases accept either form.
    win_sq = librosa_util.pad_center(win_sq, size=n_fft)

    # Fill the envelope by overlap-adding the squared window at every hop;
    # the slice bounds are clamped so a frame never writes past the buffer.
    for i in range(n_frames):
        sample = i * hop_length
        x[sample:min(n, sample + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))]
    return x
|
|
|
|
|
|
def griffin_lim(magnitudes, stft_fn, n_iters=30):
    """
    Estimate a signal from spectrogram magnitudes by iterating between
    `stft_fn.transform` and `stft_fn.inverse`, starting from a random phase.

    PARAMS
    ------
    magnitudes: spectrogram magnitudes (a tensor; its `.size()` fixes the
        shape of the random initial phase and its `.device` is where that
        phase is placed)
    stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods
    n_iters: number of transform/inverse refinement rounds run after the
        initial inverse

    RETURNS
    -------
    signal: the last result of `stft_fn.inverse(magnitudes, angles)` with
        dimension 1 squeezed out
    """
    # Random initial phase drawn with NumPy; the exp -> angle round trip wraps
    # the uniform draw into NumPy's principal-angle range.
    angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))
    angles = angles.astype(np.float32)
    # Keep the phase on the same device as the magnitudes so `stft_fn.inverse`
    # can combine them even when `magnitudes` does not live on the CPU.
    angles = torch.from_numpy(angles).to(magnitudes.device)
    signal = stft_fn.inverse(magnitudes, angles).squeeze(1)

    for _ in range(n_iters):
        # Keep the given magnitudes, replace only the phase estimate.
        _, angles = stft_fn.transform(signal)
        signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
    return signal
|
|
|
|
|
|
def dynamic_range_compression(x, C=1, clip_val=1e-5):
    """
    Log-compress `x` after flooring it at `clip_val`.

    PARAMS
    ------
    x: tensor to compress
    C: compression factor
    clip_val: lower bound applied to `x` before scaling and taking the log
    """
    floored = torch.clamp(x, min=clip_val)
    scaled = floored * C
    return torch.log(scaled)
|
|
|
|
|
|
def dynamic_range_decompression(x, C=1):
    """
    Exponentiate `x` and divide the result by `C`.

    PARAMS
    ------
    x: log-compressed tensor
    C: compression factor used to compress
    """
    expanded = torch.exp(x)
    return expanded / C