forked from mrq/DL-Art-School
misc
This commit is contained in:
parent
611316faab
commit
e5d97dfd56
11
codes/scripts/audio/play_with_spectral_representations.py
Normal file
11
codes/scripts/audio/play_with_spectral_representations.py
Normal file
|
@ -0,0 +1,11 @@
|
|||
import torchvision.utils
|
||||
|
||||
from utils.music_utils import music2mel, music2cqt
|
||||
from utils.util import load_audio
|
||||
|
||||
if __name__ == '__main__':
    # Scratch script: render the mel and CQT representations of one clip to PNG files
    # in the current working directory so they can be compared by eye.
    clip = load_audio('Y:\\split\\yt-music-eval\\00001.wav', 22050)

    # Both representations are computed before anything is written to disk.
    renders = (
        ('mel.png', music2mel(clip)),
        ('cqt.png', music2cqt(clip)),
    )
    for filename, spectrogram in renders:
        # unsqueeze(1) inserts a dimension at index 1 before saving; (x + 1) / 2 shifts
        # the values for image output (presumably from [-1, 1] to [0, 1] - TODO confirm
        # against the normalization applied in utils.music_utils).
        torchvision.utils.save_image((spectrogram.unsqueeze(1) + 1) / 2, filename)
|
|
@ -5,4 +5,14 @@ https://github.com/neonbjb/demucs
|
|||
conda activate demucs
|
||||
python setup.py install
|
||||
CUDA_VISIBLE_DEVICES=0 python -m demucs /y/split/bt-music-5 --out=/y/separated/bt-music-5 --num_workers=2 --device cuda --two-stems=vocals
|
||||
``
|
||||
```
|
||||
|
||||
Example usage of generate_long_cheaters and generate_long_mels, post demucs:
|
||||
|
||||
```
|
||||
CUDA_VISIBLE_DEVICES=0 python generate_long_mels.py --path=/y/separated/mpm/1 --progress_file=/y/separated/large_mels/mpm/already_processed.txt \
|
||||
--output_path=/y/separated/large_mels/mpm/1 --num_threads=2
|
||||
|
||||
CUDA_VISIBLE_DEVICES=2 python generate_long_cheaters.py --path=/y/separated/large_mels/mpm/3 --progress_file=/y/separated/large_mel_cheaters/mpm/already_processed.txt \
|
||||
--output_path=/y/separated/large_mel_cheaters/mpm/3 --num_threads=1
|
||||
```
|
|
@ -47,9 +47,9 @@ def process_file(file, base_path, output_path, progress_file, duration_per_clip,
|
|||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-path', type=str, help='Path to search for files', default='C:\\Users\\James\\Downloads\\soundcloud-dl\\sc2')
|
||||
parser.add_argument('-progress_file', type=str, help='Place to store all files that have already been processed', default='C:\\Users\\James\\Downloads\\soundcloud-dl\\sc2\\already_processed.txt')
|
||||
parser.add_argument('-output_path', type=str, help='Path for output files', default='Y:\\split\\soundcloud_mixes\\bigmix1')
|
||||
parser.add_argument('-path', type=str, help='Path to search for files', default='Y:\\sources\\soundcloud-mixes\\mixes2')
|
||||
parser.add_argument('-progress_file', type=str, help='Place to store all files that have already been processed', default='Y:\\sources\\soundcloud-mixes\\mixes2\\already_processed.txt')
|
||||
parser.add_argument('-output_path', type=str, help='Path for output files', default='Y:\\split\\soundcloud-mixes2')
|
||||
parser.add_argument('-num_threads', type=int, help='Number of concurrent workers processing files.', default=4)
|
||||
parser.add_argument('-duration', type=int, help='Duration per clip in seconds', default=30)
|
||||
args = parser.parse_args()
|
||||
|
|
|
@ -1,6 +1,31 @@
|
|||
import torch
|
||||
|
||||
|
||||
def music2mel(clip):
    """Convert an audio clip into a mel spectrogram using TorchMelSpectrogramInjector.

    A clip whose shape has exactly one dimension gets a leading dimension added before
    it is handed to the injector (presumably a batch dimension - TODO confirm).

    Args:
        clip: Audio tensor. Assumed to be a torch tensor of waveform samples; the
            expected sample rate is not visible here - verify against callers.

    Returns:
        Whatever the injector stores under its 'out' key for the given clip.
    """
    if len(clip.shape) == 1:
        clip = clip.unsqueeze(0)

    # Imported inside the function, as in the original, so the trainer package is only
    # loaded when a mel spectrogram is actually requested.
    from trainer.injectors.audio_injectors import TorchMelSpectrogramInjector

    injector_opts = {
        'n_mel_channels': 256,
        'mel_fmax': 11000,
        'filter_length': 16000,
        'normalize': True,
        'true_normalization': True,
        # Names of the state-dict entries the injector reads from / writes to.
        'in': 'in',
        'out': 'out',
    }
    mel_injector = TorchMelSpectrogramInjector(injector_opts, {})
    injected = mel_injector({'in': clip})
    return injected['out']
|
||||
|
||||
|
||||
def music2cqt(clip):
    """Convert an audio clip into a normalized constant-Q transform (CQT).

    A clip whose shape has exactly one dimension gets a leading dimension added before
    the transform is applied (presumably a batch dimension - TODO confirm).

    Args:
        clip: Audio tensor. The transform is configured for sr=22050, so the clip is
            assumed to be sampled at that rate - verify against callers.

    Returns:
        The nnAudio CQT output rescaled by ``2 * cqt / 18 - 1``, which maps 0 to -1
        and 18 to 1.
    """
    if len(clip.shape) == 1:
        clip = clip.unsqueeze(0)

    # Imported inside the function, as in the original, so nnAudio is only required
    # when a CQT is actually requested.
    from nnAudio.features.cqt import CQT

    # Visually, filter_scale=.25 seems to be the most descriptive representation, but loses frequency fidelity.
    # It may be desirable to mix filter_scale=.25 with filter_scale=1.
    transform = CQT(sr=22050, hop_length=256, n_bins=256, bins_per_octave=32, filter_scale=.25, norm=1,
                    verbose=False)

    # Normalization constants: the minimum is taken to be 0 and the maximum 18, so the
    # rescale below lands in [-1, 1] for inputs inside that range.
    cqt_max = 18
    return 2 * transform(clip) / cqt_max - 1
|
||||
|
||||
|
||||
def get_mel2wav_model():
|
||||
from models.audio.music.unet_diffusion_waveform_gen_simple import DiffusionWaveformGen
|
||||
model = DiffusionWaveformGen(model_channels=256, in_channels=16, in_mel_channels=256, out_channels=32, channel_mult=[1,2,3,4,4],
|
||||
|
|
Loading…
Reference in New Issue
Block a user