forked from mrq/DL-Art-School

misc

This commit is contained in:
parent 963f0e9cee
commit be5f052255

codes/scripts/audio/gen/w2v_patcher.py (new file, 15 lines added)
@@ -0,0 +1,15 @@
+import torch
+
+from utils.util import load_model_from_config
+
+if __name__ == '__main__':
+    config = "D:\\dlas\\options\\train_wav2vec_matcher.yml"
+    model_name = "generator"
+    model_path = "D:\\dlas\\experiments\\train_wav2vec_matcher\\models"
+    wav_dump_path = "FIXME"
+
+    model = load_model_from_config(config, model_name, also_load_savepoint=False, load_path=model_path, device='cuda').eval()
+    w2v_logits, audio_samples = torch.load(wav_dump_path)
+
+    w2v_logits_chunked = torch.chunk(w2v_logits, 32)
+    for chunk in w2v_logits_chunked:
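The new file stops at an empty loop body, so the per-chunk processing is left unwritten in this commit. A minimal sketch of how it might continue, reusing the `model` and `w2v_logits_chunked` names defined above (the loop body, the `patched` variable, and the no-grad wrapper are assumptions, not part of the commit):

    # Hypothetical continuation, not in the commit: run each logit chunk through
    # the loaded generator without gradient tracking and collect the outputs.
    patched = []
    with torch.no_grad():
        for chunk in w2v_logits_chunked:
            patched.append(model(chunk.cuda()).cpu())
    patched = torch.cat(patched, dim=0)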
@@ -106,8 +106,8 @@ if __name__ == '__main__':
     """
     parser = argparse.ArgumentParser()
     parser.add_argument('-o', type=str, help='Path to the options YAML file used to train the CLIP model', default='../options/train_voice_voice_clip.yml')
-    parser.add_argument('--num_workers', type=int, help='Number concurrent processes to use', default=2)
-    parser.add_argument('--root_path', type=str, help='Root path to search for audio directories from', default='Y:\\filtered\\big_podcast')
+    parser.add_argument('--num_workers', type=int, help='Number concurrent processes to use', default=4)
+    parser.add_argument('--root_path', type=str, help='Root path to search for audio directories from', default='Y:\\filtered\\youtube')
     parser.add_argument('--clip_size', type=int, help='Amount of audio samples to pull from each file', default=22050)
     args = parser.parse_args()
 
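The only changes in this hunk are two argparse defaults: --num_workers goes from 2 to 4 and --root_path moves from Y:\filtered\big_podcast to Y:\filtered\youtube. A self-contained check of the new defaults, copying just those two add_argument calls from the hunk (parsing an empty argv here is only for illustration):

    import argparse

    # Re-create only the two options whose defaults changed in this commit.
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_workers', type=int, help='Number concurrent processes to use', default=4)
    parser.add_argument('--root_path', type=str, help='Root path to search for audio directories from', default='Y:\\filtered\\youtube')

    args = parser.parse_args([])  # empty argv, so the new defaults apply
    assert args.num_workers == 4
    assert args.root_path == 'Y:\\filtered\\youtube'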
@ -265,11 +265,11 @@ if __name__ == '__main__':
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
from utils.util import load_model_from_config
|
from utils.util import load_model_from_config
|
||||||
|
|
||||||
diffusion = load_model_from_config('X:\\dlas\\experiments\\train_diffusion_tts9_mel.yml', 'generator',
|
diffusion = load_model_from_config('X:\\dlas\\experiments\\train_diffusion_tts_mel_flat.yml', 'generator',
|
||||||
also_load_savepoint=False,
|
also_load_savepoint=False,
|
||||||
load_path='X:\\dlas\\experiments\\train_diffusion_tts9_mel\\models\\47500_generator_ema.pth').cuda()
|
load_path='X:\\dlas\\experiments\\train_diffusion_tts_mel_flat\\models\\6500_generator.pth').cuda()
|
||||||
opt_eval = {'eval_tsv': 'Y:\\libritts\\test-clean\\transcribed-brief-w2v.tsv', 'diffusion_steps': 100,
|
opt_eval = {'eval_tsv': 'Y:\\libritts\\test-clean\\transcribed-brief-w2v.tsv', 'diffusion_steps': 100,
|
||||||
'conditioning_free': True, 'conditioning_free_k': 1,
|
'conditioning_free': False, 'conditioning_free_k': 1,
|
||||||
'diffusion_schedule': 'linear', 'diffusion_type': 'tts9_mel'}
|
'diffusion_schedule': 'linear', 'diffusion_type': 'tts9_mel'}
|
||||||
env = {'rank': 0, 'base_path': 'D:\\tmp\\test_eval', 'step': 557, 'device': 'cuda', 'opt': {}}
|
env = {'rank': 0, 'base_path': 'D:\\tmp\\test_eval', 'step': 557, 'device': 'cuda', 'opt': {}}
|
||||||
eval = AudioDiffusionFid(diffusion, opt_eval, env)
|
eval = AudioDiffusionFid(diffusion, opt_eval, env)
|
||||||
|
|
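Besides pointing the evaluator at the train_diffusion_tts_mel_flat checkpoint, this hunk flips 'conditioning_free' from True to False. As an illustration of what that kind of flag usually controls, here is a generic classifier-free-guidance combine step; the function name, tensor names, and exact formula are a sketch of the standard technique, not the DLAS sampler:

    import torch

    def conditioning_free_combine(cond_out: torch.Tensor,
                                  uncond_out: torch.Tensor,
                                  k: float = 1.0) -> torch.Tensor:
        # Classifier-free guidance: push the conditioned prediction away from the
        # unconditioned one; k plays the role of conditioning_free_k above.
        return (1 + k) * cond_out - k * uncond_out

    # With 'conditioning_free': False, a sampler would skip this step and use cond_out as-is.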