This commit is contained in:
James Betker 2022-03-22 11:40:56 -06:00
parent 963f0e9cee
commit be5f052255
3 changed files with 20 additions and 5 deletions

View File

@@ -0,0 +1,15 @@
import torch
from utils.util import load_model_from_config
if __name__ == '__main__':
# Ad-hoc script settings: the options YAML and model name passed to
# load_model_from_config below.
config = "D:\\dlas\\options\\train_wav2vec_matcher.yml"
model_name = "generator"
# Backslashes are escaped, matching `config` above. Unescaped, the "\t" in
# "\train_wav2vec_matcher" is read as a TAB character and "\d", "\e", "\m"
# are invalid escape sequences, so the path would be silently corrupted.
model_path = "D:\\dlas\\experiments\\train_wav2vec_matcher\\models"
# Placeholder left in the script; it must be pointed at a file that
# torch.load can read before this will run.
wav_dump_path = "FIXME"
model = load_model_from_config(
    config,
    model_name,
    also_load_savepoint=False,
    load_path=model_path,
    device='cuda',
).eval()
# The dump is unpacked as a (w2v_logits, audio_samples) pair.
w2v_logits, audio_samples = torch.load(wav_dump_path)
# Split the logits into at most 32 pieces along dim 0 (torch.chunk's default).
w2v_logits_chunked = torch.chunk(w2v_logits, 32)
for chunk in w2v_logits_chunked:

View File

@@ -106,8 +106,8 @@ if __name__ == '__main__':
"""
parser = argparse.ArgumentParser()
parser.add_argument('-o', type=str, help='Path to the options YAML file used to train the CLIP model', default='../options/train_voice_voice_clip.yml')
parser.add_argument('--num_workers', type=int, help='Number concurrent processes to use', default=2)
parser.add_argument('--root_path', type=str, help='Root path to search for audio directories from', default='Y:\\filtered\\big_podcast')
parser.add_argument('--num_workers', type=int, help='Number concurrent processes to use', default=4)
parser.add_argument('--root_path', type=str, help='Root path to search for audio directories from', default='Y:\\filtered\\youtube')
parser.add_argument('--clip_size', type=int, help='Amount of audio samples to pull from each file', default=22050)
args = parser.parse_args()

View File

@@ -265,11 +265,11 @@ if __name__ == '__main__':
if __name__ == '__main__':
from utils.util import load_model_from_config
diffusion = load_model_from_config('X:\\dlas\\experiments\\train_diffusion_tts9_mel.yml', 'generator',
diffusion = load_model_from_config('X:\\dlas\\experiments\\train_diffusion_tts_mel_flat.yml', 'generator',
also_load_savepoint=False,
load_path='X:\\dlas\\experiments\\train_diffusion_tts9_mel\\models\\47500_generator_ema.pth').cuda()
load_path='X:\\dlas\\experiments\\train_diffusion_tts_mel_flat\\models\\6500_generator.pth').cuda()
opt_eval = {'eval_tsv': 'Y:\\libritts\\test-clean\\transcribed-brief-w2v.tsv', 'diffusion_steps': 100,
'conditioning_free': True, 'conditioning_free_k': 1,
'conditioning_free': False, 'conditioning_free_k': 1,
'diffusion_schedule': 'linear', 'diffusion_type': 'tts9_mel'}
env = {'rank': 0, 'base_path': 'D:\\tmp\\test_eval', 'step': 557, 'device': 'cuda', 'opt': {}}
eval = AudioDiffusionFid(diffusion, opt_eval, env)