mild updates
parent ff35d13b99
commit 9e9ae328f2
@@ -132,20 +132,17 @@ def register_ctc_code_generator(opt_net, opt):
 def inf():
-    sd = torch.load('D:\\dlas\\experiments\\train_encoder_build_ctc_alignments\\models\\11000_generator.pth', map_location='cpu')
-    model = CtcCodeGenerator(layers=10).eval()
+    sd = torch.load('D:\\dlas\\experiments\\train_encoder_build_ctc_alignments_medium\\models\\24000_generator.pth', map_location='cpu')
+    model = CtcCodeGenerator(model_dim=1024,layers=32).eval()
     model.load_state_dict(sd)
-    #raw_batch = torch.load('raw_batch.pth')
     with torch.no_grad():
         from data.audio.unsupervised_audio_dataset import load_audio
         from scripts.audio.gen.speech_synthesis_utils import wav_to_mel
-        #loss = model(wav_to_mel(raw_batch['conditioning'][0]),
-        #              raw_batch['ctc_raw_codes'][0].unsqueeze(0),
-        #              raw_batch['ctc_pads'][0].unsqueeze(0),
-        #              raw_batch['ctc_repeats'][0].unsqueeze(0),
-        #              raw_batch['ctc_raw_lengths'][0].unsqueeze(0),)
-        ref_mel = wav_to_mel(load_audio("D:\\tortoise-tts\\voices\\atkins\\1.wav", 22050))
-        ctc = model.generate(ref_mel, ["i suppose though it's too early for them"])
+        ref_mel = torch.cat([wav_to_mel(load_audio("D:\\tortoise-tts\\voices\\atkins\\1.wav", 22050))[:,:,:450],
+                             wav_to_mel(load_audio("D:\\tortoise-tts\\voices\\kennard\\1.wav", 22050))[:,:,:450],
+                             wav_to_mel(load_audio("D:\\tortoise-tts\\voices\\grace\\1.wav", 22050))[:,:,:450],
+                             wav_to_mel(load_audio("D:\\tortoise-tts\\voices\\atkins\\1.wav", 22050))[:,:,:450]], dim=0)
+        ctc = model.generate(ref_mel, (["i suppose though it's too early for them"] * 3) + ["i suppose though it's too early for them, dear"])
         print("Break")
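The updated inference path above trims every reference mel to 450 frames before concatenating along dim 0, so a single generate() call can condition on several voices at once. Below is a minimal, self-contained sketch of that batching pattern; the batch_reference_mels helper, the 80-bin mels, and the random tensors are hypothetical stand-ins for the repo's wav_to_mel(load_audio(...)) outputs, and the zero-padding of short clips is an added convenience not present in the original code.

import torch

def batch_reference_mels(mels, max_frames=450):
    """Trim (or right-pad with zeros) each (1, n_mels, T) mel to max_frames, then stack to (B, n_mels, max_frames)."""
    out = []
    for mel in mels:
        mel = mel[:, :, :max_frames]                      # trim long clips
        if mel.shape[-1] < max_frames:                    # pad short clips on the right
            mel = torch.nn.functional.pad(mel, (0, max_frames - mel.shape[-1]))
        out.append(mel)
    return torch.cat(out, dim=0)

# Fake mels standing in for wav_to_mel(load_audio(...)) of four reference clips.
fake_mels = [torch.randn(1, 80, t) for t in (612, 388, 450, 501)]
ref_mel = batch_reference_mels(fake_mels)
print(ref_mel.shape)  # torch.Size([4, 80, 450])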
@@ -379,7 +379,7 @@ class DiffusionTts(nn.Module):
         assert conditioning_input is not None
         if self.super_sampling_enabled:
             assert lr_input is not None
-            if self.super_sampling_max_noising_factor > 0:
+            if self.training and self.super_sampling_max_noising_factor > 0:
                 noising_factor = random.uniform(0,self.super_sampling_max_noising_factor)
                 lr_input = torch.randn_like(lr_input) * noising_factor + lr_input
             lr_input = F.interpolate(lr_input, size=(x.shape[-1],), mode='nearest')
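The one-line change above gates the random noising of lr_input on self.training, so evaluation sees the clean low-rate input while training still gets the augmentation before nearest-neighbor interpolation up to the target length. A minimal sketch of that pattern, using a hypothetical stub module rather than DiffusionTts itself:

import random
import torch
import torch.nn as nn
import torch.nn.functional as F

class NoisySuperSamplingStub(nn.Module):
    """Hypothetical stand-in: noise the low-rate input only during training,
    then stretch it to the length of the full-rate signal."""
    def __init__(self, max_noising_factor=0.1):
        super().__init__()
        self.max_noising_factor = max_noising_factor

    def forward(self, x, lr_input):
        if self.training and self.max_noising_factor > 0:
            noising_factor = random.uniform(0, self.max_noising_factor)
            lr_input = torch.randn_like(lr_input) * noising_factor + lr_input
        # Match the temporal length of the full-rate signal x.
        return F.interpolate(lr_input, size=(x.shape[-1],), mode='nearest')

stub = NoisySuperSamplingStub()
x = torch.randn(2, 1, 4096)    # full-rate signal
lr = torch.randn(2, 1, 1024)   # low-rate conditioning
stub.train()
print(stub(x, lr).shape)       # noised, upsampled to (2, 1, 4096)
stub.eval()
print(stub(x, lr).shape)       # clean, upsampled to (2, 1, 4096)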
@@ -53,7 +53,7 @@ if __name__ == '__main__':
     parser.add_argument('-diffusion_model_path', type=str, help='Path to saved model weights', default='X:\\dlas\\experiments\\train_diffusion_tts5_medium\\models\\73000_generator_ema.pth')
     parser.add_argument('-sr_opt', type=str, help='Path to options YAML file used to train the SR diffusion model', default='X:\\dlas\\experiments\\train_diffusion_tts6_upsample.yml')
     parser.add_argument('-sr_diffusion_model_name', type=str, help='Name of the SR diffusion model in opt.', default='generator')
-    parser.add_argument('-sr_diffusion_model_path', type=str, help='Path to saved model weights for the SR diffuser', default='X:\\dlas\\experiments\\train_diffusion_tts6_upsample\\models\\26500_generator_ema.pth')
+    parser.add_argument('-sr_diffusion_model_path', type=str, help='Path to saved model weights for the SR diffuser', default='X:\\dlas\\experiments\\train_diffusion_tts6_upsample_continued\\models\\41000_generator_ema.pth')
     parser.add_argument('-voice', type=str, help='Type of conditioning voice', default='puppy')
     parser.add_argument('-diffusion_steps', type=int, help='Number of diffusion steps to perform to create the generate. Lower steps reduces quality, but >40 is generally pretty good.', default=100)
     parser.add_argument('-output_path', type=str, help='Where to store outputs.', default='../results/use_diffuse_voice_translation')
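This change only moves the -sr_diffusion_model_path default to the continued-training checkpoint; the flag itself is unchanged, so the earlier checkpoint remains usable by overriding the default on the command line. A trimmed, hypothetical sketch of how the new default interacts with an explicit override (only this one argument is shown):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-sr_diffusion_model_path', type=str,
                    help='Path to saved model weights for the SR diffuser',
                    default='X:\\dlas\\experiments\\train_diffusion_tts6_upsample_continued\\models\\41000_generator_ema.pth')

# No override: the new continued-training checkpoint is used.
print(parser.parse_args([]).sr_diffusion_model_path)

# Explicit override restores the checkpoint that was the default before this commit.
print(parser.parse_args(
    ['-sr_diffusion_model_path',
     'X:\\dlas\\experiments\\train_diffusion_tts6_upsample\\models\\26500_generator_ema.pth']
).sr_diffusion_model_path)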