forked from mrq/DL-Art-School

commit 4d53c66602 (parent 4d5688be47)
simplify span selecting logic in tfdpc

@@ -84,7 +84,6 @@ class ConditioningEncoder(nn.Module):
                  do_checkpointing=False,
                  time_proj=True):
         super().__init__()
-        attn = []
         self.init = nn.Conv1d(cond_dim, embedding_dim, kernel_size=1)
         self.time_proj = time_proj
         if time_proj:

@@ -211,10 +210,11 @@ class TransformerDiffusionWithPointConditioning(nn.Module):
             conditioning_input[:,:,tstart:tstart+tclip] = 0

         if cond_left is None and self.new_cond:
-            cond_left = conditioning_input[:,:,:max(cond_start, 20)]
-            left_pt = cond_start-1
-            cond_right = conditioning_input[:,:,min(N+cond_start, conditioning_input.shape[-1]-20):]
-            right_pt = min(cond_right.shape[-1]-1, cond_right.shape[-1] - (conditioning_input.shape[-1] - (N+cond_start)))
+            assert cond_start > 20 and (cond_start+N+20 <= conditioning_input.shape[-1])
+            cond_left = conditioning_input[:,:,:cond_start]
+            left_pt = -1
+            cond_right = conditioning_input[:,:,cond_start+N:]
+            right_pt = 0
         elif cond_left is None:
             assert conditioning_input.shape[-1] - cond_start - N >= 0, f'Some sort of conditioning misalignment, {conditioning_input.shape[-1], cond_start, N}'
             cond_pre = conditioning_input[:,:,:cond_start]
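
The simplified selection can be read as a standalone routine: rather than clamping the slice boundaries with min/max, it asserts a 20-frame margin on both sides of the N-frame target span and then takes plain slices. A minimal sketch (hypothetical function name and shapes; `conditioning_input` is assumed to be a (batch, channels, time) tensor):

    import torch

    def select_cond_spans(conditioning_input, cond_start, N):
        # Target span is conditioning_input[:, :, cond_start:cond_start+N]. Instead of
        # clamping boundaries (the old min/max logic), require a >=20-frame margin on
        # each side, then slice cleanly.
        assert cond_start > 20 and (cond_start + N + 20 <= conditioning_input.shape[-1])
        cond_left = conditioning_input[:, :, :cond_start]       # everything before the span
        cond_right = conditioning_input[:, :, cond_start + N:]  # everything after it
        return cond_left, cond_right

    # Hypothetical shapes: batch=2, 256 channels, 400 frames.
    left, right = select_cond_spans(torch.randn(2, 256, 400), cond_start=100, N=150)
    print(left.shape, right.shape)  # torch.Size([2, 256, 100]) torch.Size([2, 256, 150])
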
@@ -292,13 +292,13 @@ class TransformerDiffusionWithPointConditioning(nn.Module):
     def before_step(self, step):
         scaled_grad_parameters = list(itertools.chain.from_iterable([lyr.out.parameters() for lyr in self.layers])) + \
                                  list(itertools.chain.from_iterable([lyr.prenorm.parameters() for lyr in self.layers]))

         # Scale back the gradients of the blkout and prenorm layers by a constant factor. These get two orders of magnitudes
         # higher gradients. Ideally we would use parameter groups, but ZeroRedundancyOptimizer makes this trickier than
         # directly fiddling with the gradients.
-        if not self.new_cond: # Not really related, I just don't want to add a new config.
-            for p in scaled_grad_parameters:
-                if hasattr(p, 'grad') and p.grad is not None:
-                    p.grad *= .2
+        for p in scaled_grad_parameters:
+            if hasattr(p, 'grad') and p.grad is not None:
+                p.grad *= .2
+

 @register_model
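
The before_step hook itself is unchanged apart from dropping the `if not self.new_cond:` guard, so the 0.2x gradient scaling always applies again. A minimal sketch of the pattern, with a hypothetical toy model standing in for the real layers:

    import itertools
    import torch
    import torch.nn as nn

    # Hypothetical stand-in: each layer exposes 'out' and 'prenorm' submodules.
    layers = nn.ModuleList([nn.ModuleDict({'out': nn.Linear(8, 8), 'prenorm': nn.LayerNorm(8)})
                            for _ in range(3)])
    opt = torch.optim.Adam(layers.parameters(), lr=1e-4)

    loss = sum(lyr['out'](lyr['prenorm'](torch.randn(4, 8))).square().mean() for lyr in layers)
    loss.backward()

    # before_step(): shrink the gradients of the out/prenorm parameters by 5x, since
    # they empirically receive ~two orders of magnitude larger gradients.
    scaled = list(itertools.chain.from_iterable(lyr['out'].parameters() for lyr in layers)) + \
             list(itertools.chain.from_iterable(lyr['prenorm'].parameters() for lyr in layers))
    for p in scaled:
        if p.grad is not None:
            p.grad *= .2
    opt.step()
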
@@ -434,20 +434,20 @@ class MusicDiffusionFid(evaluator.Evaluator):


 if __name__ == '__main__':
-    diffusion = load_model_from_config('X:\\dlas\\experiments\\train_music_cheater_gen_ar_prior.yml', 'generator',
+    diffusion = load_model_from_config('X:\\dlas\\experiments\\train_music_cheater_gen.yml', 'generator',
                                        also_load_savepoint=False,
-                                       load_path='X:\\dlas\\experiments\\train_music_diffusion_tfd12_cheater_gen_ar_prior\\models\\43500_generator_ema.pth'
+                                       load_path='X:\\dlas\\experiments\\train_music_cheater_gen_v5_cosine_40_lyr\\models\\18500_generator_ema.pth'
                                        ).cuda()
     opt_eval = {'path': 'Y:\\split\\yt-music-eval', # eval music, mostly electronica. :)
                 #'path': 'E:\\music_eval', # this is music from the training dataset, including a lot more variety.
-                'diffusion_steps': 256, # basis: 192
-                'conditioning_free': True, 'conditioning_free_k': 1, 'use_ddim': True, 'clip_audio': False,
-                'diffusion_schedule': 'linear', 'diffusion_type': 'from_ar_prior',
+                'diffusion_steps': 128, # basis: 192
+                'conditioning_free': False, 'conditioning_free_k': 1, 'use_ddim': True, 'clip_audio': False,
+                'diffusion_schedule': 'cosine', 'diffusion_type': 'cheater_gen',
                 # Slope 1: 1.03x, 2: 1.06, 4: 1.135, 8: 1.27, 16: 1.54
                 #'causal': True, 'causal_slope': 4, # DONT FORGET TO INCREMENT THE STEP!
                 #'partial_low': 128, 'partial_high': 192
                 }
-    env = {'rank': 0, 'base_path': 'D:\\tmp\\test_eval_music', 'step': 200, 'device': 'cuda', 'opt': {}}
+    env = {'rank': 0, 'base_path': 'D:\\tmp\\test_eval_music', 'step': 201, 'device': 'cuda', 'opt': {}}
     eval = MusicDiffusionFid(diffusion, opt_eval, env)
     fds = []
     for i in range(2):
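
The eval switch above moves from the `from_ar_prior` model to the `cheater_gen` model, halves the step count, and turns conditioning-free guidance off. As an assumption about what `conditioning_free_k` controls when guidance is on (the usual classifier-free-style blend, not a quote from this codebase):

    def guided_output(eps_cond, eps_uncond, k):
        # Hypothetical guidance blend: amplify the conditioned prediction and subtract
        # the unconditioned one. k = 0 reduces to the plain conditioned output.
        return (1 + k) * eps_cond - k * eps_uncond
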
@@ -98,12 +98,12 @@ class RandomAudioCropInjector(Injector):
         self.max_crop_sz = opt['max_crop_size']
         self.lengths_key = opt['lengths_key']
         self.crop_start_key = opt['crop_start_key']
+        self.min_buffer = opt_get(opt, ['min_buffer'], 0)
         self.rand_buffer_ptr=9999
         self.rand_buffer_sz=5000


     def forward(self, state):
-        crop_sz = random.randint(self.min_crop_sz, self.max_crop_sz)
         inp = state[self.input]
         if torch.distributed.get_world_size() > 1:
             # All processes should agree, otherwise all processes wait to process max_crop_sz (effectively). But agreeing too often
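
`opt_get` is the codebase's nested-options lookup helper, so `min_buffer` defaults to 0 when absent and existing configs keep their behavior. Roughly (a sketch of its semantics, not the exact implementation):

    def opt_get(opt, keys, default=None):
        # Walk a nested options dict by a list of keys, falling back to `default`
        # when any level is missing, e.g. opt_get(opt, ['min_buffer'], 0).
        for k in keys:
            if not isinstance(opt, dict) or k not in opt:
                return default
            opt = opt[k]
        return opt
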
@@ -121,12 +121,14 @@ class RandomAudioCropInjector(Injector):
             len = torch.min(lens)
         else:
             len = inp.shape[-1]
-        margin = len - crop_sz
+
+        margin = len - crop_sz - self.min_buffer
         if margin < 0:
-            res = {self.output: inp}
+            start = self.min_buffer
         else:
-            start = random.randint(0, margin)
-            res = {self.output: inp[:, :, start:start+crop_sz]}
+            start = random.randint(0, margin) + self.min_buffer
+
+        res = {self.output: inp[:, :, start:start+crop_sz]}
         if self.crop_start_key is not None:
             res[self.crop_start_key] = start
         return res
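
The net effect of the two injector hunks: the crop start is always at least `min_buffer` frames into the clip, and `res` is built once outside the branch. A minimal standalone sketch (hypothetical function name and shapes):

    import random
    import torch

    def crop_with_buffer(inp, crop_sz, min_buffer):
        # Choose a crop of crop_sz frames whose start is always >= min_buffer, so at
        # least min_buffer frames of left context exist before the crop.
        margin = inp.shape[-1] - crop_sz - min_buffer
        if margin < 0:
            start = min_buffer  # clip too short to randomize; pin to the earliest legal start
        else:
            start = random.randint(0, margin) + min_buffer
        return inp[:, :, start:start + crop_sz], start

    clip, start = crop_with_buffer(torch.randn(1, 100, 1000), crop_sz=600, min_buffer=40)
    print(start >= 40, clip.shape)  # True torch.Size([1, 100, 600])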