forked from camenduru/ai-voice-cloning
Forgot to uncomment the block that transcribes and slices when using "transcribe all": I had been piece-processing a huge batch of LibriTTS, and the commented-out block somehow leaked into the repo.
This commit is contained in:
parent
fd306d850d
commit
853c7fdccf
17
src/utils.py
17
src/utils.py
|
@@ -1458,6 +1458,7 @@ class TrainingState():
|
||||||
'lrs': ['lr'],
|
'lrs': ['lr'],
|
||||||
'losses': ['loss_text_ce', 'loss_mel_ce'],
|
'losses': ['loss_text_ce', 'loss_mel_ce'],
|
||||||
'accuracies': [],
|
'accuracies': [],
|
||||||
|
'precisions': [],
|
||||||
'grad_norms': [],
|
'grad_norms': [],
|
||||||
}
|
}
|
||||||
if args.tts_backend == "vall-e":
|
if args.tts_backend == "vall-e":
|
||||||
|
@@ -1481,6 +1482,11 @@ class TrainingState():
|
||||||
'ar-half.loss.acc', 'nar-half.loss.acc',
|
'ar-half.loss.acc', 'nar-half.loss.acc',
|
||||||
'ar-quarter.loss.acc', 'nar-quarter.loss.acc',
|
'ar-quarter.loss.acc', 'nar-quarter.loss.acc',
|
||||||
]
|
]
|
||||||
|
keys['precisions'] = [
|
||||||
|
'ar.loss.precision', 'nar.loss.precision',
|
||||||
|
'ar-half.loss.precision', 'nar-half.loss.precision',
|
||||||
|
'ar-quarter.loss.precision', 'nar-quarter.loss.precision',
|
||||||
|
]
|
||||||
keys['grad_norms'] = ['ar.grad_norm', 'nar.grad_norm', 'ar-half.grad_norm', 'nar-half.grad_norm', 'ar-quarter.grad_norm', 'nar-quarter.grad_norm']
|
keys['grad_norms'] = ['ar.grad_norm', 'nar.grad_norm', 'ar-half.grad_norm', 'nar-half.grad_norm', 'ar-quarter.grad_norm', 'nar-quarter.grad_norm']
|
||||||
|
|
||||||
for k in keys['lrs']:
|
for k in keys['lrs']:
|
||||||
|
@@ -1495,6 +1501,12 @@ class TrainingState():
|
||||||
|
|
||||||
self.statistics['loss'].append({'epoch': epoch, 'it': self.it, 'value': self.info[k], 'type': k})
|
self.statistics['loss'].append({'epoch': epoch, 'it': self.it, 'value': self.info[k], 'type': k})
|
||||||
|
|
||||||
|
for k in keys['precisions']:
|
||||||
|
if k not in self.info:
|
||||||
|
continue
|
||||||
|
|
||||||
|
self.statistics['loss'].append({'epoch': epoch, 'it': self.it, 'value': self.info[k], 'type': k})
|
||||||
|
|
||||||
for k in keys['losses']:
|
for k in keys['losses']:
|
||||||
if k not in self.info:
|
if k not in self.info:
|
||||||
continue
|
continue
|
||||||
|
@@ -1671,6 +1683,9 @@ class TrainingState():
|
||||||
for k in data:
|
for k in data:
|
||||||
if data[k] is None:
|
if data[k] is None:
|
||||||
continue
|
continue
|
||||||
|
if k not in averager['metrics']:
|
||||||
|
averager['metrics'][k] = [ data[k] ]
|
||||||
|
else:
|
||||||
averager['metrics'][k].append( data[k] )
|
averager['metrics'][k].append( data[k] )
|
||||||
|
|
||||||
unq[f'{it}_{mode}_{name}'] = averager
|
unq[f'{it}_{mode}_{name}'] = averager
|
||||||
|
@@ -1685,6 +1700,8 @@ class TrainingState():
|
||||||
if args.tts_backend == "vall-e":
|
if args.tts_backend == "vall-e":
|
||||||
stats = unq[it]
|
stats = unq[it]
|
||||||
data = {k: sum(v) / len(v) for k, v in stats['metrics'].items() if k not in blacklist }
|
data = {k: sum(v) / len(v) for k, v in stats['metrics'].items() if k not in blacklist }
|
||||||
|
#data = {k: min(v) for k, v in stats['metrics'].items() if k not in blacklist }
|
||||||
|
#data = {k: max(v) for k, v in stats['metrics'].items() if k not in blacklist }
|
||||||
data['name'] = stats['name']
|
data['name'] = stats['name']
|
||||||
data['mode'] = stats['mode']
|
data['mode'] = stats['mode']
|
||||||
data['steps'] = len(stats['metrics']['it'])
|
data['steps'] = len(stats['metrics']['it'])
|
||||||
|
|
|
@@ -221,7 +221,6 @@ def prepare_all_datasets( language, validation_text_length, validation_audio_len
|
||||||
messages = []
|
messages = []
|
||||||
voices = get_voice_list()
|
voices = get_voice_list()
|
||||||
|
|
||||||
"""
|
|
||||||
for voice in voices:
|
for voice in voices:
|
||||||
print("Processing:", voice)
|
print("Processing:", voice)
|
||||||
message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
|
message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
|
||||||
|
@@ -232,7 +231,6 @@ def prepare_all_datasets( language, validation_text_length, validation_audio_len
|
||||||
print("Processing:", voice)
|
print("Processing:", voice)
|
||||||
message = slice_dataset( voice, trim_silence=trim_silence, start_offset=slice_start_offset, end_offset=slice_end_offset, results=None, progress=progress )
|
message = slice_dataset( voice, trim_silence=trim_silence, start_offset=slice_start_offset, end_offset=slice_end_offset, results=None, progress=progress )
|
||||||
messages.append(message)
|
messages.append(message)
|
||||||
"""
|
|
||||||
|
|
||||||
for voice in voices:
|
for voice in voices:
|
||||||
print("Processing:", voice)
|
print("Processing:", voice)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user