Forgot to uncomment the transcribe-and-slice block used by "transcribe all": I had commented it out while piece-processing a huge batch of LibriTTS, and that change somehow leaked over into the repo.

This commit is contained in:
mrq 2023-05-03 21:31:37 +00:00
parent fd306d850d
commit 853c7fdccf
2 changed files with 19 additions and 4 deletions

View File

@ -1458,6 +1458,7 @@ class TrainingState():
'lrs': ['lr'], 'lrs': ['lr'],
'losses': ['loss_text_ce', 'loss_mel_ce'], 'losses': ['loss_text_ce', 'loss_mel_ce'],
'accuracies': [], 'accuracies': [],
'precisions': [],
'grad_norms': [], 'grad_norms': [],
} }
if args.tts_backend == "vall-e": if args.tts_backend == "vall-e":
@ -1481,6 +1482,11 @@ class TrainingState():
'ar-half.loss.acc', 'nar-half.loss.acc', 'ar-half.loss.acc', 'nar-half.loss.acc',
'ar-quarter.loss.acc', 'nar-quarter.loss.acc', 'ar-quarter.loss.acc', 'nar-quarter.loss.acc',
] ]
keys['precisions'] = [
'ar.loss.precision', 'nar.loss.precision',
'ar-half.loss.precision', 'nar-half.loss.precision',
'ar-quarter.loss.precision', 'nar-quarter.loss.precision',
]
keys['grad_norms'] = ['ar.grad_norm', 'nar.grad_norm', 'ar-half.grad_norm', 'nar-half.grad_norm', 'ar-quarter.grad_norm', 'nar-quarter.grad_norm'] keys['grad_norms'] = ['ar.grad_norm', 'nar.grad_norm', 'ar-half.grad_norm', 'nar-half.grad_norm', 'ar-quarter.grad_norm', 'nar-quarter.grad_norm']
for k in keys['lrs']: for k in keys['lrs']:
@ -1495,6 +1501,12 @@ class TrainingState():
self.statistics['loss'].append({'epoch': epoch, 'it': self.it, 'value': self.info[k], 'type': k}) self.statistics['loss'].append({'epoch': epoch, 'it': self.it, 'value': self.info[k], 'type': k})
for k in keys['precisions']:
if k not in self.info:
continue
self.statistics['loss'].append({'epoch': epoch, 'it': self.it, 'value': self.info[k], 'type': k})
for k in keys['losses']: for k in keys['losses']:
if k not in self.info: if k not in self.info:
continue continue
@ -1671,7 +1683,10 @@ class TrainingState():
for k in data: for k in data:
if data[k] is None: if data[k] is None:
continue continue
averager['metrics'][k].append( data[k] ) if k not in averager['metrics']:
averager['metrics'][k] = [ data[k] ]
else:
averager['metrics'][k].append( data[k] )
unq[f'{it}_{mode}_{name}'] = averager unq[f'{it}_{mode}_{name}'] = averager
else: else:
@ -1685,6 +1700,8 @@ class TrainingState():
if args.tts_backend == "vall-e": if args.tts_backend == "vall-e":
stats = unq[it] stats = unq[it]
data = {k: sum(v) / len(v) for k, v in stats['metrics'].items() if k not in blacklist } data = {k: sum(v) / len(v) for k, v in stats['metrics'].items() if k not in blacklist }
#data = {k: min(v) for k, v in stats['metrics'].items() if k not in blacklist }
#data = {k: max(v) for k, v in stats['metrics'].items() if k not in blacklist }
data['name'] = stats['name'] data['name'] = stats['name']
data['mode'] = stats['mode'] data['mode'] = stats['mode']
data['steps'] = len(stats['metrics']['it']) data['steps'] = len(stats['metrics']['it'])

View File

@ -221,7 +221,6 @@ def prepare_all_datasets( language, validation_text_length, validation_audio_len
messages = [] messages = []
voices = get_voice_list() voices = get_voice_list()
"""
for voice in voices: for voice in voices:
print("Processing:", voice) print("Processing:", voice)
message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress ) message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
@ -232,7 +231,6 @@ def prepare_all_datasets( language, validation_text_length, validation_audio_len
print("Processing:", voice) print("Processing:", voice)
message = slice_dataset( voice, trim_silence=trim_silence, start_offset=slice_start_offset, end_offset=slice_end_offset, results=None, progress=progress ) message = slice_dataset( voice, trim_silence=trim_silence, start_offset=slice_start_offset, end_offset=slice_end_offset, results=None, progress=progress )
messages.append(message) messages.append(message)
"""
for voice in voices: for voice in voices:
print("Processing:", voice) print("Processing:", voice)