Cleaned up the convoluted wrapping around gradio progress by using tqdm directly instead (slight regression: some messages no longer get pushed)

This commit is contained in:
mrq 2023-05-04 23:40:33 +00:00
parent 09d849a78f
commit 5003bc89d3
3 changed files with 35 additions and 43 deletions

@ -1 +1 @@
Subproject commit 086aad5b49e7ef39d043c6b0e12ac33c20773ab8 Subproject commit c90ee7c5296992ad96c8790b5b7cc3737062e1e6

View File

@ -238,7 +238,7 @@ def generate_bark(**kwargs):
if tts_loading: if tts_loading:
raise Exception("TTS is still initializing...") raise Exception("TTS is still initializing...")
if progress is not None: if progress is not None:
progress(0, "Initializing TTS...") notify_progress("Initializing TTS...", progress=progress)
load_tts() load_tts()
if hasattr(tts, "loading") and tts.loading: if hasattr(tts, "loading") and tts.loading:
raise Exception("TTS is still initializing...") raise Exception("TTS is still initializing...")
@ -339,8 +339,8 @@ def generate_bark(**kwargs):
INFERENCING = True INFERENCING = True
for line, cut_text in enumerate(texts): for line, cut_text in enumerate(texts):
progress.msg_prefix = f'[{str(line+1)}/{str(len(texts))}]' tqdm_prefix = f'[{str(line+1)}/{str(len(texts))}]'
print(f"{progress.msg_prefix} Generating line: {cut_text}") print(f"{tqdm_prefix} Generating line: {cut_text}")
start_time = time.time() start_time = time.time()
# do setting editing # do setting editing
@ -422,12 +422,12 @@ def generate_bark(**kwargs):
if args.voice_fixer: if args.voice_fixer:
if not voicefixer: if not voicefixer:
progress(0, "Loading voicefix...") notify_progress("Loading voicefix...", progress=progress)
load_voicefixer() load_voicefixer()
try: try:
fixed_cache = {} fixed_cache = {}
for name in progress.tqdm(audio_cache, desc="Running voicefix..."): for name in tqdm(audio_cache, desc="Running voicefix..."):
del audio_cache[name]['audio'] del audio_cache[name]['audio']
if 'output' not in audio_cache[name] or not audio_cache[name]['output']: if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
continue continue
@ -467,7 +467,7 @@ def generate_bark(**kwargs):
f.write(json.dumps(audio_cache[name]['settings'], indent='\t') ) f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )
if args.embed_output_metadata: if args.embed_output_metadata:
for name in progress.tqdm(audio_cache, desc="Embedding metadata..."): for name in tqdm(audio_cache, desc="Embedding metadata..."):
if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']: if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
continue continue
@ -521,7 +521,7 @@ def generate_valle(**kwargs):
if tts_loading: if tts_loading:
raise Exception("TTS is still initializing...") raise Exception("TTS is still initializing...")
if progress is not None: if progress is not None:
progress(0, "Initializing TTS...") notify_progress("Initializing TTS...", progress=progress)
load_tts() load_tts()
if hasattr(tts, "loading") and tts.loading: if hasattr(tts, "loading") and tts.loading:
raise Exception("TTS is still initializing...") raise Exception("TTS is still initializing...")
@ -630,8 +630,8 @@ def generate_valle(**kwargs):
INFERENCING = True INFERENCING = True
for line, cut_text in enumerate(texts): for line, cut_text in enumerate(texts):
progress.msg_prefix = f'[{str(line+1)}/{str(len(texts))}]' tqdm_prefix = f'[{str(line+1)}/{str(len(texts))}]'
print(f"{progress.msg_prefix} Generating line: {cut_text}") print(f"{tqdm_prefix} Generating line: {cut_text}")
start_time = time.time() start_time = time.time()
# do setting editing # do setting editing
@ -715,12 +715,12 @@ def generate_valle(**kwargs):
if args.voice_fixer: if args.voice_fixer:
if not voicefixer: if not voicefixer:
progress(0, "Loading voicefix...") notify_progress("Loading voicefix...", progress=progress)
load_voicefixer() load_voicefixer()
try: try:
fixed_cache = {} fixed_cache = {}
for name in progress.tqdm(audio_cache, desc="Running voicefix..."): for name in tqdm(audio_cache, desc="Running voicefix..."):
del audio_cache[name]['audio'] del audio_cache[name]['audio']
if 'output' not in audio_cache[name] or not audio_cache[name]['output']: if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
continue continue
@ -760,7 +760,7 @@ def generate_valle(**kwargs):
f.write(json.dumps(audio_cache[name]['settings'], indent='\t') ) f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )
if args.embed_output_metadata: if args.embed_output_metadata:
for name in progress.tqdm(audio_cache, desc="Embedding metadata..."): for name in tqdm(audio_cache, desc="Embedding metadata..."):
if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']: if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
continue continue
@ -839,7 +839,7 @@ def generate_tortoise(**kwargs):
voice_samples, conditioning_latents = None, tts.get_random_conditioning_latents() voice_samples, conditioning_latents = None, tts.get_random_conditioning_latents()
else: else:
if progress is not None: if progress is not None:
progress(0, desc=f"Loading voice: {voice}") notify_progress(f"Loading voice: {voice}", progress=progress)
voice_samples, conditioning_latents = load_voice(voice, model_hash=tts.autoregressive_model_hash) voice_samples, conditioning_latents = load_voice(voice, model_hash=tts.autoregressive_model_hash)
@ -1032,8 +1032,8 @@ def generate_tortoise(**kwargs):
elif parameters['emotion'] != "None" and parameters['emotion']: elif parameters['emotion'] != "None" and parameters['emotion']:
cut_text = f"[I am really {parameters['emotion'].lower()},] {cut_text}" cut_text = f"[I am really {parameters['emotion'].lower()},] {cut_text}"
progress.msg_prefix = f'[{str(line+1)}/{str(len(texts))}]' tqdm_prefix = f'[{str(line+1)}/{str(len(texts))}]'
print(f"{progress.msg_prefix} Generating line: {cut_text}") print(f"{tqdm_prefix} Generating line: {cut_text}")
start_time = time.time() start_time = time.time()
# do setting editing # do setting editing
@ -1115,12 +1115,12 @@ def generate_tortoise(**kwargs):
if args.voice_fixer: if args.voice_fixer:
if not voicefixer: if not voicefixer:
progress(0, "Loading voicefix...") notify_progress("Loading voicefix...", progress=progress)
load_voicefixer() load_voicefixer()
try: try:
fixed_cache = {} fixed_cache = {}
for name in progress.tqdm(audio_cache, desc="Running voicefix..."): for name in tqdm(audio_cache, desc="Running voicefix..."):
del audio_cache[name]['audio'] del audio_cache[name]['audio']
if 'output' not in audio_cache[name] or not audio_cache[name]['output']: if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
continue continue
@ -1160,7 +1160,7 @@ def generate_tortoise(**kwargs):
f.write(json.dumps(audio_cache[name]['settings'], indent='\t') ) f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )
if args.embed_output_metadata: if args.embed_output_metadata:
for name in progress.tqdm(audio_cache, desc="Embedding metadata..."): for name in tqdm(audio_cache, desc="Embedding metadata..."):
if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']: if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
continue continue
@ -1309,7 +1309,7 @@ def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, prog
if voice_samples is None: if voice_samples is None:
return return
conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents, progress=progress) conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents)
if len(conditioning_latents) == 4: if len(conditioning_latents) == 4:
conditioning_latents = (conditioning_latents[0], conditioning_latents[1], conditioning_latents[2], None) conditioning_latents = (conditioning_latents[0], conditioning_latents[1], conditioning_latents[2], None)
@ -2117,7 +2117,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non
if os.path.exists(infile): if os.path.exists(infile):
results = json.load(open(infile, 'r', encoding="utf-8")) results = json.load(open(infile, 'r', encoding="utf-8"))
for file in enumerate_progress(files, desc="Iterating through voice files", progress=progress): for file in tqdm(files, desc="Iterating through voice files"):
basename = os.path.basename(file) basename = os.path.basename(file)
if basename in results and skip_existings: if basename in results and skip_existings:
@ -2246,7 +2246,7 @@ def phonemize_txt_file( path ):
reparsed = [] reparsed = []
with open(path.replace(".txt", ".phn.txt"), 'a', encoding='utf-8') as f: with open(path.replace(".txt", ".phn.txt"), 'a', encoding='utf-8') as f:
for line in enumerate_progress(lines, desc='Phonemizing...'): for line in tqdm(lines, desc='Phonemizing...'):
split = line.split("|") split = line.split("|")
audio = split[0] audio = split[0]
text = split[2] text = split[2]
@ -2357,7 +2357,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
text_length = 0 text_length = 0
audio_length = 0 audio_length = 0
for filename in enumerate_progress(results, desc="Parsing results", progress=progress): for filename in tqdm(results, desc="Parsing results"):
use_segment = use_segments use_segment = use_segments
result = results[filename] result = results[filename]
@ -2438,7 +2438,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
'phonemize': [[], []], 'phonemize': [[], []],
} }
for file in enumerate_progress(segments, desc="Parsing segments", progress=progress): for file in tqdm(segments, desc="Parsing segments"):
result = segments[file] result = segments[file]
path = f'{indir}/audio/{file}' path = f'{indir}/audio/{file}'
@ -2511,7 +2511,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
print("Phonemized:", file, normalized, text) print("Phonemized:", file, normalized, text)
""" """
for i in enumerate_progress(range(len(jobs['quantize'][0])), desc="Quantizing", progress=progress): for i in tqdm(range(len(jobs['quantize'][0])), desc="Quantizing"):
qnt_file = jobs['quantize'][0][i] qnt_file = jobs['quantize'][0][i]
waveform, sample_rate = jobs['quantize'][1][i] waveform, sample_rate = jobs['quantize'][1][i]
@ -2519,7 +2519,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
torch.save(quantized, qnt_file) torch.save(quantized, qnt_file)
print("Quantized:", qnt_file) print("Quantized:", qnt_file)
for i in enumerate_progress(range(len(jobs['phonemize'][0])), desc="Phonemizing", progress=progress): for i in tqdm(range(len(jobs['phonemize'][0])), desc="Phonemizing"):
phn_file = jobs['phonemize'][0][i] phn_file = jobs['phonemize'][0][i]
normalized = jobs['phonemize'][1][i] normalized = jobs['phonemize'][1][i]
@ -2807,7 +2807,7 @@ def import_voices(files, saveAs=None, progress=None):
if not isinstance(files, list): if not isinstance(files, list):
files = [files] files = [files]
for file in enumerate_progress(files, desc="Importing voice files", progress=progress): for file in tqdm(files, desc="Importing voice files"):
j, latents = read_generate_settings(file, read_latents=True) j, latents = read_generate_settings(file, read_latents=True)
if j is not None and saveAs is None: if j is not None and saveAs is None:
@ -3025,21 +3025,13 @@ def check_for_updates( dir = None ):
return False return False
def enumerate_progress(iterable, desc=None, progress=None, verbose=None):
    """Wrap *iterable* in a progress bar.

    Args:
        iterable: The items to iterate over.
        desc: Optional label for the bar.
        progress: Optional object exposing ``tqdm(iterable, desc=...)`` and,
            optionally, a ``msg_prefix`` attribute (callers appear to pass a
            ``gr.Progress`` instance -- confirm against call sites). When
            ``None``, a plain ``tqdm`` bar is used instead.
        verbose: When truthy and *desc* is given, *desc* is also printed.

    Returns:
        Whatever ``tqdm(...)`` or ``progress.tqdm(...)`` returns for the
        wrapped iterable.
    """
    if verbose and desc is not None:
        print(desc)

    if progress is None:
        # Forward the label so the terminal bar is not left anonymous.
        return tqdm(iterable, desc=desc, disable=False)

    label = desc
    if hasattr(progress, 'msg_prefix'):
        # Do not render the literal text "None" when no description was given.
        if desc is None:
            label = progress.msg_prefix
        else:
            label = f'{progress.msg_prefix} {desc}'
    return progress.tqdm(iterable, desc=label)
def notify_progress(message, progress=None, verbose=True):
    """Report a status message to the console and/or a progress callback.

    Args:
        message: The text to report.
        progress: Optional callable invoked as ``progress(0, desc=message)``.
            When ``None``, the message only goes to the console.
        verbose: When truthy, the message is echoed to the console.

    Returns:
        None.
    """
    if progress is None:
        if verbose:
            # tqdm.write takes the text as its first positional argument and
            # has no ``desc`` keyword; it also replaces print() here so the
            # message is not emitted twice.
            tqdm.write(message)
        return

    if verbose:
        print(message)
    progress(0, desc=message)
def get_args(): def get_args():
@ -3650,7 +3642,7 @@ def load_whisper_model(language=None, model_name=None, progress=None):
model_name = f'{model_name}.{language}' model_name = f'{model_name}.{language}'
print(f"Loading specialized model for language: {language}") print(f"Loading specialized model for language: {language}")
notify_progress(f"Loading Whisper model: {model_name}", progress) notify_progress(f"Loading Whisper model: {model_name}", progress=progress)
if args.whisper_backend == "openai/whisper": if args.whisper_backend == "openai/whisper":
import whisper import whisper
@ -3733,7 +3725,7 @@ def merge_models( primary_model_name, secondary_model_name, alpha, progress=gr.P
theta_0 = read_model(primary_model_name) theta_0 = read_model(primary_model_name)
theta_1 = read_model(secondary_model_name) theta_1 = read_model(secondary_model_name)
for key in enumerate_progress(theta_0.keys(), desc="Merging...", progress=progress): for key in tqdm(theta_0.keys(), desc="Merging..."):
if key in key_blacklist: if key in key_blacklist:
print("Skipping ignored key:", key) print("Skipping ignored key:", key)
continue continue

View File

@ -200,7 +200,7 @@ def read_generate_settings_proxy(file, saveAs='.temp'):
def slice_dataset_proxy( voice, trim_silence, start_offset, end_offset, progress=gr.Progress(track_tqdm=True) ):
    """Forward the slicing options to ``slice_dataset`` with ``results=None``.

    Returns whatever ``slice_dataset`` returns.
    """
    slice_options = dict(
        trim_silence=trim_silence,
        start_offset=start_offset,
        end_offset=end_offset,
        results=None,
        progress=progress,
    )
    return slice_dataset( voice, **slice_options )
def diarize_dataset( voice, progress=gr.Progress(track_tqdm=False) ): def diarize_dataset( voice, progress=gr.Progress(track_tqdm=True) ):
from pyannote.audio import Pipeline from pyannote.audio import Pipeline
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=args.hf_token) pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=args.hf_token)
@ -215,7 +215,7 @@ def diarize_dataset( voice, progress=gr.Progress(track_tqdm=False) ):
return "\n".join(messages) return "\n".join(messages)
def prepare_all_datasets( language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=False) ): def prepare_all_datasets( language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=True) ):
kwargs = locals() kwargs = locals()
messages = [] messages = []
@ -239,7 +239,7 @@ def prepare_all_datasets( language, validation_text_length, validation_audio_len
return "\n".join(messages) return "\n".join(messages)
def prepare_dataset_proxy( voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=False) ): def prepare_dataset_proxy( voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=True) ):
messages = [] messages = []
message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress ) message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )