cleaned up brain worms with wrapping around gradio progress by instead just using tqdm directly (slight regressions with some messages not getting pushed)

2023-05-04 23:40:33 +00:00 · 2023-05-04 23:40:33 +00:00 · 5003bc89d3
commit 5003bc89d3
parent 09d849a78f
3 changed files with 35 additions and 43 deletions
--- a/modules/tortoise-tts
+++ b/modules/tortoise-tts
@ -1 +1 @@
-Subproject commit 086aad5b49e7ef39d043c6b0e12ac33c20773ab8
+Subproject commit c90ee7c5296992ad96c8790b5b7cc3737062e1e6
--- a/src/utils.py
+++ b/src/utils.py
@ -238,7 +238,7 @@ def generate_bark(**kwargs):
 		if tts_loading:
 			raise Exception("TTS is still initializing...")
 		if progress is not None:
-			progress(0, "Initializing TTS...")
+			notify_progress("Initializing TTS...", progress=progress)
 		load_tts()
 	if hasattr(tts, "loading") and tts.loading:
 		raise Exception("TTS is still initializing...")
@ -339,8 +339,8 @@ def generate_bark(**kwargs):

 	INFERENCING = True
 	for line, cut_text in enumerate(texts):	
-		progress.msg_prefix = f'[{str(line+1)}/{str(len(texts))}]'
-		print(f"{progress.msg_prefix} Generating line: {cut_text}")
+		tqdm_prefix = f'[{str(line+1)}/{str(len(texts))}]'
+		print(f"{tqdm_prefix} Generating line: {cut_text}")
 		start_time = time.time()

 		# do setting editing
@ -422,12 +422,12 @@ def generate_bark(**kwargs):

 	if args.voice_fixer:
 		if not voicefixer:
-			progress(0, "Loading voicefix...")
+			notify_progress("Loading voicefix...", progress=progress)
 			load_voicefixer()

 		try:
 			fixed_cache = {}
-			for name in progress.tqdm(audio_cache, desc="Running voicefix..."):
+			for name in tqdm(audio_cache, desc="Running voicefix..."):
 				del audio_cache[name]['audio']
 				if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
 					continue
@ -467,7 +467,7 @@ def generate_bark(**kwargs):
 				f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )

 	if args.embed_output_metadata:
-		for name in progress.tqdm(audio_cache, desc="Embedding metadata..."):
+		for name in tqdm(audio_cache, desc="Embedding metadata..."):
 			if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
 				continue

@ -521,7 +521,7 @@ def generate_valle(**kwargs):
 		if tts_loading:
 			raise Exception("TTS is still initializing...")
 		if progress is not None:
-			progress(0, "Initializing TTS...")
+			notify_progress("Initializing TTS...", progress=progress)
 		load_tts()
 	if hasattr(tts, "loading") and tts.loading:
 		raise Exception("TTS is still initializing...")
@ -630,8 +630,8 @@ def generate_valle(**kwargs):

 	INFERENCING = True
 	for line, cut_text in enumerate(texts):	
-		progress.msg_prefix = f'[{str(line+1)}/{str(len(texts))}]'
-		print(f"{progress.msg_prefix} Generating line: {cut_text}")
+		tqdm_prefix = f'[{str(line+1)}/{str(len(texts))}]'
+		print(f"{tqdm_prefix} Generating line: {cut_text}")
 		start_time = time.time()

 		# do setting editing
@ -715,12 +715,12 @@ def generate_valle(**kwargs):

 	if args.voice_fixer:
 		if not voicefixer:
-			progress(0, "Loading voicefix...")
+			notify_progress("Loading voicefix...", progress=progress)
 			load_voicefixer()

 		try:
 			fixed_cache = {}
-			for name in progress.tqdm(audio_cache, desc="Running voicefix..."):
+			for name in tqdm(audio_cache, desc="Running voicefix..."):
 				del audio_cache[name]['audio']
 				if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
 					continue
@ -760,7 +760,7 @@ def generate_valle(**kwargs):
 				f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )

 	if args.embed_output_metadata:
-		for name in progress.tqdm(audio_cache, desc="Embedding metadata..."):
+		for name in tqdm(audio_cache, desc="Embedding metadata..."):
 			if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
 				continue

@ -839,7 +839,7 @@ def generate_tortoise(**kwargs):
 			voice_samples, conditioning_latents = None, tts.get_random_conditioning_latents()
 		else:
 			if progress is not None:
-				progress(0, desc=f"Loading voice: {voice}")
+				notify_progress(f"Loading voice: {voice}", progress=progress)

 			voice_samples, conditioning_latents = load_voice(voice, model_hash=tts.autoregressive_model_hash)
 			
@ -1032,8 +1032,8 @@ def generate_tortoise(**kwargs):
 		elif parameters['emotion'] != "None" and parameters['emotion']:
 			cut_text = f"[I am really {parameters['emotion'].lower()},] {cut_text}"
 		
-		progress.msg_prefix = f'[{str(line+1)}/{str(len(texts))}]'
-		print(f"{progress.msg_prefix} Generating line: {cut_text}")
+		tqdm_prefix = f'[{str(line+1)}/{str(len(texts))}]'
+		print(f"{tqdm_prefix} Generating line: {cut_text}")
 		start_time = time.time()

 		# do setting editing
@ -1115,12 +1115,12 @@ def generate_tortoise(**kwargs):

 	if args.voice_fixer:
 		if not voicefixer:
-			progress(0, "Loading voicefix...")
+			notify_progress("Loading voicefix...", progress=progress)
 			load_voicefixer()

 		try:
 			fixed_cache = {}
-			for name in progress.tqdm(audio_cache, desc="Running voicefix..."):
+			for name in tqdm(audio_cache, desc="Running voicefix..."):
 				del audio_cache[name]['audio']
 				if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
 					continue
@ -1160,7 +1160,7 @@ def generate_tortoise(**kwargs):
 				f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )

 	if args.embed_output_metadata:
-		for name in progress.tqdm(audio_cache, desc="Embedding metadata..."):
+		for name in tqdm(audio_cache, desc="Embedding metadata..."):
 			if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
 				continue

@ -1309,7 +1309,7 @@ def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, prog
 	if voice_samples is None:
 		return

-	conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents, progress=progress)
+	conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents)

 	if len(conditioning_latents) == 4:
 		conditioning_latents = (conditioning_latents[0], conditioning_latents[1], conditioning_latents[2], None)
@ -2117,7 +2117,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non
 	if os.path.exists(infile):
 		results = json.load(open(infile, 'r', encoding="utf-8"))

-	for file in enumerate_progress(files, desc="Iterating through voice files", progress=progress):
+	for file in tqdm(files, desc="Iterating through voice files"):
 		basename = os.path.basename(file)

 		if basename in results and skip_existings:
@ -2246,7 +2246,7 @@ def phonemize_txt_file( path ):

 	reparsed = []
 	with open(path.replace(".txt", ".phn.txt"), 'a', encoding='utf-8') as f:
-		for line in enumerate_progress(lines, desc='Phonemizing...'):
+		for line in tqdm(lines, desc='Phonemizing...'):
 			split = line.split("|")
 			audio = split[0]
 			text = split[2]
@ -2357,7 +2357,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
 		text_length = 0
 		audio_length = 0

-	for filename in enumerate_progress(results, desc="Parsing results", progress=progress):
+	for filename in tqdm(results, desc="Parsing results"):
 		use_segment = use_segments

 		result = results[filename]
@ -2438,7 +2438,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
 		'phonemize': [[], []],
 	}

-	for file in enumerate_progress(segments, desc="Parsing segments", progress=progress):
+	for file in tqdm(segments, desc="Parsing segments"):
 		result = segments[file]
 		path = f'{indir}/audio/{file}'

@ -2511,7 +2511,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
 			print("Phonemized:", file, normalized, text)
 			"""

-	for i in enumerate_progress(range(len(jobs['quantize'][0])), desc="Quantizing", progress=progress):
+	for i in tqdm(range(len(jobs['quantize'][0])), desc="Quantizing"):
 		qnt_file = jobs['quantize'][0][i]
 		waveform, sample_rate = jobs['quantize'][1][i]

@ -2519,7 +2519,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
 		torch.save(quantized, qnt_file)
 		print("Quantized:", qnt_file)

-	for i in enumerate_progress(range(len(jobs['phonemize'][0])), desc="Phonemizing", progress=progress):
+	for i in tqdm(range(len(jobs['phonemize'][0])), desc="Phonemizing"):
 		phn_file = jobs['phonemize'][0][i]
 		normalized = jobs['phonemize'][1][i]

@ -2807,7 +2807,7 @@ def import_voices(files, saveAs=None, progress=None):
 	if not isinstance(files, list):
 		files = [files]

-	for file in enumerate_progress(files, desc="Importing voice files", progress=progress):
+	for file in tqdm(files, desc="Importing voice files"):
 		j, latents = read_generate_settings(file, read_latents=True)
 		
 		if j is not None and saveAs is None:
@ -3025,22 +3025,14 @@ def check_for_updates( dir = None ):

 	return False

-def enumerate_progress(iterable, desc=None, progress=None, verbose=None):
-	if verbose and desc is not None:
-		print(desc)
-
-	if progress is None:
-		return tqdm(iterable, disable=False) #not verbose)
-	return progress.tqdm(iterable, desc=f'{progress.msg_prefix} {desc}' if hasattr(progress, 'msg_prefix') else desc)
-
 def notify_progress(message, progress=None, verbose=True):
 	if verbose:
 		print(message)

 	if progress is None:
-		return
-
-	progress(0, desc=message)
+		tqdm.write(message)
+	else:
+		progress(0, desc=message)

 def get_args():
 	global args
@ -3650,7 +3642,7 @@ def load_whisper_model(language=None, model_name=None, progress=None):
 		model_name = f'{model_name}.{language}'
 		print(f"Loading specialized model for language: {language}")

-	notify_progress(f"Loading Whisper model: {model_name}", progress)
+	notify_progress(f"Loading Whisper model: {model_name}", progress=progress)

 	if args.whisper_backend == "openai/whisper":
 		import whisper
@ -3733,7 +3725,7 @@ def merge_models( primary_model_name, secondary_model_name, alpha, progress=gr.P
 	theta_0 = read_model(primary_model_name)
 	theta_1 = read_model(secondary_model_name)

-	for key in enumerate_progress(theta_0.keys(), desc="Merging...", progress=progress):
+	for key in tqdm(theta_0.keys(), desc="Merging..."):
 		if key in key_blacklist:
 			print("Skipping ignored key:", key)
 			continue
--- a/src/webui.py
+++ b/src/webui.py
@ -200,7 +200,7 @@ def read_generate_settings_proxy(file, saveAs='.temp'):
 def slice_dataset_proxy( voice, trim_silence, start_offset, end_offset, progress=gr.Progress(track_tqdm=True) ):
 	return slice_dataset( voice, trim_silence=trim_silence, start_offset=start_offset, end_offset=end_offset, results=None, progress=progress )

-def diarize_dataset( voice, progress=gr.Progress(track_tqdm=False) ):
+def diarize_dataset( voice, progress=gr.Progress(track_tqdm=True) ):
 	from pyannote.audio import Pipeline
 	pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=args.hf_token)

@ -215,7 +215,7 @@ def diarize_dataset( voice, progress=gr.Progress(track_tqdm=False) ):

 	return "\n".join(messages)

-def prepare_all_datasets( language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=False) ):
+def prepare_all_datasets( language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=True) ):
 	kwargs = locals()

 	messages = []
@ -239,7 +239,7 @@ def prepare_all_datasets( language, validation_text_length, validation_audio_len

 	return "\n".join(messages)

-def prepare_dataset_proxy( voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=False) ):
+def prepare_dataset_proxy( voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=True) ):
 	messages = []
 	
 	message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )