forked from mrq/ai-voice-cloning
cleaned up brain worms with wrapping around gradio progress by instead just using tqdm directly (slight regressions with some messages not getting pushed)
This commit is contained in:
parent
09d849a78f
commit
5003bc89d3
|
@ -1 +1 @@
|
||||||
Subproject commit 086aad5b49e7ef39d043c6b0e12ac33c20773ab8
|
Subproject commit c90ee7c5296992ad96c8790b5b7cc3737062e1e6
|
68
src/utils.py
68
src/utils.py
|
@ -238,7 +238,7 @@ def generate_bark(**kwargs):
|
||||||
if tts_loading:
|
if tts_loading:
|
||||||
raise Exception("TTS is still initializing...")
|
raise Exception("TTS is still initializing...")
|
||||||
if progress is not None:
|
if progress is not None:
|
||||||
progress(0, "Initializing TTS...")
|
notify_progress("Initializing TTS...", progress=progress)
|
||||||
load_tts()
|
load_tts()
|
||||||
if hasattr(tts, "loading") and tts.loading:
|
if hasattr(tts, "loading") and tts.loading:
|
||||||
raise Exception("TTS is still initializing...")
|
raise Exception("TTS is still initializing...")
|
||||||
|
@ -339,8 +339,8 @@ def generate_bark(**kwargs):
|
||||||
|
|
||||||
INFERENCING = True
|
INFERENCING = True
|
||||||
for line, cut_text in enumerate(texts):
|
for line, cut_text in enumerate(texts):
|
||||||
progress.msg_prefix = f'[{str(line+1)}/{str(len(texts))}]'
|
tqdm_prefix = f'[{str(line+1)}/{str(len(texts))}]'
|
||||||
print(f"{progress.msg_prefix} Generating line: {cut_text}")
|
print(f"{tqdm_prefix} Generating line: {cut_text}")
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
# do setting editing
|
# do setting editing
|
||||||
|
@ -422,12 +422,12 @@ def generate_bark(**kwargs):
|
||||||
|
|
||||||
if args.voice_fixer:
|
if args.voice_fixer:
|
||||||
if not voicefixer:
|
if not voicefixer:
|
||||||
progress(0, "Loading voicefix...")
|
notify_progress("Loading voicefix...", progress=progress)
|
||||||
load_voicefixer()
|
load_voicefixer()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
fixed_cache = {}
|
fixed_cache = {}
|
||||||
for name in progress.tqdm(audio_cache, desc="Running voicefix..."):
|
for name in tqdm(audio_cache, desc="Running voicefix..."):
|
||||||
del audio_cache[name]['audio']
|
del audio_cache[name]['audio']
|
||||||
if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
|
if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
|
||||||
continue
|
continue
|
||||||
|
@ -467,7 +467,7 @@ def generate_bark(**kwargs):
|
||||||
f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )
|
f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )
|
||||||
|
|
||||||
if args.embed_output_metadata:
|
if args.embed_output_metadata:
|
||||||
for name in progress.tqdm(audio_cache, desc="Embedding metadata..."):
|
for name in tqdm(audio_cache, desc="Embedding metadata..."):
|
||||||
if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
|
if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -521,7 +521,7 @@ def generate_valle(**kwargs):
|
||||||
if tts_loading:
|
if tts_loading:
|
||||||
raise Exception("TTS is still initializing...")
|
raise Exception("TTS is still initializing...")
|
||||||
if progress is not None:
|
if progress is not None:
|
||||||
progress(0, "Initializing TTS...")
|
notify_progress("Initializing TTS...", progress=progress)
|
||||||
load_tts()
|
load_tts()
|
||||||
if hasattr(tts, "loading") and tts.loading:
|
if hasattr(tts, "loading") and tts.loading:
|
||||||
raise Exception("TTS is still initializing...")
|
raise Exception("TTS is still initializing...")
|
||||||
|
@ -630,8 +630,8 @@ def generate_valle(**kwargs):
|
||||||
|
|
||||||
INFERENCING = True
|
INFERENCING = True
|
||||||
for line, cut_text in enumerate(texts):
|
for line, cut_text in enumerate(texts):
|
||||||
progress.msg_prefix = f'[{str(line+1)}/{str(len(texts))}]'
|
tqdm_prefix = f'[{str(line+1)}/{str(len(texts))}]'
|
||||||
print(f"{progress.msg_prefix} Generating line: {cut_text}")
|
print(f"{tqdm_prefix} Generating line: {cut_text}")
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
# do setting editing
|
# do setting editing
|
||||||
|
@ -715,12 +715,12 @@ def generate_valle(**kwargs):
|
||||||
|
|
||||||
if args.voice_fixer:
|
if args.voice_fixer:
|
||||||
if not voicefixer:
|
if not voicefixer:
|
||||||
progress(0, "Loading voicefix...")
|
notify_progress("Loading voicefix...", progress=progress)
|
||||||
load_voicefixer()
|
load_voicefixer()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
fixed_cache = {}
|
fixed_cache = {}
|
||||||
for name in progress.tqdm(audio_cache, desc="Running voicefix..."):
|
for name in tqdm(audio_cache, desc="Running voicefix..."):
|
||||||
del audio_cache[name]['audio']
|
del audio_cache[name]['audio']
|
||||||
if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
|
if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
|
||||||
continue
|
continue
|
||||||
|
@ -760,7 +760,7 @@ def generate_valle(**kwargs):
|
||||||
f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )
|
f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )
|
||||||
|
|
||||||
if args.embed_output_metadata:
|
if args.embed_output_metadata:
|
||||||
for name in progress.tqdm(audio_cache, desc="Embedding metadata..."):
|
for name in tqdm(audio_cache, desc="Embedding metadata..."):
|
||||||
if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
|
if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -839,7 +839,7 @@ def generate_tortoise(**kwargs):
|
||||||
voice_samples, conditioning_latents = None, tts.get_random_conditioning_latents()
|
voice_samples, conditioning_latents = None, tts.get_random_conditioning_latents()
|
||||||
else:
|
else:
|
||||||
if progress is not None:
|
if progress is not None:
|
||||||
progress(0, desc=f"Loading voice: {voice}")
|
notify_progress(f"Loading voice: {voice}", progress=progress)
|
||||||
|
|
||||||
voice_samples, conditioning_latents = load_voice(voice, model_hash=tts.autoregressive_model_hash)
|
voice_samples, conditioning_latents = load_voice(voice, model_hash=tts.autoregressive_model_hash)
|
||||||
|
|
||||||
|
@ -1032,8 +1032,8 @@ def generate_tortoise(**kwargs):
|
||||||
elif parameters['emotion'] != "None" and parameters['emotion']:
|
elif parameters['emotion'] != "None" and parameters['emotion']:
|
||||||
cut_text = f"[I am really {parameters['emotion'].lower()},] {cut_text}"
|
cut_text = f"[I am really {parameters['emotion'].lower()},] {cut_text}"
|
||||||
|
|
||||||
progress.msg_prefix = f'[{str(line+1)}/{str(len(texts))}]'
|
tqdm_prefix = f'[{str(line+1)}/{str(len(texts))}]'
|
||||||
print(f"{progress.msg_prefix} Generating line: {cut_text}")
|
print(f"{tqdm_prefix} Generating line: {cut_text}")
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
# do setting editing
|
# do setting editing
|
||||||
|
@ -1115,12 +1115,12 @@ def generate_tortoise(**kwargs):
|
||||||
|
|
||||||
if args.voice_fixer:
|
if args.voice_fixer:
|
||||||
if not voicefixer:
|
if not voicefixer:
|
||||||
progress(0, "Loading voicefix...")
|
notify_progress("Loading voicefix...", progress=progress)
|
||||||
load_voicefixer()
|
load_voicefixer()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
fixed_cache = {}
|
fixed_cache = {}
|
||||||
for name in progress.tqdm(audio_cache, desc="Running voicefix..."):
|
for name in tqdm(audio_cache, desc="Running voicefix..."):
|
||||||
del audio_cache[name]['audio']
|
del audio_cache[name]['audio']
|
||||||
if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
|
if 'output' not in audio_cache[name] or not audio_cache[name]['output']:
|
||||||
continue
|
continue
|
||||||
|
@ -1160,7 +1160,7 @@ def generate_tortoise(**kwargs):
|
||||||
f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )
|
f.write(json.dumps(audio_cache[name]['settings'], indent='\t') )
|
||||||
|
|
||||||
if args.embed_output_metadata:
|
if args.embed_output_metadata:
|
||||||
for name in progress.tqdm(audio_cache, desc="Embedding metadata..."):
|
for name in tqdm(audio_cache, desc="Embedding metadata..."):
|
||||||
if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
|
if 'pruned' in audio_cache[name] and audio_cache[name]['pruned']:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -1309,7 +1309,7 @@ def compute_latents(voice=None, voice_samples=None, voice_latents_chunks=0, prog
|
||||||
if voice_samples is None:
|
if voice_samples is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents, progress=progress)
|
conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents)
|
||||||
|
|
||||||
if len(conditioning_latents) == 4:
|
if len(conditioning_latents) == 4:
|
||||||
conditioning_latents = (conditioning_latents[0], conditioning_latents[1], conditioning_latents[2], None)
|
conditioning_latents = (conditioning_latents[0], conditioning_latents[1], conditioning_latents[2], None)
|
||||||
|
@ -2117,7 +2117,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non
|
||||||
if os.path.exists(infile):
|
if os.path.exists(infile):
|
||||||
results = json.load(open(infile, 'r', encoding="utf-8"))
|
results = json.load(open(infile, 'r', encoding="utf-8"))
|
||||||
|
|
||||||
for file in enumerate_progress(files, desc="Iterating through voice files", progress=progress):
|
for file in tqdm(files, desc="Iterating through voice files"):
|
||||||
basename = os.path.basename(file)
|
basename = os.path.basename(file)
|
||||||
|
|
||||||
if basename in results and skip_existings:
|
if basename in results and skip_existings:
|
||||||
|
@ -2246,7 +2246,7 @@ def phonemize_txt_file( path ):
|
||||||
|
|
||||||
reparsed = []
|
reparsed = []
|
||||||
with open(path.replace(".txt", ".phn.txt"), 'a', encoding='utf-8') as f:
|
with open(path.replace(".txt", ".phn.txt"), 'a', encoding='utf-8') as f:
|
||||||
for line in enumerate_progress(lines, desc='Phonemizing...'):
|
for line in tqdm(lines, desc='Phonemizing...'):
|
||||||
split = line.split("|")
|
split = line.split("|")
|
||||||
audio = split[0]
|
audio = split[0]
|
||||||
text = split[2]
|
text = split[2]
|
||||||
|
@ -2357,7 +2357,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
|
||||||
text_length = 0
|
text_length = 0
|
||||||
audio_length = 0
|
audio_length = 0
|
||||||
|
|
||||||
for filename in enumerate_progress(results, desc="Parsing results", progress=progress):
|
for filename in tqdm(results, desc="Parsing results"):
|
||||||
use_segment = use_segments
|
use_segment = use_segments
|
||||||
|
|
||||||
result = results[filename]
|
result = results[filename]
|
||||||
|
@ -2438,7 +2438,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
|
||||||
'phonemize': [[], []],
|
'phonemize': [[], []],
|
||||||
}
|
}
|
||||||
|
|
||||||
for file in enumerate_progress(segments, desc="Parsing segments", progress=progress):
|
for file in tqdm(segments, desc="Parsing segments"):
|
||||||
result = segments[file]
|
result = segments[file]
|
||||||
path = f'{indir}/audio/{file}'
|
path = f'{indir}/audio/{file}'
|
||||||
|
|
||||||
|
@ -2511,7 +2511,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
|
||||||
print("Phonemized:", file, normalized, text)
|
print("Phonemized:", file, normalized, text)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
for i in enumerate_progress(range(len(jobs['quantize'][0])), desc="Quantizing", progress=progress):
|
for i in tqdm(range(len(jobs['quantize'][0])), desc="Quantizing"):
|
||||||
qnt_file = jobs['quantize'][0][i]
|
qnt_file = jobs['quantize'][0][i]
|
||||||
waveform, sample_rate = jobs['quantize'][1][i]
|
waveform, sample_rate = jobs['quantize'][1][i]
|
||||||
|
|
||||||
|
@ -2519,7 +2519,7 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
|
||||||
torch.save(quantized, qnt_file)
|
torch.save(quantized, qnt_file)
|
||||||
print("Quantized:", qnt_file)
|
print("Quantized:", qnt_file)
|
||||||
|
|
||||||
for i in enumerate_progress(range(len(jobs['phonemize'][0])), desc="Phonemizing", progress=progress):
|
for i in tqdm(range(len(jobs['phonemize'][0])), desc="Phonemizing"):
|
||||||
phn_file = jobs['phonemize'][0][i]
|
phn_file = jobs['phonemize'][0][i]
|
||||||
normalized = jobs['phonemize'][1][i]
|
normalized = jobs['phonemize'][1][i]
|
||||||
|
|
||||||
|
@ -2807,7 +2807,7 @@ def import_voices(files, saveAs=None, progress=None):
|
||||||
if not isinstance(files, list):
|
if not isinstance(files, list):
|
||||||
files = [files]
|
files = [files]
|
||||||
|
|
||||||
for file in enumerate_progress(files, desc="Importing voice files", progress=progress):
|
for file in tqdm(files, desc="Importing voice files"):
|
||||||
j, latents = read_generate_settings(file, read_latents=True)
|
j, latents = read_generate_settings(file, read_latents=True)
|
||||||
|
|
||||||
if j is not None and saveAs is None:
|
if j is not None and saveAs is None:
|
||||||
|
@ -3025,21 +3025,13 @@ def check_for_updates( dir = None ):
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def enumerate_progress(iterable, desc=None, progress=None, verbose=None):
|
|
||||||
if verbose and desc is not None:
|
|
||||||
print(desc)
|
|
||||||
|
|
||||||
if progress is None:
|
|
||||||
return tqdm(iterable, disable=False) #not verbose)
|
|
||||||
return progress.tqdm(iterable, desc=f'{progress.msg_prefix} {desc}' if hasattr(progress, 'msg_prefix') else desc)
|
|
||||||
|
|
||||||
def notify_progress(message, progress=None, verbose=True):
|
def notify_progress(message, progress=None, verbose=True):
|
||||||
if verbose:
|
if verbose:
|
||||||
print(message)
|
print(message)
|
||||||
|
|
||||||
if progress is None:
|
if progress is None:
|
||||||
return
|
tqdm.write(message)  # tqdm.write() takes the text as its first positional argument `s`; it has no `desc` keyword
|
||||||
|
else:
|
||||||
progress(0, desc=message)
|
progress(0, desc=message)
|
||||||
|
|
||||||
def get_args():
|
def get_args():
|
||||||
|
@ -3650,7 +3642,7 @@ def load_whisper_model(language=None, model_name=None, progress=None):
|
||||||
model_name = f'{model_name}.{language}'
|
model_name = f'{model_name}.{language}'
|
||||||
print(f"Loading specialized model for language: {language}")
|
print(f"Loading specialized model for language: {language}")
|
||||||
|
|
||||||
notify_progress(f"Loading Whisper model: {model_name}", progress)
|
notify_progress(f"Loading Whisper model: {model_name}", progress=progress)
|
||||||
|
|
||||||
if args.whisper_backend == "openai/whisper":
|
if args.whisper_backend == "openai/whisper":
|
||||||
import whisper
|
import whisper
|
||||||
|
@ -3733,7 +3725,7 @@ def merge_models( primary_model_name, secondary_model_name, alpha, progress=gr.P
|
||||||
theta_0 = read_model(primary_model_name)
|
theta_0 = read_model(primary_model_name)
|
||||||
theta_1 = read_model(secondary_model_name)
|
theta_1 = read_model(secondary_model_name)
|
||||||
|
|
||||||
for key in enumerate_progress(theta_0.keys(), desc="Merging...", progress=progress):
|
for key in tqdm(theta_0.keys(), desc="Merging..."):
|
||||||
if key in key_blacklist:
|
if key in key_blacklist:
|
||||||
print("Skipping ignored key:", key)
|
print("Skipping ignored key:", key)
|
||||||
continue
|
continue
|
||||||
|
|
|
@ -200,7 +200,7 @@ def read_generate_settings_proxy(file, saveAs='.temp'):
|
||||||
def slice_dataset_proxy( voice, trim_silence, start_offset, end_offset, progress=gr.Progress(track_tqdm=True) ):
|
def slice_dataset_proxy( voice, trim_silence, start_offset, end_offset, progress=gr.Progress(track_tqdm=True) ):
|
||||||
return slice_dataset( voice, trim_silence=trim_silence, start_offset=start_offset, end_offset=end_offset, results=None, progress=progress )
|
return slice_dataset( voice, trim_silence=trim_silence, start_offset=start_offset, end_offset=end_offset, results=None, progress=progress )
|
||||||
|
|
||||||
def diarize_dataset( voice, progress=gr.Progress(track_tqdm=False) ):
|
def diarize_dataset( voice, progress=gr.Progress(track_tqdm=True) ):
|
||||||
from pyannote.audio import Pipeline
|
from pyannote.audio import Pipeline
|
||||||
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=args.hf_token)
|
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=args.hf_token)
|
||||||
|
|
||||||
|
@ -215,7 +215,7 @@ def diarize_dataset( voice, progress=gr.Progress(track_tqdm=False) ):
|
||||||
|
|
||||||
return "\n".join(messages)
|
return "\n".join(messages)
|
||||||
|
|
||||||
def prepare_all_datasets( language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=False) ):
|
def prepare_all_datasets( language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=True) ):
|
||||||
kwargs = locals()
|
kwargs = locals()
|
||||||
|
|
||||||
messages = []
|
messages = []
|
||||||
|
@ -239,7 +239,7 @@ def prepare_all_datasets( language, validation_text_length, validation_audio_len
|
||||||
|
|
||||||
return "\n".join(messages)
|
return "\n".join(messages)
|
||||||
|
|
||||||
def prepare_dataset_proxy( voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=False) ):
|
def prepare_dataset_proxy( voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, trim_silence, slice_start_offset, slice_end_offset, progress=gr.Progress(track_tqdm=True) ):
|
||||||
messages = []
|
messages = []
|
||||||
|
|
||||||
message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
|
message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
|
||||||
|
|
Loading…
Reference in New Issue
Block a user