diff --git a/.gitignore b/.gitignore index 9d5407e..ec48a3f 100755 --- a/.gitignore +++ b/.gitignore @@ -139,4 +139,16 @@ dmypy.json .models/* .custom/* results/* -debug_states/* \ No newline at end of file +debug_states/* +prompts/geralt_regis_voice_switching_template.txt +prompts/regis1.txt +prompts/regis2.txt +prompts/regis3.txt +prompts/regis4.txt +prompts/regis5.txt +prompts/regis6.txt +prompts/regis7.txt +prompts/regis8.txt +prompts/regis9.txt +prompts/regis10.txt +prompts/regis11.txt diff --git a/modules/bark b/modules/bark new file mode 160000 index 0000000..773624d --- /dev/null +++ b/modules/bark @@ -0,0 +1 @@ +Subproject commit 773624d26db84278a55aacae9a16d7b25fbccab8 diff --git a/modules/tortoise-tts b/modules/tortoise-tts index bf3b6c8..95f679f 160000 --- a/modules/tortoise-tts +++ b/modules/tortoise-tts @@ -1 +1 @@ -Subproject commit bf3b6c87aa825295f64a31d010fd5e896fbcda43 +Subproject commit 95f679f4ba714c0f2a37d66f4ab8bc33f8b952d8 diff --git a/modules/vall-e b/modules/vall-e new file mode 160000 index 0000000..6c51a62 --- /dev/null +++ b/modules/vall-e @@ -0,0 +1 @@ +Subproject commit 6c51a629cc30cd97b064c276a5dc68375c99b78a diff --git a/scripts.zip b/scripts.zip new file mode 100644 index 0000000..c8ec480 Binary files /dev/null and b/scripts.zip differ diff --git a/src/utils.py b/src/utils.py index 287c909..6b2377b 100755 --- a/src/utils.py +++ b/src/utils.py @@ -968,6 +968,8 @@ def generate_valle(**kwargs): def generate_tortoise(**kwargs): parameters = {} parameters.update(kwargs) + + voice = parameters['voice'] progress = parameters['progress'] if 'progress' in parameters else None @@ -995,6 +997,7 @@ def generate_tortoise(**kwargs): voice_samples = None conditioning_latents = None sample_voice = None + #model = tts.autoregressive_model_path voice_cache = {} def fetch_voice( voice ): @@ -1133,9 +1136,14 @@ def generate_tortoise(**kwargs): idx = keys[-1] + 1 idx = pad(idx, 4) + - def get_name(line=0, candidate=0, combined=False): - name = f"{idx}" + def get_name(line=0, candidate=0, combined=False, seed=None, model_path=None): + filename_with_extension = os.path.basename(model_path) + model_name, _ = os.path.splitext(filename_with_extension) + model_part = f"model-{model_name}" if model is not None else "No_model_found" + seed_part = f"_seed-{seed}" if seed is not None else "" + name = f"{idx}_{seed_part}_{model_part}" if combined: name = f"{name}_combined" elif len(texts) > 1: @@ -1220,6 +1228,8 @@ def generate_tortoise(**kwargs): raise Exception("Prompt settings editing requested, but received invalid JSON") settings = get_settings( override=override ) + #model = tts.autoregressive_model_path + gen, additionals = tts.tts(cut_text, **settings ) parameters['seed'] = additionals[0] @@ -1231,7 +1241,8 @@ def generate_tortoise(**kwargs): for j, g in enumerate(gen): audio = g.squeeze(0).cpu() - name = get_name(line=line, candidate=j) + + name = get_name(line=line, candidate=j, seed=parameters['seed'], model_path=tts.autoregressive_model_path) settings['text'] = cut_text settings['time'] = run_time @@ -1263,14 +1274,16 @@ def generate_tortoise(**kwargs): output_voices = [] for candidate in range(parameters['candidates']): + if len(texts) > 1: audio_clips = [] for line in range(len(texts)): - name = get_name(line=line, candidate=candidate) + + name = get_name(line=line, candidate=candidate, seed=parameters['seed'], model_path=tts.autoregressive_model_path) audio = audio_cache[name]['audio'] audio_clips.append(audio) - name = get_name(candidate=candidate, combined=True) + name = get_name(candidate=candidate, combined=True, seed=parameters['seed'], model_path=tts.autoregressive_model_path) audio = torch.cat(audio_clips, dim=-1) torchaudio.save(f'{outdir}/{cleanup_voice_name(voice)}_{name}.wav', audio, args.output_sample_rate) @@ -1281,7 +1294,7 @@ def generate_tortoise(**kwargs): 'output': True } else: - name = get_name(candidate=candidate) + name = get_name(candidate=candidate, seed=parameters['seed'], model_path=tts.autoregressive_model_path) audio_cache[name]['output'] = True @@ -1344,7 +1357,8 @@ def generate_tortoise(**kwargs): sample_voice = (tts.input_sample_rate, sample_voice.numpy()) info = get_info(voice=voice, latents=False) - print(f"Generation took {info['time']} seconds, saved to '{output_voices[0]}'\n") + print(f"Generation took {info['time']} seconds, seed {info['seed']}, saved to '{output_voices[0]}'\n") + info['seed'] = usedSeed if 'latents' in info: