forked from mrq/ai-voice-cloning
added more info on output generated clips
This commit is contained in:
parent 94f88886b0
commit 3621f30433
12  .gitignore  vendored

@@ -140,3 +140,15 @@ dmypy.json
 .custom/*
 results/*
 debug_states/*
+prompts/geralt_regis_voice_switching_template.txt
+prompts/regis1.txt
+prompts/regis2.txt
+prompts/regis3.txt
+prompts/regis4.txt
+prompts/regis5.txt
+prompts/regis6.txt
+prompts/regis7.txt
+prompts/regis8.txt
+prompts/regis9.txt
+prompts/regis10.txt
+prompts/regis11.txt
1  modules/bark  Submodule

@@ -0,0 +1 @@
+Subproject commit 773624d26db84278a55aacae9a16d7b25fbccab8
1  (submodule path not shown in this view)  Submodule

@@ -1 +1 @@
-Subproject commit bf3b6c87aa825295f64a31d010fd5e896fbcda43
+Subproject commit 95f679f4ba714c0f2a37d66f4ab8bc33f8b952d8
1  modules/vall-e  Submodule

@@ -0,0 +1 @@
+Subproject commit 6c51a629cc30cd97b064c276a5dc68375c99b78a
BIN  scripts.zip  Normal file

Binary file not shown.
28  src/utils.py
@@ -969,6 +969,8 @@ def generate_tortoise(**kwargs):
     parameters = {}
     parameters.update(kwargs)
 
+
+
     voice = parameters['voice']
     progress = parameters['progress'] if 'progress' in parameters else None
     if parameters['seed'] == 0:
@@ -995,6 +997,7 @@ def generate_tortoise(**kwargs):
     voice_samples = None
     conditioning_latents = None
     sample_voice = None
+    #model = tts.autoregressive_model_path
 
     voice_cache = {}
     def fetch_voice( voice ):
@@ -1134,8 +1137,13 @@ def generate_tortoise(**kwargs):
 
     idx = pad(idx, 4)
 
-    def get_name(line=0, candidate=0, combined=False):
-        name = f"{idx}"
+    def get_name(line=0, candidate=0, combined=False, seed=None, model_path=None):
+        filename_with_extension = os.path.basename(model_path)
+        model_name, _ = os.path.splitext(filename_with_extension)
+        model_part = f"model-{model_name}" if model is not None else "No_model_found"
+        seed_part = f"_seed-{seed}" if seed is not None else ""
+        name = f"{idx}_{seed_part}_{model_part}"
+
        if combined:
             name = f"{name}_combined"
         elif len(texts) > 1:
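Side note on the revised get_name above: it derives a model tag from the checkpoint filename and adds an optional seed tag, so every generated clip's name records which model and seed produced it. Below is a minimal stand-alone sketch of that logic; build_clip_name and the example values are hypothetical, and the committed condition "if model is not None" presumably means model_path, since model only appears in a commented-out assignment.

import os

def build_clip_name(idx, seed=None, model_path=None, combined=False):
    # Hypothetical stand-alone rendering of the naming logic added in this commit.
    # model_path is expected to point at the autoregressive checkpoint (.pth file).
    if model_path is not None:
        model_name, _ = os.path.splitext(os.path.basename(model_path))
        model_part = f"model-{model_name}"
    else:
        model_part = "No_model_found"
    seed_part = f"_seed-{seed}" if seed is not None else ""
    name = f"{idx}_{seed_part}_{model_part}"
    return f"{name}_combined" if combined else name

# Illustrative call:
#   build_clip_name("0001", seed=1234, model_path="./models/finetunes/regis.pth")
#   -> "0001__seed-1234_model-regis"
# (the double underscore appears because seed_part already starts with "_")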
@@ -1220,6 +1228,8 @@ def generate_tortoise(**kwargs):
             raise Exception("Prompt settings editing requested, but received invalid JSON")
 
         settings = get_settings( override=override )
+        #model = tts.autoregressive_model_path
+
         gen, additionals = tts.tts(cut_text, **settings )
 
         parameters['seed'] = additionals[0]
@@ -1231,7 +1241,8 @@ def generate_tortoise(**kwargs):
 
         for j, g in enumerate(gen):
             audio = g.squeeze(0).cpu()
-            name = get_name(line=line, candidate=j)
+            name = get_name(line=line, candidate=j, seed=parameters['seed'], model_path=tts.autoregressive_model_path)
+
 
             settings['text'] = cut_text
             settings['time'] = run_time
@@ -1263,14 +1274,16 @@ def generate_tortoise(**kwargs):
 
     output_voices = []
     for candidate in range(parameters['candidates']):
         if len(texts) > 1:
             audio_clips = []
             for line in range(len(texts)):
-                name = get_name(line=line, candidate=candidate)
+                name = get_name(line=line, candidate=candidate, seed=parameters['seed'], model_path=tts.autoregressive_model_path)
+
                 audio = audio_cache[name]['audio']
                 audio_clips.append(audio)
 
-            name = get_name(candidate=candidate, combined=True)
+            name = get_name(candidate=candidate, combined=True, seed=parameters['seed'], model_path=tts.autoregressive_model_path)
+
             audio = torch.cat(audio_clips, dim=-1)
             torchaudio.save(f'{outdir}/{cleanup_voice_name(voice)}_{name}.wav', audio, args.output_sample_rate)
 
@@ -1281,7 +1294,7 @@ def generate_tortoise(**kwargs):
                 'output': True
             }
         else:
-            name = get_name(candidate=candidate)
+            name = get_name(candidate=candidate, seed=parameters['seed'], model_path=tts.autoregressive_model_path)
             audio_cache[name]['output'] = True
 
 
@@ -1344,7 +1357,8 @@ def generate_tortoise(**kwargs):
         sample_voice = (tts.input_sample_rate, sample_voice.numpy())
 
     info = get_info(voice=voice, latents=False)
-    print(f"Generation took {info['time']} seconds, saved to '{output_voices[0]}'\n")
+    print(f"Generation took {info['time']} seconds, seed {info['seed']}, saved to '{output_voices[0]}'\n")
+
 
     info['seed'] = usedSeed
     if 'latents' in info:
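Taken together with the get_name changes, each saved clip now carries the seed and model name in its filename, and the console summary reports the seed as well. A purely illustrative sketch of what one run might save and print follows; the directory, voice, seed, model and timing are all hypothetical.

# Illustrative only: how the saved path from the torchaudio.save call above is composed.
outdir = "./results/regis"                     # hypothetical output directory
voice = "regis"                                # hypothetical voice name (after cleanup_voice_name)
name = "0001__seed-1234_model-regis_combined"  # output of the naming helper
print(f"{outdir}/{voice}_{name}.wav")
# -> ./results/regis/regis_0001__seed-1234_model-regis_combined.wav
# The console summary would then read something like:
# Generation took 42.1 seconds, seed 1234, saved to './results/regis/regis_0001__seed-1234_model-regis_combined.wav'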