forked from camenduru/ai-voice-cloning
small fixes
This commit is contained in:
parent
f708909687
commit
ad4adc960f
|
@ -12,4 +12,4 @@ This is not endorsed by [neonbjb](https://github.com/neonbjb/). I do not expect
|
|||
|
||||
## Documentation
|
||||
|
||||
Please consult [the wiki](https://git.ecker.tech/mrq/ai-voice-cloning/wiki) for documentation.
|
||||
Please consult [the wiki](https://git.ecker.tech/mrq/ai-voice-cloning/wiki) for the documentation, including how to install, prepare voices for, and use the software.
|
21
src/utils.py
21
src/utils.py
|
@ -55,7 +55,7 @@ def setup_args():
|
|||
'sample-batch-size': None,
|
||||
'embed-output-metadata': True,
|
||||
'latents-lean-and-mean': True,
|
||||
'voice-fixer': True,
|
||||
'voice-fixer': False, # getting tired of long initialization times in a Colab for downloading a large dataset for it
|
||||
'voice-fixer-use-cuda': True,
|
||||
'force-cpu-for-conditioning-latents': False,
|
||||
'device-override': None,
|
||||
|
@ -167,7 +167,7 @@ def generate(
|
|||
progress(0, desc="Loading voice...")
|
||||
voice_samples, conditioning_latents = load_voice(voice)
|
||||
|
||||
if voice_samples is not None:
|
||||
if voice_samples is not None and len(voice_samples) > 0:
|
||||
sample_voice = torch.cat(voice_samples, dim=-1).squeeze().cpu()
|
||||
|
||||
conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, progress=progress, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents)
|
||||
|
@ -374,7 +374,7 @@ def generate(
|
|||
with open(f'{outdir}/{voice}_{name}.json', 'w', encoding="utf-8") as f:
|
||||
f.write(json.dumps(info, indent='\t') )
|
||||
|
||||
if args.voice_fixer and voicefixer:
|
||||
if args.voice_fixer and voicefixer is not None:
|
||||
fixed_output_voices = []
|
||||
for path in progress.tqdm(output_voices, desc="Running voicefix..."):
|
||||
fixed = path.replace(".wav", "_fixed.wav")
|
||||
|
@ -409,6 +409,7 @@ def generate(
|
|||
if 'latents' in info:
|
||||
del info['latents']
|
||||
|
||||
os.makedirs('./config/', exist_ok=True)
|
||||
with open(f'./config/generate.json', 'w', encoding="utf-8") as f:
|
||||
f.write(json.dumps(info, indent='\t') )
|
||||
|
||||
|
@ -422,13 +423,18 @@ def generate(
|
|||
stats,
|
||||
)
|
||||
|
||||
import subprocess
|
||||
|
||||
def run_training(config_path):
|
||||
print("Unloading TTS to save VRAM.")
|
||||
global tts
|
||||
del tts
|
||||
tts = None
|
||||
|
||||
import subprocess
|
||||
subprocess.run(["python", "./src/train.py", "-opt", config_path], env=os.environ.copy(), shell=True, stdout=subprocess.PIPE)
|
||||
cmd = ["python", "./src/train.py", "-opt", config_path]
|
||||
|
||||
print("Spawning process: ", " ".join(cmd))
|
||||
subprocess.run(cmd, env=os.environ.copy(), shell=True, stdout=subprocess.STDOUT, stderr=subprocess.STDOUT)
|
||||
"""
|
||||
from train import train
|
||||
train(config)
|
||||
|
@ -501,7 +507,7 @@ def prepare_dataset( files, outdir, language=None ):
|
|||
for file in files:
|
||||
print(f"Transcribing file: {file}")
|
||||
|
||||
result = whisper_model.transcribe(file, language=language)
|
||||
result = whisper_model.transcribe(file, language=language if language else "English")
|
||||
results[os.path.basename(file)] = result
|
||||
|
||||
print(f"Transcribed file: {file}, {len(result['segments'])} found.")
|
||||
|
@ -557,7 +563,7 @@ def import_voice(file, saveAs = None):
|
|||
path = f"{outdir}/{os.path.basename(filename)}"
|
||||
waveform, sampling_rate = torchaudio.load(filename)
|
||||
|
||||
if args.voice_fixer:
|
||||
if args.voice_fixer and voicefixer is not None:
|
||||
# resample to best bandwidth since voicefixer will do it anyways through librosa
|
||||
if sampling_rate != 44100:
|
||||
print(f"Resampling imported voice sample: {path}")
|
||||
|
@ -714,6 +720,7 @@ def export_exec_settings( listen, share, check_for_updates, models_from_local_on
|
|||
'output-volume': args.output_volume,
|
||||
}
|
||||
|
||||
os.makedirs('./config/', exist_ok=True)
|
||||
with open(f'./config/exec.json', 'w', encoding="utf-8") as f:
|
||||
f.write(json.dumps(settings, indent='\t') )
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user