forked from mrq/tortoise-tts
Added storing the loaded model's hash on the TTS object instead of relying on a jury-rigged injection of it (although I still have to inject it for the weirdos who refuse to update the right way); added a parameter when loading voices to load a latent tagged with a model's hash, so latents are now per-model.
This commit is contained in:
parent
7b839a4263
commit
a9de016230
|
@ -42,6 +42,32 @@ MODELS = {
|
||||||
'rlg_diffuser.pth': 'https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/rlg_diffuser.pth',
|
'rlg_diffuser.pth': 'https://huggingface.co/jbetker/tortoise-tts-v2/resolve/main/.models/rlg_diffuser.pth',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def hash_file(path, algo="md5", buffer_size=0):
    """Return the hexadecimal digest of the file at ``path``.

    The file is always read in fixed-size chunks so that hashing a large
    file (for example a model checkpoint) never requires holding the whole
    file in memory. The resulting digest does not depend on the chunk size.

    Args:
        path: Filesystem path of the file to hash.
        algo: Name of the digest algorithm; ``"md5"`` (default) or ``"sha1"``.
        buffer_size: Number of bytes to read per chunk. Values less than or
            equal to zero select a built-in default chunk size.

    Returns:
        The hexadecimal digest as a string.

    Raises:
        ValueError: If ``algo`` is not one of the supported algorithm names.
        FileNotFoundError: If ``path`` does not exist.
    """
    import hashlib

    # Chunk size used when the caller does not request a specific one.
    default_chunk_size = 1024 * 1024

    constructors = {"md5": hashlib.md5, "sha1": hashlib.sha1}
    constructor = constructors.get(algo) if isinstance(algo, str) else None
    if constructor is None:
        raise ValueError(f'Unknown hash algorithm specified: {algo}')
    digest = constructor()

    # Open directly instead of checking for existence first: this avoids the
    # window in which the file could disappear between the check and the open.
    try:
        f = open(path, 'rb')
    except FileNotFoundError as err:
        raise FileNotFoundError(f'Path not found: {path}') from err

    chunk_size = buffer_size if buffer_size > 0 else default_chunk_size
    with f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)

    return digest.hexdigest()
|
||||||
|
|
||||||
def check_for_kill_signal():
|
def check_for_kill_signal():
|
||||||
global STOP_SIGNAL
|
global STOP_SIGNAL
|
||||||
if STOP_SIGNAL:
|
if STOP_SIGNAL:
|
||||||
|
@ -221,16 +247,6 @@ class TextToSpeech:
|
||||||
if device is None:
|
if device is None:
|
||||||
device = get_device(verbose=True)
|
device = get_device(verbose=True)
|
||||||
|
|
||||||
try:
|
|
||||||
import tortoise.utils.torch_intermediary as ml
|
|
||||||
if ml.OVERRIDE_ADAM:
|
|
||||||
print("Using BitsAndBytes ADAMW optimizations")
|
|
||||||
else:
|
|
||||||
print("NOT using BitsAndBytes ADAMW optimizations")
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
pass
|
|
||||||
|
|
||||||
self.input_sample_rate = input_sample_rate
|
self.input_sample_rate = input_sample_rate
|
||||||
self.output_sample_rate = output_sample_rate
|
self.output_sample_rate = output_sample_rate
|
||||||
self.minor_optimizations = minor_optimizations
|
self.minor_optimizations = minor_optimizations
|
||||||
|
@ -252,6 +268,7 @@ class TextToSpeech:
|
||||||
self.tokenizer = VoiceBpeTokenizer()
|
self.tokenizer = VoiceBpeTokenizer()
|
||||||
|
|
||||||
self.autoregressive_model_path = autoregressive_model_path if autoregressive_model_path and os.path.exists(autoregressive_model_path) else get_model_path('autoregressive.pth', models_dir)
|
self.autoregressive_model_path = autoregressive_model_path if autoregressive_model_path and os.path.exists(autoregressive_model_path) else get_model_path('autoregressive.pth', models_dir)
|
||||||
|
self.autoregressive_model_hash = hash_file(self.autoregressive_model_path)
|
||||||
|
|
||||||
if os.path.exists(f'{models_dir}/autoregressive.ptt'):
|
if os.path.exists(f'{models_dir}/autoregressive.ptt'):
|
||||||
# Assume this is a traced directory.
|
# Assume this is a traced directory.
|
||||||
|
@ -295,6 +312,7 @@ class TextToSpeech:
|
||||||
def load_autoregressive_model(self, autoregressive_model_path):
|
def load_autoregressive_model(self, autoregressive_model_path):
|
||||||
previous_path = self.autoregressive_model_path
|
previous_path = self.autoregressive_model_path
|
||||||
self.autoregressive_model_path = autoregressive_model_path if autoregressive_model_path and os.path.exists(autoregressive_model_path) else get_model_path('autoregressive.pth', self.models_dir)
|
self.autoregressive_model_path = autoregressive_model_path if autoregressive_model_path and os.path.exists(autoregressive_model_path) else get_model_path('autoregressive.pth', self.models_dir)
|
||||||
|
self.autoregressive_model_hash = hash_file(self.autoregressive_model_path)
|
||||||
|
|
||||||
del self.autoregressive
|
del self.autoregressive
|
||||||
self.autoregressive = UnifiedVoice(max_mel_tokens=604, max_text_tokens=402, max_conditioning_inputs=2, layers=30,
|
self.autoregressive = UnifiedVoice(max_mel_tokens=604, max_text_tokens=402, max_conditioning_inputs=2, layers=30,
|
||||||
|
@ -306,6 +324,7 @@ class TextToSpeech:
|
||||||
if self.preloaded_tensors:
|
if self.preloaded_tensors:
|
||||||
self.autoregressive = self.autoregressive.to(self.device)
|
self.autoregressive = self.autoregressive.to(self.device)
|
||||||
|
|
||||||
|
|
||||||
return previous_path != self.autoregressive_model_path
|
return previous_path != self.autoregressive_model_path
|
||||||
|
|
||||||
def load_cvvp(self):
|
def load_cvvp(self):
|
||||||
|
|
|
@ -91,25 +91,28 @@ def get_voices(extra_voice_dirs=[], load_latents=True):
|
||||||
return voices
|
return voices
|
||||||
|
|
||||||
|
|
||||||
def load_voice(voice, extra_voice_dirs=[], load_latents=True, sample_rate=22050, device='cpu'):
|
def load_voice(voice, extra_voice_dirs=[], load_latents=True, sample_rate=22050, device='cpu', model_hash=None):
|
||||||
if voice == 'random':
|
if voice == 'random':
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
voices = get_voices(extra_voice_dirs=extra_voice_dirs, load_latents=load_latents)
|
voices = get_voices(extra_voice_dirs=extra_voice_dirs, load_latents=load_latents)
|
||||||
paths = voices[voice]
|
|
||||||
|
|
||||||
|
paths = voices[voice]
|
||||||
mtime = 0
|
mtime = 0
|
||||||
voices = []
|
|
||||||
latent = None
|
latent = None
|
||||||
for file in paths:
|
voices = []
|
||||||
if file[-16:] == "cond_latents.pth":
|
|
||||||
latent = file
|
for path in paths:
|
||||||
elif file[-4:] == ".pth":
|
filename = os.path.basename(path)
|
||||||
{}
|
if filename[-4:] == ".pth" and filename[:12] == "cond_latents":
|
||||||
# noop
|
if not model_hash and filename == "cond_latents.pth":
|
||||||
|
latent = path
|
||||||
|
elif model_hash and filename == f"cond_latents_{model_hash[:8]}.pth":
|
||||||
|
latent = path
|
||||||
else:
|
else:
|
||||||
voices.append(file)
|
voices.append(path)
|
||||||
mtime = max(mtime, os.path.getmtime(file))
|
mtime = max(mtime, os.path.getmtime(path))
|
||||||
|
|
||||||
if load_latents and latent is not None:
|
if load_latents and latent is not None:
|
||||||
if os.path.getmtime(latent) > mtime:
|
if os.path.getmtime(latent) > mtime:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user