forked from mrq/ai-voice-cloning
whispercpp actually works now (language loading was broken, and slice timestamps needed to be divided by 100); transcribing audio now checks segments for silence and discards the silent ones
This commit is contained in:
parent
b8a620e8d7
commit
d97639e138
22
src/utils.py
22
src/utils.py
|
@ -39,6 +39,7 @@ from tortoise.utils.device import get_device_name, set_device_name
|
|||
# Download URL for the Tortoise dVAE checkpoint, pinned to a specific repo
# revision (the commit hash in the /resolve/ path) so the fetched weights
# are reproducible.
MODELS['dvae.pth'] = "https://huggingface.co/jbetker/tortoise-tts-v2/resolve/3704aea61678e7e468a06d8eea121dba368a798e/.models/dvae.pth"
|
||||
# Whisper model sizes offered for transcription (multilingual variants).
WHISPER_MODELS = ["tiny", "base", "small", "medium", "large"]
|
||||
# English-only (".en") Whisper variants; no "large.en" model exists, hence
# the shorter list than WHISPER_MODELS.
WHISPER_SPECIALIZED_MODELS = ["tiny.en", "base.en", "small.en", "medium.en"]
|
||||
# Epoch milestones — presumably consumed by the training scheduler (e.g. for
# LR decay steps); TODO(review): confirm against the trainer code.
EPOCH_SCHEDULE = [ 9, 18, 25, 33 ]
|
||||
|
||||
# Module-level holder for parsed arguments/settings; None until populated by
# setup code elsewhere in the file — initializer not visible in this chunk.
args = None
|
||||
# Module-level holder for the loaded TTS backend; None until the model is
# (lazily) loaded by setup code elsewhere — NOTE(review): confirm loader.
tts = None
|
||||
|