forked from mrq/ai-voice-cloning
Compare commits
4 Commits
7dd8b740e8
...
94f88886b0
Author | SHA1 | Date | |
---|---|---|---|
94f88886b0 | |||
2830d1fa96 | |||
17acfee5d0 | |||
|
2fae5008fc |
2
Dockerfile
Normal file → Executable file
2
Dockerfile
Normal file → Executable file
|
@ -20,7 +20,7 @@ ENV PATH="$HOME/miniconda/bin:$PATH"
|
|||
RUN conda init
|
||||
RUN conda install python=$PYTHON_VERSION
|
||||
RUN python3 -m pip install --upgrade pip
|
||||
RUN pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
|
||||
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
||||
|
||||
RUN mkdir $HOME/ai-voice-cloning
|
||||
WORKDIR $HOME/ai-voice-cloning
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit b10c58436d6871c26485d30b203e6cfdd4167602
|
||||
Subproject commit bf3b6c87aa825295f64a31d010fd5e896fbcda43
|
|
@ -38,10 +38,24 @@
|
|||
|
||||
],
|
||||
"source":[
|
||||
"!apt install python3.8-venv\n",
|
||||
"!apt install python3.10-venv\n",
|
||||
"!git clone https://git.ecker.tech/mrq/ai-voice-cloning/\n",
|
||||
"%cd /content/ai-voice-cloning\n",
|
||||
"!./setup-cuda.sh"
|
||||
"# get local dependencies\n",
|
||||
"!git submodule init\n",
|
||||
"!git submodule update --remote\n",
|
||||
"# setup venv\n",
|
||||
"!python3 -m venv venv\n",
|
||||
"!source ./venv/bin/activate\n",
|
||||
"!python3 -m pip install --upgrade pip # just to be safe\n",
|
||||
"# CUDA\n",
|
||||
"!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n",
|
||||
"# install requirements\n",
|
||||
"!python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements\n",
|
||||
"!python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe\n",
|
||||
"!python3 -m pip install -r ./modules/dlas/requirements.txt # instal DLAS requirements, last, because whisperx will break a dependency here\n",
|
||||
"!python3 -m pip install -e ./modules/dlas/ # install DLAS\n",
|
||||
"!python3 -m pip install -r ./requirements.txt # install local requirements"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -115,7 +129,8 @@
|
|||
"cell_type":"code",
|
||||
"source":[
|
||||
"%cd /content/ai-voice-cloning/\n",
|
||||
"!./start.sh --share"
|
||||
"!source ./venv/bin/activate\n",
|
||||
"!python3 ./src/main.py --share"
|
||||
],
|
||||
"metadata":{
|
||||
"id":"QRA8jF3cF-YJ"
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
git+https://github.com/openai/whisper.git
|
||||
--extra-index-url https://download.pytorch.org/whl/cu118
|
||||
torch>=2.1.0
|
||||
torchvision
|
||||
torchaudio
|
||||
|
||||
openai-whisper
|
||||
more-itertools
|
||||
ffmpeg-python
|
||||
gradio<=3.23.0
|
||||
|
@ -10,3 +14,4 @@ phonemizer
|
|||
pydantic==1.10.11
|
||||
websockets
|
||||
beartype==0.15.0
|
||||
pykakasi
|
|
@ -4,7 +4,7 @@ git submodule update --remote
|
|||
python -m venv venv
|
||||
call .\venv\Scripts\activate.bat
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
|
||||
python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
||||
python -m pip install -r .\modules\tortoise-tts\requirements.txt
|
||||
python -m pip install -e .\modules\tortoise-tts\
|
||||
python -m pip install -r .\modules\dlas\requirements.txt
|
||||
|
|
|
@ -7,7 +7,7 @@ python3 -m venv venv
|
|||
source ./venv/bin/activate
|
||||
python3 -m pip install --upgrade pip # just to be safe
|
||||
# CUDA
|
||||
pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
|
||||
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
|
||||
# install requirements
|
||||
python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements
|
||||
python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe
|
||||
|
|
|
@ -7,7 +7,7 @@ python3 -m venv venv
|
|||
source ./venv/bin/activate
|
||||
python3 -m pip install --upgrade pip # just to be safe
|
||||
# ROCM
|
||||
pip3 install torch==1.13.1 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.2 # 5.4.2 doesn't work for me desu
|
||||
pip3 install torch==1.13.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.2 # 5.4.2 doesn't work for me desu
|
||||
# install requirements
|
||||
python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements
|
||||
python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe
|
||||
|
|
24
src/utils.py
24
src/utils.py
|
@ -68,8 +68,20 @@ BARK_ENABLED = False
|
|||
|
||||
VERBOSE_DEBUG = True
|
||||
|
||||
KKS = None
|
||||
PYKAKASI_ENABLED = False
|
||||
|
||||
import traceback
|
||||
|
||||
try:
|
||||
import pykakasi
|
||||
KKS = pykakasi.kakasi()
|
||||
PYKAKASI_ENABLED = True
|
||||
except Exception as e:
|
||||
#if VERBOSE_DEBUG:
|
||||
# print(traceback.format_exc())
|
||||
pass
|
||||
|
||||
try:
|
||||
from whisper.normalizers.english import EnglishTextNormalizer
|
||||
from whisper.normalizers.basic import BasicTextNormalizer
|
||||
|
@ -2665,8 +2677,8 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
|
|||
|
||||
|
||||
culled = len(text) < text_length
|
||||
#if not culled and audio_length > 0:
|
||||
# culled = duration < audio_length
|
||||
if not culled and audio_length > 0:
|
||||
culled = duration < audio_length
|
||||
|
||||
line = f'audio/{file}|{phonemes if phonemize and phonemes else text}'
|
||||
|
||||
|
@ -2734,6 +2746,14 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
|
|||
phn_file = jobs['phonemize'][0][i]
|
||||
normalized = jobs['phonemize'][1][i]
|
||||
|
||||
if language == "japanese":
|
||||
language = "ja"
|
||||
|
||||
if language == "ja" and PYKAKASI_ENABLED and KKS is not None:
|
||||
normalized = KKS.convert(normalized)
|
||||
normalized = [ n["hira"] for n in normalized ]
|
||||
normalized = "".join(normalized)
|
||||
|
||||
try:
|
||||
phonemized = valle_phonemize( normalized )
|
||||
open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized))
|
||||
|
|
Loading…
Reference in New Issue
Block a user