Compare commits

...

4 Commits

8 changed files with 52 additions and 12 deletions

2
Dockerfile Normal file → Executable file
View File

@ -20,7 +20,7 @@ ENV PATH="$HOME/miniconda/bin:$PATH"
RUN conda init RUN conda init
RUN conda install python=$PYTHON_VERSION RUN conda install python=$PYTHON_VERSION
RUN python3 -m pip install --upgrade pip RUN python3 -m pip install --upgrade pip
RUN pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
RUN mkdir $HOME/ai-voice-cloning RUN mkdir $HOME/ai-voice-cloning
WORKDIR $HOME/ai-voice-cloning WORKDIR $HOME/ai-voice-cloning

@ -1 +1 @@
Subproject commit b10c58436d6871c26485d30b203e6cfdd4167602 Subproject commit bf3b6c87aa825295f64a31d010fd5e896fbcda43

View File

@ -38,10 +38,24 @@
], ],
"source":[ "source":[
"!apt install python3.8-venv\n", "!apt install python3.10-venv\n",
"!git clone https://git.ecker.tech/mrq/ai-voice-cloning/\n", "!git clone https://git.ecker.tech/mrq/ai-voice-cloning/\n",
"%cd /content/ai-voice-cloning\n", "%cd /content/ai-voice-cloning\n",
"!./setup-cuda.sh" "# get local dependencies\n",
"!git submodule init\n",
"!git submodule update --remote\n",
"# setup venv\n",
"!python3 -m venv venv\n",
"!source ./venv/bin/activate\n",
"!python3 -m pip install --upgrade pip # just to be safe\n",
"# CUDA\n",
"!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n",
"# install requirements\n",
"!python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements\n",
"!python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe\n",
"!python3 -m pip install -r ./modules/dlas/requirements.txt # instal DLAS requirements, last, because whisperx will break a dependency here\n",
"!python3 -m pip install -e ./modules/dlas/ # install DLAS\n",
"!python3 -m pip install -r ./requirements.txt # install local requirements"
] ]
}, },
{ {
@ -115,7 +129,8 @@
"cell_type":"code", "cell_type":"code",
"source":[ "source":[
"%cd /content/ai-voice-cloning/\n", "%cd /content/ai-voice-cloning/\n",
"!./start.sh --share" "!source ./venv/bin/activate\n",
"!python3 ./src/main.py --share"
], ],
"metadata":{ "metadata":{
"id":"QRA8jF3cF-YJ" "id":"QRA8jF3cF-YJ"

View File

@ -1,5 +1,9 @@
git+https://github.com/openai/whisper.git --extra-index-url https://download.pytorch.org/whl/cu118
torch>=2.1.0
torchvision
torchaudio
openai-whisper
more-itertools more-itertools
ffmpeg-python ffmpeg-python
gradio<=3.23.0 gradio<=3.23.0
@ -9,4 +13,5 @@ psutil
phonemizer phonemizer
pydantic==1.10.11 pydantic==1.10.11
websockets websockets
beartype==0.15.0 beartype==0.15.0
pykakasi

View File

@ -4,7 +4,7 @@ git submodule update --remote
python -m venv venv python -m venv venv
call .\venv\Scripts\activate.bat call .\venv\Scripts\activate.bat
python -m pip install --upgrade pip python -m pip install --upgrade pip
python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
python -m pip install -r .\modules\tortoise-tts\requirements.txt python -m pip install -r .\modules\tortoise-tts\requirements.txt
python -m pip install -e .\modules\tortoise-tts\ python -m pip install -e .\modules\tortoise-tts\
python -m pip install -r .\modules\dlas\requirements.txt python -m pip install -r .\modules\dlas\requirements.txt

View File

@ -7,7 +7,7 @@ python3 -m venv venv
source ./venv/bin/activate source ./venv/bin/activate
python3 -m pip install --upgrade pip # just to be safe python3 -m pip install --upgrade pip # just to be safe
# CUDA # CUDA
pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# install requirements # install requirements
python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements
python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe

View File

@ -7,7 +7,7 @@ python3 -m venv venv
source ./venv/bin/activate source ./venv/bin/activate
python3 -m pip install --upgrade pip # just to be safe python3 -m pip install --upgrade pip # just to be safe
# ROCM # ROCM
pip3 install torch==1.13.1 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.2 # 5.4.2 doesn't work for me desu pip3 install torch==1.13.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.2 # 5.4.2 doesn't work for me desu
# install requirements # install requirements
python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements
python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe

View File

@ -68,8 +68,20 @@ BARK_ENABLED = False
VERBOSE_DEBUG = True VERBOSE_DEBUG = True
KKS = None
PYKAKASI_ENABLED = False
import traceback import traceback
try:
import pykakasi
KKS = pykakasi.kakasi()
PYKAKASI_ENABLED = True
except Exception as e:
#if VERBOSE_DEBUG:
# print(traceback.format_exc())
pass
try: try:
from whisper.normalizers.english import EnglishTextNormalizer from whisper.normalizers.english import EnglishTextNormalizer
from whisper.normalizers.basic import BasicTextNormalizer from whisper.normalizers.basic import BasicTextNormalizer
@ -2665,8 +2677,8 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
culled = len(text) < text_length culled = len(text) < text_length
#if not culled and audio_length > 0: if not culled and audio_length > 0:
# culled = duration < audio_length culled = duration < audio_length
line = f'audio/{file}|{phonemes if phonemize and phonemes else text}' line = f'audio/{file}|{phonemes if phonemize and phonemes else text}'
@ -2734,6 +2746,14 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
phn_file = jobs['phonemize'][0][i] phn_file = jobs['phonemize'][0][i]
normalized = jobs['phonemize'][1][i] normalized = jobs['phonemize'][1][i]
if language == "japanese":
language = "ja"
if language == "ja" and PYKAKASI_ENABLED and KKS is not None:
normalized = KKS.convert(normalized)
normalized = [ n["hira"] for n in normalized ]
normalized = "".join(normalized)
try: try:
phonemized = valle_phonemize( normalized ) phonemized = valle_phonemize( normalized )
open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized)) open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized))