From 2830d1fa966121a847edb5ab8cdc4ce09bd47ee3 Mon Sep 17 00:00:00 2001 From: mrq Date: Thu, 12 Oct 2023 00:27:46 +0000 Subject: [PATCH] fixed setup scripts and Dockerfile to NOT use extra-index-url and instead use index-url (how this happened I don't know, since pytorch instructions use index-url), '''fixed''' phonemizing japanese for VALL-E with pykakasi --- Dockerfile | 2 +- requirements.txt | 3 ++- setup-cuda.bat | 2 +- setup-cuda.sh | 2 +- setup-rocm.sh | 2 +- src/utils.py | 20 ++++++++++++++++++++ 6 files changed, 26 insertions(+), 5 deletions(-) mode change 100644 => 100755 Dockerfile diff --git a/Dockerfile b/Dockerfile old mode 100644 new mode 100755 index e0fb05a..7ff4dd3 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ ENV PATH="$HOME/miniconda/bin:$PATH" RUN conda init RUN conda install python=$PYTHON_VERSION RUN python3 -m pip install --upgrade pip -RUN pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 +RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 RUN mkdir $HOME/ai-voice-cloning WORKDIR $HOME/ai-voice-cloning diff --git a/requirements.txt b/requirements.txt index 732b51d..d062a19 100755 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,5 @@ psutil phonemizer pydantic==1.10.11 websockets -beartype==0.15.0 \ No newline at end of file +beartype==0.15.0 +pykakasi \ No newline at end of file diff --git a/setup-cuda.bat b/setup-cuda.bat index 8c75c1f..2045115 100755 --- a/setup-cuda.bat +++ b/setup-cuda.bat @@ -4,7 +4,7 @@ git submodule update --remote python -m venv venv call .\venv\Scripts\activate.bat python -m pip install --upgrade pip -python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 +python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 python -m pip install -r .\modules\tortoise-tts\requirements.txt python -m pip install -e .\modules\tortoise-tts\ python -m pip install -r .\modules\dlas\requirements.txt diff --git a/setup-cuda.sh b/setup-cuda.sh index 72ffe14..2c49c87 100755 --- a/setup-cuda.sh +++ b/setup-cuda.sh @@ -7,7 +7,7 @@ python3 -m venv venv source ./venv/bin/activate python3 -m pip install --upgrade pip # just to be safe # CUDA -pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 +pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 # install requirements python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe diff --git a/setup-rocm.sh b/setup-rocm.sh index 249e4ab..04a6a96 100755 --- a/setup-rocm.sh +++ b/setup-rocm.sh @@ -7,7 +7,7 @@ python3 -m venv venv source ./venv/bin/activate python3 -m pip install --upgrade pip # just to be safe # ROCM -pip3 install torch==1.13.1 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.2 # 5.4.2 doesn't work for me desu +pip3 install torch==1.13.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.2 # 5.4.2 doesn't work for me desu # install requirements python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe diff --git a/src/utils.py b/src/utils.py index 0f56d49..287c909 100755 --- a/src/utils.py +++ b/src/utils.py @@ -68,8 +68,20 @@ BARK_ENABLED = False VERBOSE_DEBUG = True +KKS = None +PYKAKASI_ENABLED = False + import traceback +try: + import pykakasi + KKS = pykakasi.kakasi() + PYKAKASI_ENABLED = True +except Exception as e: + #if VERBOSE_DEBUG: + # print(traceback.format_exc()) + pass + try: from whisper.normalizers.english import EnglishTextNormalizer from whisper.normalizers.basic import BasicTextNormalizer @@ -2734,6 +2746,14 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p phn_file = jobs['phonemize'][0][i] normalized = jobs['phonemize'][1][i] + if language == "japanese": + language = "ja" + + if language == "ja" and PYKAKASI_ENABLED and KKS is not None: + normalized = KKS.convert(normalized) + normalized = [ n["hira"] for n in normalized ] + normalized = "".join(normalized) + try: phonemized = valle_phonemize( normalized ) open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized)) -- 2.45.2