updated colab notebook

fixed setup scripts and Dockerfile to use index-url instead of extra-index-url (I don't know how this happened, since the PyTorch instructions use index-url); '''fixed''' phonemizing Japanese for VALL-E with pykakasi
fixed culling for validation based on audio duration not working
2023-10-24 23:39:46 +00:00 · 2023-10-12 00:27:46 +00:00 · 2023-09-21 22:33:11 +00:00 · 2023-09-19 02:25:04 +00:00
8 changed files with 52 additions and 12 deletions
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,7 +20,7 @@ ENV PATH="$HOME/miniconda/bin:$PATH"
 RUN conda init
 RUN conda install python=$PYTHON_VERSION
 RUN python3 -m pip install --upgrade pip
-RUN pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
+RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

 RUN mkdir $HOME/ai-voice-cloning
 WORKDIR $HOME/ai-voice-cloning
--- a/modules/tortoise-tts
+++ b/modules/tortoise-tts
@@ -1 +1 @@
-Subproject commit b10c58436d6871c26485d30b203e6cfdd4167602
+Subproject commit bf3b6c87aa825295f64a31d010fd5e896fbcda43
--- a/notebook_colab.ipynb
+++ b/notebook_colab.ipynb
@@ -38,10 +38,24 @@
            
         ],
         "source":[
-            "!apt install python3.8-venv\n",
+            "!apt install python3.10-venv\n",
            "!git clone https://git.ecker.tech/mrq/ai-voice-cloning/\n",
            "%cd /content/ai-voice-cloning\n",
-            "!./setup-cuda.sh"
+            "# get local dependencies\n",
+            "!git submodule init\n",
+            "!git submodule update --remote\n",
+            "# setup venv\n",
+            "!python3 -m venv venv\n",
+            "!source ./venv/bin/activate\n",
+            "!python3 -m pip install --upgrade pip # just to be safe\n",
+            "# CUDA\n",
+            "!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n",
+            "# install requirements\n",
+            "!python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements\n",
+            "!python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe\n",
+            "!python3 -m pip install -r ./modules/dlas/requirements.txt # install DLAS requirements, last, because whisperx will break a dependency here\n",
+            "!python3 -m pip install -e ./modules/dlas/ # install DLAS\n",
+            "!python3 -m pip install -r ./requirements.txt # install local requirements"
         ]
      },
      {
@@ -115,7 +129,8 @@
         "cell_type":"code",
         "source":[
            "%cd /content/ai-voice-cloning/\n",
-            "!./start.sh --share"
+            "!source ./venv/bin/activate\n",
+            "!python3 ./src/main.py --share"
         ],
         "metadata":{
            "id":"QRA8jF3cF-YJ"
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,9 @@
-git+https://github.com/openai/whisper.git
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch>=2.1.0
+torchvision
+torchaudio

+openai-whisper
 more-itertools
 ffmpeg-python
 gradio<=3.23.0
@@ -9,4 +13,5 @@ psutil
 phonemizer
 pydantic==1.10.11
 websockets
-beartype==0.15.0
+beartype==0.15.0
+pykakasi
--- a/setup-cuda.bat
+++ b/setup-cuda.bat
@@ -4,7 +4,7 @@ git submodule update --remote
 python -m venv venv
 call .\venv\Scripts\activate.bat
 python -m pip install --upgrade pip
-python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
+python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
 python -m pip install -r .\modules\tortoise-tts\requirements.txt
 python -m pip install -e .\modules\tortoise-tts\
 python -m pip install -r .\modules\dlas\requirements.txt
--- a/setup-cuda.sh
+++ b/setup-cuda.sh
@@ -7,7 +7,7 @@ python3 -m venv venv
 source ./venv/bin/activate
 python3 -m pip install --upgrade pip # just to be safe
 # CUDA
-pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
+pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
 # install requirements
 python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements
 python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe
--- a/setup-rocm.sh
+++ b/setup-rocm.sh
@@ -7,7 +7,7 @@ python3 -m venv venv
 source ./venv/bin/activate
 python3 -m pip install --upgrade pip # just to be safe
 # ROCM
-pip3 install torch==1.13.1 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.2 # 5.4.2 doesn't work for me desu
+pip3 install torch==1.13.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.2 # 5.4.2 doesn't work for me desu
 # install requirements
 python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements
 python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe
--- a/src/utils.py
+++ b/src/utils.py
@@ -68,8 +68,20 @@ BARK_ENABLED = False

 VERBOSE_DEBUG = True

+KKS = None
+PYKAKASI_ENABLED = False
+
 import traceback

+try:
+	import pykakasi
+	KKS = pykakasi.kakasi()
+	PYKAKASI_ENABLED = True
+except Exception as e:
+	#if VERBOSE_DEBUG:
+	#	print(traceback.format_exc())
+	pass
+
 try:
 	from whisper.normalizers.english import EnglishTextNormalizer
 	from whisper.normalizers.basic import BasicTextNormalizer
@@ -2665,8 +2677,8 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p


 		culled = len(text) < text_length
-		#if not culled and audio_length > 0:
-		#	culled = duration < audio_length
+		if not culled and audio_length > 0:
+			culled = duration < audio_length

 		line = f'audio/{file}|{phonemes if phonemize and phonemes else text}'

@@ -2734,6 +2746,14 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
 		phn_file = jobs['phonemize'][0][i]
 		normalized = jobs['phonemize'][1][i]

+		if language == "japanese":
+			language = "ja"
+
+		if language == "ja" and PYKAKASI_ENABLED and KKS is not None:
+			normalized = KKS.convert(normalized)
+			normalized = [ n["hira"] for n in normalized ]
+			normalized = "".join(normalized)
+
 		try:
 			phonemized = valle_phonemize( normalized )
 			open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized))
Author	SHA1	Message	Date
mrq	94f88886b0	updated colab notebook	2023-10-24 23:39:46 +00:00
mrq	2830d1fa96	fixed setup scripts and Dockerfile to use index-url instead of extra-index-url (I don't know how this happened, since the PyTorch instructions use index-url); '''fixed''' phonemizing Japanese for VALL-E with pykakasi	2023-10-12 00:27:46 +00:00
mrq	17acfee5d0	fixed culling for validation based on audio duration not working	2023-09-21 22:33:11 +00:00
mrq	2fae5008fc	Merge pull request 'Freeze beartype==0.15.0' (#393) from Jarod/ai-voice-cloning:master into master. Reviewed-on: mrq/ai-voice-cloning#393	2023-09-19 02:25:04 +00:00