From 2830d1fa966121a847edb5ab8cdc4ce09bd47ee3 Mon Sep 17 00:00:00 2001
From: mrq <mrq@ecker.tech>
Date: Thu, 12 Oct 2023 00:27:46 +0000
Subject: [PATCH] fixed setup scripts and Dockerfile to use --index-url
 instead of --extra-index-url (how this happened I don't know, since the
 PyTorch install instructions use --index-url), '''fixed''' phonemizing
 Japanese for VALL-E with pykakasi

---
 Dockerfile       |  2 +-
 requirements.txt |  3 ++-
 setup-cuda.bat   |  2 +-
 setup-cuda.sh    |  2 +-
 setup-rocm.sh    |  2 +-
 src/utils.py     | 20 ++++++++++++++++++++
 6 files changed, 26 insertions(+), 5 deletions(-)
 mode change 100644 => 100755 Dockerfile

diff --git a/Dockerfile b/Dockerfile
old mode 100644
new mode 100755
index e0fb05a..7ff4dd3
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,7 +20,7 @@ ENV PATH="$HOME/miniconda/bin:$PATH"
 RUN conda init
 RUN conda install python=$PYTHON_VERSION
 RUN python3 -m pip install --upgrade pip
-RUN pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
+RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
 
 RUN mkdir $HOME/ai-voice-cloning
 WORKDIR $HOME/ai-voice-cloning
diff --git a/requirements.txt b/requirements.txt
index 732b51d..d062a19 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,5 @@ psutil
 phonemizer
 pydantic==1.10.11
 websockets
-beartype==0.15.0
\ No newline at end of file
+beartype==0.15.0
+pykakasi
\ No newline at end of file
diff --git a/setup-cuda.bat b/setup-cuda.bat
index 8c75c1f..2045115 100755
--- a/setup-cuda.bat
+++ b/setup-cuda.bat
@@ -4,7 +4,7 @@ git submodule update --remote
 python -m venv venv
 call .\venv\Scripts\activate.bat
 python -m pip install --upgrade pip
-python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
+python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
 python -m pip install -r .\modules\tortoise-tts\requirements.txt
 python -m pip install -e .\modules\tortoise-tts\
 python -m pip install -r .\modules\dlas\requirements.txt
diff --git a/setup-cuda.sh b/setup-cuda.sh
index 72ffe14..2c49c87 100755
--- a/setup-cuda.sh
+++ b/setup-cuda.sh
@@ -7,7 +7,7 @@ python3 -m venv venv
 source ./venv/bin/activate
 python3 -m pip install --upgrade pip # just to be safe
 # CUDA
-pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
+pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
 # install requirements
 python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements
 python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe
diff --git a/setup-rocm.sh b/setup-rocm.sh
index 249e4ab..04a6a96 100755
--- a/setup-rocm.sh
+++ b/setup-rocm.sh
@@ -7,7 +7,7 @@ python3 -m venv venv
 source ./venv/bin/activate
 python3 -m pip install --upgrade pip # just to be safe
 # ROCM
-pip3 install torch==1.13.1 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.2 # 5.4.2 doesn't work for me desu
+pip3 install torch==1.13.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.2 # 5.4.2 doesn't work for me desu
 # install requirements
 python3 -m pip install -r ./modules/tortoise-tts/requirements.txt # install TorToiSe requirements
 python3 -m pip install -e ./modules/tortoise-tts/ # install TorToiSe
diff --git a/src/utils.py b/src/utils.py
index 0f56d49..287c909 100755
--- a/src/utils.py
+++ b/src/utils.py
@@ -68,8 +68,20 @@ BARK_ENABLED = False
 
 VERBOSE_DEBUG = True
 
+KKS = None
+PYKAKASI_ENABLED = False
+
 import traceback
 
+try:
+	import pykakasi
+	KKS = pykakasi.kakasi()
+	PYKAKASI_ENABLED = True
+except Exception as e:
+	#if VERBOSE_DEBUG:
+	#	print(traceback.format_exc())
+	pass
+
 try:
 	from whisper.normalizers.english import EnglishTextNormalizer
 	from whisper.normalizers.basic import BasicTextNormalizer
@@ -2734,6 +2746,14 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
 		phn_file = jobs['phonemize'][0][i]
 		normalized = jobs['phonemize'][1][i]
 
+		if language == "japanese":
+			language = "ja"
+
+		if language == "ja" and PYKAKASI_ENABLED and KKS is not None:
+			normalized = KKS.convert(normalized)
+			normalized = [ n["hira"] for n in normalized ]
+			normalized = "".join(normalized)
+
 		try:
 			phonemized = valle_phonemize( normalized )
 			open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized))