diff --git a/modules/dlas b/modules/dlas
index 0db8ebc..7b5e059 160000
--- a/modules/dlas
+++ b/modules/dlas
@@ -1 +1 @@
-Subproject commit 0db8ebc543db46c8f533393f39bc1c168f4ee8eb
+Subproject commit 7b5e0592f875772cfed27f00fe16928a503c582a
diff --git a/requirements.txt b/requirements.txt
index ef33ec9..365a8fc 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,5 @@ ffmpeg-python
 gradio
 music-tag
 voicefixer
-psutil
\ No newline at end of file
+psutil
+phonemizer
diff --git a/setup-cuda-bnb.bat b/setup-cuda-bnb.bat
new file mode 100644
index 0000000..d545405
--- /dev/null
+++ b/setup-cuda-bnb.bat
@@ -0,0 +1,6 @@
+
+git clone https://git.ecker.tech/mrq/bitsandbytes-windows.git .\modules\bitsandbytes-windows\
+
+xcopy .\modules\bitsandbytes-windows\bin\* .\venv\Lib\site-packages\bitsandbytes\. /Y
+xcopy .\modules\bitsandbytes-windows\bin\cuda_setup\* .\venv\Lib\site-packages\bitsandbytes\cuda_setup\. /Y
+xcopy .\modules\bitsandbytes-windows\bin\nn\* .\venv\Lib\site-packages\bitsandbytes\nn\. /Y
diff --git a/setup-cuda.bat b/setup-cuda.bat
index 76fd901..15f2da8 100755
--- a/setup-cuda.bat
+++ b/setup-cuda.bat
@@ -10,11 +10,10 @@ python -m pip install -e .\modules\tortoise-tts\
 python -m pip install -r .\modules\dlas\requirements.txt
 python -m pip install -r .\requirements.txt
 
-xcopy .\modules\dlas\bitsandbytes_windows\* .\venv\Lib\site-packages\bitsandbytes\. /Y
-xcopy .\modules\dlas\bitsandbytes_windows\cuda_setup\* .\venv\Lib\site-packages\bitsandbytes\cuda_setup\. /Y
-xcopy .\modules\dlas\bitsandbytes_windows\nn\* .\venv\Lib\site-packages\bitsandbytes\nn\. /Y
+REM setup BnB (call, so control returns to this script afterwards)
+call .\setup-cuda-bnb.bat
 
 del *.sh
 
 pause
-deactivate
\ No newline at end of file
+deactivate
diff --git a/src/utils.py b/src/utils.py
index 03b5252..89fc376 100755
--- a/src/utils.py
+++ b/src/utils.py
@@ -22,7 +22,7 @@ import yaml
 import hashlib
 import string
 
-import tqdm
+from tqdm import tqdm
 import torch
 import torchaudio
 import music_tag
@@ -1269,6 +1269,47 @@ def phonemizer( text, language="eng" ):
 	return ["_" if p in ignored else p for p in phones]
 """
 
+def phonemize_txt( path ):
+	"""Phonemize the text column of a `|`-delimited transcription file.
+
+	Every non-blank line of `path` is split on `|`: field 0 is kept as the
+	audio entry and field 2 is handed to `phonemizer`. The `audio|phonemes`
+	lines are written to a sibling file whose `.txt` suffix is replaced by
+	`.phn.txt`, and are also returned as one newline-joined string.
+	"""
+	# Swap only a trailing ".txt": str.replace would also rewrite ".txt" in a
+	# directory name, and would leave a path without ".txt" unchanged, so the
+	# output would overwrite the input file.
+	stem = path[:-len(".txt")] if path.endswith(".txt") else path
+	outpath = f'{stem}.phn.txt'
+
+	with open(path, 'r', encoding='utf-8') as f:
+		lines = f.readlines()
+
+	reparsed = []
+	# Truncate rather than append so a re-run never mixes in stale results.
+	with open(outpath, 'w', encoding='utf-8') as f:
+		for line in enumerate_progress(lines, desc='Phonemizing...'):
+			# A blank line (e.g. at end of file) has no fields to index.
+			if not line.strip():
+				continue
+
+			split = line.split("|")
+			audio = split[0]
+			text = split[2]
+
+			phonemes = phonemizer( text, preserve_punctuation=True, strip=True )
+			reparsed.append(f'{audio}|{phonemes}')
+			# Incremental copy; replaced by the final rewrite below.
+			f.write(f'{audio}|{phonemes}\n')
+
+	# Final rewrite so the finished file carries no trailing newline.
+	joined = "\n".join(reparsed)
+	with open(outpath, 'w', encoding='utf-8') as f:
+		f.write(joined)
+
+	return joined
+
 def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, progress=gr.Progress() ):
 	indir = f'./training/{voice}/'
 	infile = f'{indir}/whisper.json'
@@ -1858,7 +1899,7 @@ def enumerate_progress(iterable, desc=None, progress=None, verbose=None):
 		print(desc)
 
 	if progress is None:
-		return tqdm(iterable, disable=not verbose)
+		return tqdm(iterable, disable=False) #not verbose)
 	return progress.tqdm(iterable, desc=f'{progress.msg_prefix} {desc}' if hasattr(progress, 'msg_prefix') else desc, track_tqdm=True)
 
 def notify_progress(message, progress=None, verbose=True):