ai-voice-cloning/notebook.ipynb

{
   "nbformat":4,
   "nbformat_minor":0,
   "metadata":{
      "colab":{
         "private_outputs":true,
         "provenance":[
            
         ]
      },
      "kernelspec":{
         "name":"python3",
         "display_name":"Python 3"
      },
      "language_info":{
         "name":"python"
      },
      "accelerator":"GPU",
      "gpuClass":"standard"
   },
   "cells":[
      {
         "cell_type":"markdown",
         "source":[
            "## Initialization"
         ],
         "metadata":{
            "id":"ni41hmE03DL6"
         }
      },
      {
         "cell_type":"code",
         "execution_count":null,
         "metadata":{
            "id":"FtsMKKfH18iM"
         },
         "outputs":[
            
         ],
         "source":[
            "# Fetch the web UI repo and work from inside it\n",
            "!git clone https://git.ecker.tech/mrq/ai-voice-cloning/\n",
            "%cd ai-voice-cloning\n",
            "\n",
            "# TODO: fix venvs working for subprocess.Popen calling a bash script\n",
            "#!apt install python3.8-venv\n",
            "#!python -m venv venv\n",
            "#!source ./venv/bin/activate\n",
            "\n",
            "# DL-Art-School is cloned into ./dlas\n",
            "!git clone https://git.ecker.tech/mrq/DL-Art-School dlas\n",
            "# %pip (rather than !pip) installs into the environment of the running kernel\n",
            "%pip install --upgrade pip\n",
            "%pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116\n",
            "!./setup-tortoise.sh\n",
            "!./setup-training.sh\n",
            "%pip install -r ./requirements.txt"
         ]
      },
      {
         "cell_type":"markdown",
         "source":[
            "## Update Repos"
         ],
         "metadata":{
            "id":"IzrGt5IcHlAD"
         }
      },
      {
         "cell_type":"code",
         "source":[
            "# for my debugging purposes: discard local edits and pull the latest commit of each repo\n",
            "%cd /content/ai-voice-cloning/dlas\n",
            "!git reset --hard HEAD\n",
            "!git pull\n",
            "%cd ../tortoise-tts/\n",
            "!git reset --hard HEAD\n",
            "!git pull\n",
            "# %cd, not !cd: a !cd only changes directory inside a throwaway subshell, which would\n",
            "# leave the notebook in tortoise-tts and run the reset/pull below on the wrong repo\n",
            "%cd ..\n",
            "!git reset --hard HEAD\n",
            "!git pull\n",
            "# exit()"
         ],
         "metadata":{
            "id":"3DktoOXSHmtw"
         },
         "execution_count":null,
         "outputs":[
            
         ]
      },
      {
         "cell_type":"markdown",
         "source":[
            "## Mount Drive"
         ],
         "metadata":{
            "id":"2Y4t9zDIZMTg"
         }
      },
      {
         "cell_type":"code",
         "source":[
            "# this will save all userdata to your Drive (re-running is harmless: existing Drive folders are kept)\n",
            "from google.colab import drive\n",
            "drive.mount('/content/drive')\n",
            "\n",
            "%cd /content/ai-voice-cloning\n",
            "# Drop the local folders (or links left by an earlier run); -f keeps rm quiet if one is already gone\n",
            "!rm -rf ./training\n",
            "!rm -rf ./results\n",
            "!rm -rf ./voices\n",
            "\n",
            "# -p: the Drive folders persist between sessions, so they may exist already\n",
            "!mkdir -p /content/drive/MyDrive/training/\n",
            "!mkdir -p /content/drive/MyDrive/results/\n",
            "!mkdir -p /content/drive/MyDrive/voices/\n",
            "\n",
            "# Link the Drive folders into the repo folder under the same names\n",
            "!ln -s /content/drive/MyDrive/training/\n",
            "!ln -s /content/drive/MyDrive/results/\n",
            "!ln -s /content/drive/MyDrive/voices/"
         ],
         "metadata":{
            "id":"SGt9gyvubveT"
         },
         "execution_count":null,
         "outputs":[
            
         ]
      },
      {
         "cell_type":"markdown",
         "source":[
            "## Running"
         ],
         "metadata":{
            "id":"o1gkfw3B3JSk"
         }
      },
      {
         "cell_type":"code",
         "source":[
            "%cd /content/ai-voice-cloning\n",
            "#!source ./venv/bin/activate\n",
            "\n",
            "import os\n",
            "import sys\n",
            "\n",
            "# Replace argv with a single empty entry (presumably so setup_args() ignores the kernel's own arguments)\n",
            "sys.argv = [\"\"]\n",
            "# Make the repo's own sources and the tortoise-tts checkout importable\n",
            "sys.path.extend(['./src/', './tortoise-tts/'])\n",
            "\n",
            "# Point both model folders inside the repo, unless the variables are already set\n",
            "os.environ.setdefault('TORTOISE_MODELS_DIR', os.path.realpath(os.path.join(os.getcwd(), './models/tortoise/')))\n",
            "os.environ.setdefault('TRANSFORMERS_CACHE', os.path.realpath(os.path.join(os.getcwd(), './models/transformers/')))\n",
            "\n",
            "from utils import *\n",
            "from webui import *\n",
            "\n",
            "args = setup_args()\n",
            "\n",
            "webui = setup_gradio()\n",
            "# Before you start training: check \"Defer TTS Load\" in Settings, then restart.\n",
            "# Skipping this will crash the runtime.\n",
            "if not args.defer_tts_load:\n",
            "\ttts = setup_tortoise()\n",
            "# Launch without blocking, then block explicitly so the cell keeps the UI alive\n",
            "webui.launch(share=True, prevent_thread_lock=True, height=1000)\n",
            "webui.block_thread()"
         ],
         "metadata":{
            "id":"c_EQZLTA19c7"
         },
         "execution_count":null,
         "outputs":[
            
         ]
      },
      {
         "cell_type":"markdown",
         "source":[
            "## Fallback Training"
         ],
         "metadata":{
            "id":"ggLY9A9KA21D"
         }
      },
      {
         "cell_type":"code",
         "source":[
            "# Fallback: run the training script directly if training can't be started through the web UI\n",
            "%cd /content/ai-voice-cloning\n",
            "!python ./dlas/codes/train.py -opt ./training/finetune.yaml"
         ],
         "metadata":{
            "id":"-KayB8klA5tY"
         },
         "execution_count":null,
         "outputs":[
            
         ]
      },
      {
         "cell_type":"markdown",
         "source":[
            "## Exporting"
         ],
         "metadata":{
            "id":"2AnVQxEJx47p"
         }
      },
      {
         "cell_type":"code",
         "source":[
            "%cd /content/ai-voice-cloning\n",
            "!apt install -y p7zip-full\n",
            "from datetime import datetime\n",
            "# The timestamp names both the staging folder and the archive\n",
            "timestamp = datetime.now().strftime('%m-%d-%Y_%H:%M:%S')\n",
            "# Create both staging folders up front: mv fails when its destination folder is missing\n",
            "!mkdir -p \"../{timestamp}/results\" \"../{timestamp}/training\"\n",
            "!mv ./results/* \"../{timestamp}/results/.\"\n",
            "!mv ./training/* \"../{timestamp}/training/.\"\n",
            "!7z a -t7z -m0=lzma2 -mx=9 -mfb=64 -md=32m -ms=on \"../{timestamp}.7z\" \"../{timestamp}/\"\n",
            "# The archive is written to the parent of the repo folder, so list that folder\n",
            "!ls ../\n",
            "!echo \"Finished zipping, archive is available at {timestamp}.7z\""
         ],
         "metadata":{
            "id":"YOACiDCXx72G"
         },
         "execution_count":null,
         "outputs":[
            
         ]
      }
   ]
}
Initial refractor 2023-02-17 00:08:27 +00:00			`{`
			`"nbformat":4,`
			`"nbformat_minor":0,`
			`"metadata":{`
			`"colab":{`
notebook tweaked, drive mounts and symlinks folders so I can stop having to wait a gorillion years to import voices 2023-02-18 16:30:05 +00:00			`"private_outputs":true,`
			`"provenance":[`

			`]`
Initial refractor 2023-02-17 00:08:27 +00:00			`},`
			`"kernelspec":{`
			`"name":"python3",`
			`"display_name":"Python 3"`
			`},`
			`"language_info":{`
			`"name":"python"`
			`},`
			`"accelerator":"GPU",`
			`"gpuClass":"standard"`
			`},`
			`"cells":[`
			`{`
			`"cell_type":"markdown",`
			`"source":[`
			`"## Initialization"`
			`],`
			`"metadata":{`
			`"id":"ni41hmE03DL6"`
			`}`
			`},`
			`{`
			`"cell_type":"code",`
			`"execution_count":null,`
			`"metadata":{`
			`"id":"FtsMKKfH18iM"`
			`},`
			`"outputs":[`

			`],`
			`"source":[`
			`"!git clone https://git.ecker.tech/mrq/ai-voice-cloning/\n",`
			`"%cd ai-voice-cloning\n",`
Update notebook to follow the \'other\' way of installing mrq/tortoise-tts 2023-02-19 07:22:22 +00:00			`"\n",`
notebook tweaked, drive mounts and symlinks folders so I can stop having to wait a gorillion years to import voices 2023-02-18 16:30:05 +00:00			`"# TODO: fix venvs working for subprocess.Popen calling a bash script\n",`
debugging in colab is pure cock and ball torture because sometimes the files don't actually update when edited, and sometimes they update after I restart the runtime, notebook can't use venv because I can't source it in a subprocess shell call 2023-02-18 03:31:44 +00:00			`"#!apt install python3.8-venv\n",`
			`"#!python -m venv venv\n",`
			`"#!source ./venv/bin/activate\n",`
Update notebook to follow the \'other\' way of installing mrq/tortoise-tts 2023-02-19 07:22:22 +00:00			`"\n",`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"!git clone https://git.ecker.tech/mrq/DL-Art-School dlas\n",`
Initial refractor 2023-02-17 00:08:27 +00:00			`"!python -m pip install --upgrade pip\n",`
			`"!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116\n",`
Update notebook to follow the \'other\' way of installing mrq/tortoise-tts 2023-02-19 07:22:22 +00:00			`"!./setup-tortoise.sh\n",`
			`"!./setup-training.sh\n",`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"!python -m pip install -r ./requirements.txt"`
Initial refractor 2023-02-17 00:08:27 +00:00			`]`
			`},`
small fixes 2023-02-17 20:18:57 +00:00			`{`
			`"cell_type":"markdown",`
			`"source":[`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"# Update Repos"`
small fixes 2023-02-17 20:18:57 +00:00			`],`
			`"metadata":{`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"id":"IzrGt5IcHlAD"`
small fixes 2023-02-17 20:18:57 +00:00			`}`
			`},`
Initial refractor 2023-02-17 00:08:27 +00:00			`{`
			`"cell_type":"code",`
			`"source":[`
notebook tweaked, drive mounts and symlinks folders so I can stop having to wait a gorillion years to import voices 2023-02-18 16:30:05 +00:00			`"# for my debugging purposes\n",`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"%cd /content/ai-voice-cloning/dlas\n",`
			`"!git reset --hard HEAD\n",`
			`"!git pull\n",`
Update notebook to follow the \'other\' way of installing mrq/tortoise-tts 2023-02-19 07:22:22 +00:00			`"%cd ../tortoise-tts/\n",`
			`"!git reset --hard HEAD\n",`
			`"!git pull\n",`
			`"!cd ..\n",`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"!git reset --hard HEAD\n",`
notebook tweaked, drive mounts and symlinks folders so I can stop having to wait a gorillion years to import voices 2023-02-18 16:30:05 +00:00			`"!git pull\n",`
Update notebook to follow the \'other\' way of installing mrq/tortoise-tts 2023-02-19 07:22:22 +00:00			`"# exit()"`
Initial refractor 2023-02-17 00:08:27 +00:00			`],`
			`"metadata":{`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"id":"3DktoOXSHmtw"`
Initial refractor 2023-02-17 00:08:27 +00:00			`},`
			`"execution_count":null,`
			`"outputs":[`

			`]`
			`},`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`{`
			`"cell_type":"markdown",`
			`"source":[`
			`"# Mount Drive"`
			`],`
			`"metadata":{`
			`"id":"2Y4t9zDIZMTg"`
			`}`
			`},`
small fixes 2023-02-17 20:18:57 +00:00			`{`
			`"cell_type":"code",`
			`"source":[`
notebook tweaked, drive mounts and symlinks folders so I can stop having to wait a gorillion years to import voices 2023-02-18 16:30:05 +00:00			`"# only run once, this will save all userdata to your Drive\n",`
small fixes 2023-02-17 20:18:57 +00:00			`"from google.colab import drive\n",`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"drive.mount('/content/drive')\n",`
			`"\n",`
			`"%cd /content/ai-voice-cloning\n",`
			`"!rm -r ./training\n",`
notebook tweaked, drive mounts and symlinks folders so I can stop having to wait a gorillion years to import voices 2023-02-18 16:30:05 +00:00			`"!rm -r ./results\n",`
			`"!rm -r ./voices\n",`
			`"\n",`
			`"!mkdir /content/drive/MyDrive/training/\n",`
			`"!mkdir /content/drive/MyDrive/results/\n",`
			`"!mkdir /content/drive/MyDrive/voices/\n",`
			`"\n",`
			`"!ln -s /content/drive/MyDrive/training/\n",`
			`"!ln -s /content/drive/MyDrive/results/\n",`
			`"!ln -s /content/drive/MyDrive/voices/"`
small fixes 2023-02-17 20:18:57 +00:00			`],`
			`"metadata":{`
			`"id":"SGt9gyvubveT"`
			`},`
			`"execution_count":null,`
			`"outputs":[`

			`]`
			`},`
Initial refractor 2023-02-17 00:08:27 +00:00			`{`
			`"cell_type":"markdown",`
			`"source":[`
			`"## Running"`
			`],`
			`"metadata":{`
			`"id":"o1gkfw3B3JSk"`
			`}`
			`},`
			`{`
			`"cell_type":"code",`
			`"source":[`
updated notebooks to use the new "main" setup 2023-02-17 03:30:53 +00:00			`"%cd /content/ai-voice-cloning\n",`
debugging in colab is pure cock and ball torture because sometimes the files don't actually update when edited, and sometimes they update after I restart the runtime, notebook can't use venv because I can't source it in a subprocess shell call 2023-02-18 03:31:44 +00:00			`"#!source ./venv/bin/activate\n",`
updated notebooks to use the new "main" setup 2023-02-17 03:30:53 +00:00			`"\n",`
			`"import os\n",`
Initial refractor 2023-02-17 00:08:27 +00:00			`"import sys\n",`
updated notebooks to use the new "main" setup 2023-02-17 03:30:53 +00:00			`"\n",`
Initial refractor 2023-02-17 00:08:27 +00:00			`"sys.argv = [\"\"]\n",`
updated notebooks to use the new "main" setup 2023-02-17 03:30:53 +00:00			`"sys.path.append('./src/')\n",`
Update notebook to follow the \'other\' way of installing mrq/tortoise-tts 2023-02-19 07:22:22 +00:00			`"sys.path.append('./tortoise-tts/')\n",`
updated notebooks to use the new "main" setup 2023-02-17 03:30:53 +00:00			`"\n",`
			`"if 'TORTOISE_MODELS_DIR' not in os.environ:\n",`
			`"\tos.environ['TORTOISE_MODELS_DIR'] = os.path.realpath(os.path.join(os.getcwd(), './models/tortoise/'))\n",`
			`"\n",`
			`"if 'TRANSFORMERS_CACHE' not in os.environ:\n",`
			`"\tos.environ['TRANSFORMERS_CACHE'] = os.path.realpath(os.path.join(os.getcwd(), './models/transformers/'))\n",`
			`"\n",`
			`"from utils import *\n",`
			`"from webui import *\n",`
			`"\n",`
			`"args = setup_args()\n",`
Initial refractor 2023-02-17 00:08:27 +00:00			`"\n",`
updated notebooks to use the new "main" setup 2023-02-17 03:30:53 +00:00			`"webui = setup_gradio()\n",`
notebook tweaked, drive mounts and symlinks folders so I can stop having to wait a gorillion years to import voices 2023-02-18 16:30:05 +00:00			`"# Be very, very sure to check \"Defer TTS Load\" in Settings, then restart, before you start training\n",`
			`"# You'll crash the runtime if you don't\n",`
			`"if not args.defer_tts_load:\n",`
			`"\ttts = setup_tortoise()\n",`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"webui.launch(share=True, prevent_thread_lock=True, height=1000)\n",`
updated notebooks to use the new "main" setup 2023-02-17 03:30:53 +00:00			`"webui.block_thread()"`
Initial refractor 2023-02-17 00:08:27 +00:00			`],`
			`"metadata":{`
			`"id":"c_EQZLTA19c7"`
			`},`
small fixes 2023-02-17 20:18:57 +00:00			`"execution_count":null,`
			`"outputs":[`

			`]`
			`},`
			`{`
			`"cell_type":"markdown",`
			`"source":[`
			`"# Fallback Training"`
			`],`
			`"metadata":{`
			`"id":"ggLY9A9KA21D"`
			`}`
			`},`
			`{`
			`"cell_type":"code",`
			`"source":[`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"# This is in case you can't get training through the web UI\n",`
small fixes 2023-02-17 20:18:57 +00:00			`"%cd /content/ai-voice-cloning\n",`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"!python ./dlas/codes/train.py -opt ./training/finetune.yaml"`
small fixes 2023-02-17 20:18:57 +00:00			`],`
			`"metadata":{`
			`"id":"-KayB8klA5tY"`
			`},`
Initial refractor 2023-02-17 00:08:27 +00:00			`"execution_count":null,`
			`"outputs":[`

			`]`
			`},`
			`{`
			`"cell_type":"markdown",`
			`"source":[`
			`"## Exporting"`
			`],`
			`"metadata":{`
			`"id":"2AnVQxEJx47p"`
			`}`
			`},`
			`{`
			`"cell_type":"code",`
			`"source":[`
updated notebooks to use the new "main" setup 2023-02-17 03:30:53 +00:00			`"%cd /content/ai-voice-cloning\n",`
Initial refractor 2023-02-17 00:08:27 +00:00			`"!apt install -y p7zip-full\n",`
			`"from datetime import datetime\n",`
			`"timestamp = datetime.now().strftime('%m-%d-%Y_%H:%M:%S')\n",`
a bit of UI cleanup, import multiple audio files at once, actually shows progress when importing voices, hides audio metadata / latents if no generated settings are detected, preparing datasets shows its progress, saving a training YAML shows a message when done, training now works within the web UI, training output shows to web UI, provided notebook is cleaned up and uses a venv, etc. 2023-02-18 02:07:22 +00:00			`"!mkdir -p \"../{timestamp}/results\"\n",`
			`"!mv ./results/* \"../{timestamp}/results/.\"\n",`
			`"!mv ./training/* \"../{timestamp}/training/.\"\n",`
Initial refractor 2023-02-17 00:08:27 +00:00			`"!7z a -t7z -m0=lzma2 -mx=9 -mfb=64 -md=32m -ms=on \"../{timestamp}.7z\" \"../{timestamp}/\"\n",`
			`"!ls ~/\n",`
			`"!echo \"Finished zipping, archive is available at {timestamp}.7z\""`
			`],`
			`"metadata":{`
			`"id":"YOACiDCXx72G"`
			`},`
			`"execution_count":null,`
			`"outputs":[`

			`]`
			`}`
			`]`
			`}`