forked from ecker/ai-voice-cloning
Compare commits
114 Commits
master
...
guided-set
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2c244c49ec | ||
| 119ac50c58 | |||
| da0af4c498 | |||
| 11a1f6a00e | |||
| 12c51b6057 | |||
| 999878d9c6 | |||
| 14779a5020 | |||
| 0e3bbc55f8 | |||
| 788a957f79 | |||
| 5be14abc21 | |||
| 287738a338 | |||
| 206a14fdbe | |||
| b82961ba8a | |||
| b2e89d8da3 | |||
| 8094401a6d | |||
| 8b9c9e1bbf | |||
| 0231550287 | |||
| d97639e138 | |||
| b8a620e8d7 | |||
| 35225a35da | |||
| b5e9899bbf | |||
| cd8702ab0d | |||
| d312019d05 | |||
| ce3866d0cd | |||
| 1316331be3 | |||
| 3e220ed306 | |||
| 37cab14272 | |||
| 5026d93ecd | |||
| 1a9d159b2a | |||
| df24827b9a | |||
| 6d5e1e1a80 | |||
| 6d8c2dd459 | |||
|
|
07163644dd | ||
| e1f3ffa08c | |||
|
|
5487c28683 | ||
| 9fb4aa7917 | |||
| 740b5587df | |||
| 68f4858ce9 | |||
| e859a7c01d | |||
| e205322c8d | |||
| 59773a7637 | |||
| c956d81baf | |||
| 534a761e49 | |||
| 5a41db978e | |||
| b989123bd4 | |||
| c2726fa0d4 | |||
| 5037752059 | |||
| 787b44807a | |||
| 81eb58f0d6 | |||
| fda47156ec | |||
| bc0d9ab3ed | |||
| 6925ec731b | |||
| 47abde224c | |||
| 92553973be | |||
| aafeb9f96a | |||
| 65329dba31 | |||
| 8b4da29d5f | |||
| d5d8821a9d | |||
| e5e16bc5b5 | |||
| bedbb893ac | |||
| f31ea9d5bc | |||
| 2104dbdbc5 | |||
| f6d0b66e10 | |||
| 1e0fec4358 | |||
| 7d1220e83e | |||
| 487f2ebf32 | |||
| 1cbcf14cff | |||
| 41fca1a101 | |||
| 941a27d2b3 | |||
| 225dee22d4 | |||
| aa96edde2f | |||
| 526a430c2a | |||
| 2aa70532e8 | |||
| cc47ed7242 | |||
| 93b061fb4d | |||
| c4b41e07fa | |||
| fefc7aba03 | |||
| 9e64dad785 | |||
| f119993fb5 | |||
| 8a1a48f31e | |||
| ed2cf9f5ee | |||
| b6f7aa6264 | |||
| bbc2d26289 | |||
| 7d1936adad | |||
| 1fd88afcca | |||
| bacac6daea | |||
| 37ffa60d14 | |||
| d17f6fafb0 | |||
| c99cacec2e | |||
| 109757d56d | |||
| ee95616dfd | |||
| 6260594a1e | |||
| 4694d622f4 | |||
| ec76676b16 | |||
| 4f79b3724b | |||
| 092dd7b2d7 | |||
| f4e82fcf08 | |||
| 3891870b5d | |||
| d89b7d60e0 | |||
| 485319c2bb | |||
| debdf6049a | |||
| ae5d4023aa | |||
| ec550d74fd | |||
| 57060190af | |||
| f44239a85a | |||
| e7d0cfaa82 | |||
| 5fcdb19f8b | |||
| 47058db67f | |||
| fc5b303319 | |||
| 58c981d714 | |||
| cd8919e65c | |||
| ebbc85fb6a | |||
|
|
8dddb560e1 | ||
|
|
4807072894 |
7
.gitignore
vendored
7
.gitignore
vendored
@ -1,7 +1,8 @@
|
|||||||
# ignores user files
|
# ignores user files
|
||||||
/tortoise-venv/
|
/venv/
|
||||||
/tortoise/voices/
|
/voices/*
|
||||||
/models/
|
/models/*
|
||||||
|
/training/*
|
||||||
/config/*
|
/config/*
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
|
|||||||
6
.gitmodules
vendored
Executable file
6
.gitmodules
vendored
Executable file
@ -0,0 +1,6 @@
|
|||||||
|
[submodule "tortoise-tts"]
|
||||||
|
path = tortoise-tts
|
||||||
|
url = https://git.ecker.tech/mrq/tortoise-tts
|
||||||
|
[submodule "dlas"]
|
||||||
|
path = dlas
|
||||||
|
url = https://git.ecker.tech/mrq/DL-Art-School
|
||||||
17
README.md
17
README.md
@ -16,4 +16,19 @@ Please consult [the wiki](https://git.ecker.tech/mrq/ai-voice-cloning/wiki) for
|
|||||||
|
|
||||||
## Bug Reporting
|
## Bug Reporting
|
||||||
|
|
||||||
If you run into any problems, please refer to the [issues you may encounter](https://git.ecker.tech/mrq/ai-voice-cloning/wiki/Issues) wiki page first. Please don't hesitate to submit an issue.
|
If you run into any problems, please refer to the [issues you may encounter](https://git.ecker.tech/mrq/ai-voice-cloning/wiki/Issues) wiki page first. Please don't hesitate to submit an issue.
|
||||||
|
|
||||||
|
## Changelogs
|
||||||
|
|
||||||
|
Below will be a rather loose changelog, as I don't think I have a way to chronicle changes outside of commit messages:
|
||||||
|
|
||||||
|
### `2023.02.22`
|
||||||
|
|
||||||
|
* greatly reduced VRAM consumption through the use of [TimDettmers/bitsandbytes](https://github.com/TimDettmers/bitsandbytes)
|
||||||
|
* cleaned up section of code that handled parsing output from training script
|
||||||
|
* added button to reconnect to the training script's output (sometimes skips a line to update, but it's better than nothing)
|
||||||
|
* actually update submodules from the update script (somehow forgot to pass `--remote`)
|
||||||
|
|
||||||
|
### `Before 2023.02.22`
|
||||||
|
|
||||||
|
Refer to commit logs.
|
||||||
1
dlas
Submodule
1
dlas
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 6eb7ebf847cf2e4761536391de841dc4209d1e63
|
||||||
@ -1,21 +1,21 @@
|
|||||||
name: ${name}
|
name: ${name}
|
||||||
model: extensibletrainer
|
model: extensibletrainer
|
||||||
scale: 1
|
scale: 1
|
||||||
gpu_ids: [0] # <-- unless you have multiple gpus, use this
|
gpu_ids: [0] # Superfluous, redundant, unnecessary, the way you launch the training script will set this
|
||||||
start_step: -1
|
start_step: 0
|
||||||
checkpointing_enabled: true # <-- Gradient checkpointing. Enable for huge GPU memory savings. Disable for distributed training.
|
checkpointing_enabled: true
|
||||||
fp16: false # might want to check this out
|
fp16: ${float16}
|
||||||
wandb: false # <-- enable to log to wandb. tensorboard logging is always enabled.
|
wandb: false
|
||||||
use_tb_logger: true
|
use_tb_logger: true
|
||||||
|
|
||||||
datasets:
|
datasets:
|
||||||
train:
|
train:
|
||||||
name: ${dataset_name}
|
name: ${dataset_name}
|
||||||
n_workers: 8 # idk what this does
|
n_workers: ${workers}
|
||||||
batch_size: ${batch_size} # This leads to ~16GB of vram usage on my 3090.
|
batch_size: ${batch_size}
|
||||||
mode: paired_voice_audio
|
mode: paired_voice_audio
|
||||||
path: ${dataset_path}
|
path: ${dataset_path}
|
||||||
fetcher_mode: ['lj'] # CHANGEME if your dataset isn't in LJSpeech format
|
fetcher_mode: ['lj']
|
||||||
phase: train
|
phase: train
|
||||||
max_wav_length: 255995
|
max_wav_length: 255995
|
||||||
max_text_length: 200
|
max_text_length: 200
|
||||||
@ -26,14 +26,14 @@ datasets:
|
|||||||
use_bpe_tokenizer: True
|
use_bpe_tokenizer: True
|
||||||
tokenizer_vocab: ./models/tortoise/bpe_lowercase_asr_256.json
|
tokenizer_vocab: ./models/tortoise/bpe_lowercase_asr_256.json
|
||||||
load_aligned_codes: False
|
load_aligned_codes: False
|
||||||
val:
|
val: # I really do not care about validation right now
|
||||||
name: ${validation_name}
|
name: ${validation_name}
|
||||||
n_workers: 1
|
n_workers: 1
|
||||||
batch_size: 32 # this could be higher probably
|
batch_size: 1
|
||||||
mode: paired_voice_audio
|
mode: paired_voice_audio
|
||||||
path: ${validation_path}
|
path: ${validation_path}
|
||||||
fetcher_mode: ['lj']
|
fetcher_mode: ['lj']
|
||||||
phase: val # might be broken idk
|
phase: val
|
||||||
max_wav_length: 255995
|
max_wav_length: 255995
|
||||||
max_text_length: 200
|
max_text_length: 200
|
||||||
sample_rate: 22050
|
sample_rate: 22050
|
||||||
@ -47,18 +47,18 @@ datasets:
|
|||||||
steps:
|
steps:
|
||||||
gpt_train:
|
gpt_train:
|
||||||
training: gpt
|
training: gpt
|
||||||
loss_log_buffer: 500 # no idea what this does
|
loss_log_buffer: 500
|
||||||
|
|
||||||
# Generally follows the recipe from the DALLE paper.
|
# Generally follows the recipe from the DALLE paper.
|
||||||
optimizer: adamw # this should be adamw_zero if you're using distributed training
|
optimizer: adamw # this should be adamw_zero if you're using distributed training
|
||||||
optimizer_params:
|
optimizer_params:
|
||||||
lr: !!float ${learning_rate} # CHANGEME: this was originally 1e-4; I reduced it to 1e-5 because it's fine-tuning, but **you should experiment with this value**
|
lr: !!float ${learning_rate} # originally: 1e-4
|
||||||
weight_decay: !!float 1e-2
|
weight_decay: !!float 1e-2
|
||||||
beta1: 0.9
|
beta1: 0.9
|
||||||
beta2: 0.96
|
beta2: 0.96
|
||||||
clip_grad_eps: 4
|
clip_grad_eps: 4
|
||||||
|
|
||||||
injectors: # TODO: replace this entire sequence with the GptVoiceLatentInjector
|
injectors:
|
||||||
paired_to_mel:
|
paired_to_mel:
|
||||||
type: torch_mel_spectrogram
|
type: torch_mel_spectrogram
|
||||||
mel_norm_file: ./models/tortoise/clips_mel_norms.pth
|
mel_norm_file: ./models/tortoise/clips_mel_norms.pth
|
||||||
@ -74,7 +74,7 @@ steps:
|
|||||||
type: discrete_token
|
type: discrete_token
|
||||||
in: paired_mel
|
in: paired_mel
|
||||||
out: paired_mel_codes
|
out: paired_mel_codes
|
||||||
dvae_config: "./models/tortoise/train_diffusion_vocoder_22k_level.yml" # EXTREMELY IMPORTANT
|
dvae_config: "./models/tortoise/train_diffusion_vocoder_22k_level.yml"
|
||||||
paired_fwd_text:
|
paired_fwd_text:
|
||||||
type: generator
|
type: generator
|
||||||
generator: gpt
|
generator: gpt
|
||||||
@ -83,7 +83,7 @@ steps:
|
|||||||
losses:
|
losses:
|
||||||
text_ce:
|
text_ce:
|
||||||
type: direct
|
type: direct
|
||||||
weight: .01
|
weight: ${text_ce_lr_weight}
|
||||||
key: loss_text_ce
|
key: loss_text_ce
|
||||||
mel_ce:
|
mel_ce:
|
||||||
type: direct
|
type: direct
|
||||||
@ -95,12 +95,12 @@ networks:
|
|||||||
type: generator
|
type: generator
|
||||||
which_model_G: unified_voice2 # none of the unified_voice*.py files actually match the tortoise inference code... 4 and 3 have "alignment_head" (wtf is that?), 2 lacks the types=1 parameter.
|
which_model_G: unified_voice2 # none of the unified_voice*.py files actually match the tortoise inference code... 4 and 3 have "alignment_head" (wtf is that?), 2 lacks the types=1 parameter.
|
||||||
kwargs:
|
kwargs:
|
||||||
layers: 30 # WAS 8
|
layers: 30 # originally: 8
|
||||||
model_dim: 1024 # WAS 512
|
model_dim: 1024 # originally: 512
|
||||||
heads: 16 # WAS 8
|
heads: 16 # originally: 8
|
||||||
max_text_tokens: 402 # WAS 120
|
max_text_tokens: 402 # originally: 120
|
||||||
max_mel_tokens: 604 # WAS 250
|
max_mel_tokens: 604 # originally: 250
|
||||||
max_conditioning_inputs: 2 # WAS 1
|
max_conditioning_inputs: 2 # originally: 1
|
||||||
mel_length_compression: 1024
|
mel_length_compression: 1024
|
||||||
number_text_tokens: 256 # supposed to be 255 for newer unified_voice files
|
number_text_tokens: 256 # supposed to be 255 for newer unified_voice files
|
||||||
number_mel_codes: 8194
|
number_mel_codes: 8194
|
||||||
@ -114,19 +114,20 @@ networks:
|
|||||||
#only_alignment_head: False # uv3/4
|
#only_alignment_head: False # uv3/4
|
||||||
|
|
||||||
path:
|
path:
|
||||||
pretrain_model_gpt: './models/tortoise/autoregressive.pth' # CHANGEME: copy this from tortoise cache
|
${pretrain_model_gpt}
|
||||||
strict_load: true
|
strict_load: true
|
||||||
#resume_state: ./models/tortoise/train_imgnet_vqvae_stage1/training_state/0.state # <-- Set this to resume from a previous training state.
|
${resume_state}
|
||||||
|
|
||||||
# afaik all units here are measured in **steps** (i.e. one batch of batch_size is 1 unit)
|
train:
|
||||||
train: # CHANGEME: ALL OF THESE PARAMETERS SHOULD BE EXPERIMENTED WITH
|
|
||||||
niter: ${iterations}
|
niter: ${iterations}
|
||||||
warmup_iter: -1
|
warmup_iter: -1
|
||||||
mega_batch_factor: 4 # <-- Gradient accumulation factor. If you are running OOM, increase this to [2,4,8].
|
mega_batch_factor: ${gradient_accumulation_size}
|
||||||
val_freq: 500
|
val_freq: ${iterations}
|
||||||
|
|
||||||
|
ema_enabled: false # I really don't think EMA matters
|
||||||
|
|
||||||
default_lr_scheme: MultiStepLR
|
default_lr_scheme: MultiStepLR
|
||||||
gen_lr_steps: [500, 1000, 1400, 1800] #[50000, 100000, 140000, 180000]
|
gen_lr_steps: ${gen_lr_steps} #[50000, 100000, 140000, 180000]
|
||||||
lr_gamma: 0.5
|
lr_gamma: 0.5
|
||||||
|
|
||||||
eval:
|
eval:
|
||||||
@ -140,7 +141,7 @@ eval:
|
|||||||
|
|
||||||
logger:
|
logger:
|
||||||
print_freq: ${print_rate}
|
print_freq: ${print_rate}
|
||||||
save_checkpoint_freq: ${save_rate} # CHANGEME: especially you should increase this it's really slow
|
save_checkpoint_freq: ${save_rate}
|
||||||
visuals: [gen, mel]
|
visuals: [gen, mel]
|
||||||
visual_debug_rate: ${print_rate}
|
visual_debug_rate: ${print_rate}
|
||||||
is_mel_spectrogram: true
|
is_mel_spectrogram: true
|
||||||
112
notebook.ipynb
112
notebook.ipynb
@ -40,15 +40,23 @@
|
|||||||
"source":[
|
"source":[
|
||||||
"!git clone https://git.ecker.tech/mrq/ai-voice-cloning/\n",
|
"!git clone https://git.ecker.tech/mrq/ai-voice-cloning/\n",
|
||||||
"%cd ai-voice-cloning\n",
|
"%cd ai-voice-cloning\n",
|
||||||
|
"\n",
|
||||||
|
"!git submodule init\n",
|
||||||
|
"!git submodule update --remote\n",
|
||||||
|
"\n",
|
||||||
"# TODO: fix venvs working for subprocess.Popen calling a bash script\n",
|
"# TODO: fix venvs working for subprocess.Popen calling a bash script\n",
|
||||||
"#!apt install python3.8-venv\n",
|
"#!apt install python3.8-venv\n",
|
||||||
"#!python -m venv venv\n",
|
"#!python -m venv venv\n",
|
||||||
"#!source ./venv/bin/activate\n",
|
"#!source ./venv/bin/activate\n",
|
||||||
"!git clone https://git.ecker.tech/mrq/DL-Art-School dlas\n",
|
"\n",
|
||||||
"!python -m pip install --upgrade pip\n",
|
"!python -m pip install --upgrade pip\n",
|
||||||
"!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116\n",
|
"!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116\n",
|
||||||
"!python -m pip install -r ./dlas/requirements.txt\n",
|
"!python -m pip install -r ./dlas/requirements.txt\n",
|
||||||
"!python -m pip install -r ./requirements.txt"
|
"!python -m pip install -r ./tortoise-tts/requirements.txt\n",
|
||||||
|
"!python -m pip install -r ./requirements.txt\n",
|
||||||
|
"!python -m pip install -e ./tortoise-tts/\n",
|
||||||
|
"\n",
|
||||||
|
"!rm ./tortoise-tts/{main,webui}.py"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -64,13 +72,8 @@
|
|||||||
"cell_type":"code",
|
"cell_type":"code",
|
||||||
"source":[
|
"source":[
|
||||||
"# for my debugging purposes\n",
|
"# for my debugging purposes\n",
|
||||||
"%cd /content/ai-voice-cloning/dlas\n",
|
"%cd /content/ai-voice-cloning/\n",
|
||||||
"!git reset --hard HEAD\n",
|
"!./update.sh"
|
||||||
"!git pull\n",
|
|
||||||
"%cd ..\n",
|
|
||||||
"!git reset --hard HEAD\n",
|
|
||||||
"!git pull\n",
|
|
||||||
"#exit()"
|
|
||||||
],
|
],
|
||||||
"metadata":{
|
"metadata":{
|
||||||
"id":"3DktoOXSHmtw"
|
"id":"3DktoOXSHmtw"
|
||||||
@ -93,21 +96,18 @@
|
|||||||
"cell_type":"code",
|
"cell_type":"code",
|
||||||
"source":[
|
"source":[
|
||||||
"# only run once, this will save all userdata to your Drive\n",
|
"# only run once, this will save all userdata to your Drive\n",
|
||||||
|
"# it shouldn't delete through symlinks, but you never know\n",
|
||||||
|
"\n",
|
||||||
"from google.colab import drive\n",
|
"from google.colab import drive\n",
|
||||||
"drive.mount('/content/drive')\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"%cd /content/ai-voice-cloning\n",
|
"%cd /content/ai-voice-cloning\n",
|
||||||
"!rm -r ./training\n",
|
"drive.flush_and_unmount()\n",
|
||||||
"!rm -r ./results\n",
|
"!rm -r ./{training,results,voices,config}\n",
|
||||||
"!rm -r ./voices\n",
|
"drive.mount('/content/drive')\n",
|
||||||
"\n",
|
"!mkdir /content/drive/MyDrive/ai-voice-cloning/\n",
|
||||||
"!mkdir /content/drive/MyDrive/training/\n",
|
"!mv /content/drive/MyDrive/{training,results,voices,config} /content/drive/MyDrive/ai-voice-cloning\n",
|
||||||
"!mkdir /content/drive/MyDrive/results/\n",
|
"!mkdir /content/drive/MyDrive/ai-voice-cloning/{training,results,voices,config}\n",
|
||||||
"!mkdir /content/drive/MyDrive/voices/\n",
|
"!ln -s /content/drive/MyDrive/ai-voice-cloning/{training,results,voices,config} ./"
|
||||||
"\n",
|
|
||||||
"!ln -s /content/drive/MyDrive/training/\n",
|
|
||||||
"!ln -s /content/drive/MyDrive/results/\n",
|
|
||||||
"!ln -s /content/drive/MyDrive/voices/"
|
|
||||||
],
|
],
|
||||||
"metadata":{
|
"metadata":{
|
||||||
"id":"SGt9gyvubveT"
|
"id":"SGt9gyvubveT"
|
||||||
@ -120,7 +120,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type":"markdown",
|
"cell_type":"markdown",
|
||||||
"source":[
|
"source":[
|
||||||
"## Running"
|
"## Running (Inlined)"
|
||||||
],
|
],
|
||||||
"metadata":{
|
"metadata":{
|
||||||
"id":"o1gkfw3B3JSk"
|
"id":"o1gkfw3B3JSk"
|
||||||
@ -136,7 +136,10 @@
|
|||||||
"import sys\n",
|
"import sys\n",
|
||||||
"\n",
|
"\n",
|
||||||
"sys.argv = [\"\"]\n",
|
"sys.argv = [\"\"]\n",
|
||||||
"sys.path.append('./src/')\n",
|
"if './src/' not in sys.path:\n",
|
||||||
|
"\tsys.path.append('./src/')\n",
|
||||||
|
"if './tortoise-tts/' not in sys.path:\n",
|
||||||
|
"\tsys.path.append('./tortoise-tts/')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if 'TORTOISE_MODELS_DIR' not in os.environ:\n",
|
"if 'TORTOISE_MODELS_DIR' not in os.environ:\n",
|
||||||
"\tos.environ['TORTOISE_MODELS_DIR'] = os.path.realpath(os.path.join(os.getcwd(), './models/tortoise/'))\n",
|
"\tos.environ['TORTOISE_MODELS_DIR'] = os.path.realpath(os.path.join(os.getcwd(), './models/tortoise/'))\n",
|
||||||
@ -144,18 +147,18 @@
|
|||||||
"if 'TRANSFORMERS_CACHE' not in os.environ:\n",
|
"if 'TRANSFORMERS_CACHE' not in os.environ:\n",
|
||||||
"\tos.environ['TRANSFORMERS_CACHE'] = os.path.realpath(os.path.join(os.getcwd(), './models/transformers/'))\n",
|
"\tos.environ['TRANSFORMERS_CACHE'] = os.path.realpath(os.path.join(os.getcwd(), './models/transformers/'))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from utils import *\n",
|
"import utils\n",
|
||||||
"from webui import *\n",
|
"import webui\n",
|
||||||
"\n",
|
"\n",
|
||||||
"args = setup_args()\n",
|
"args = utils.setup_args()\n",
|
||||||
"\n",
|
"ui = webui.setup_gradio()\n",
|
||||||
"webui = setup_gradio()\n",
|
|
||||||
"# Be very, very sure to check \"Defer TTS Load\" in Settings, then restart, before you start training\n",
|
"# Be very, very sure to check \"Defer TTS Load\" in Settings, then restart, before you start training\n",
|
||||||
"# You'll crash the runtime if you don't\n",
|
"# You'll crash the runtime if you don't\n",
|
||||||
"if not args.defer_tts_load:\n",
|
"if not args.defer_tts_load:\n",
|
||||||
"\ttts = setup_tortoise()\n",
|
"\tutils.setup_tortoise()\n",
|
||||||
"webui.launch(share=True, prevent_thread_lock=True, height=1000)\n",
|
"\n",
|
||||||
"webui.block_thread()"
|
"ui.launch(share=True, prevent_thread_lock=True, height=1000)\n",
|
||||||
|
"ui.block_thread()"
|
||||||
],
|
],
|
||||||
"metadata":{
|
"metadata":{
|
||||||
"id":"c_EQZLTA19c7"
|
"id":"c_EQZLTA19c7"
|
||||||
@ -165,6 +168,52 @@
|
|||||||
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type":"markdown",
|
||||||
|
"source":[
|
||||||
|
"## Running (non-inlined)"
|
||||||
|
],
|
||||||
|
"metadata":{
|
||||||
|
"id":"EM3iNqgJF6Be"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type":"code",
|
||||||
|
"source":[
|
||||||
|
"%cd /content/ai-voice-cloning/\n",
|
||||||
|
"!./start.sh"
|
||||||
|
],
|
||||||
|
"metadata":{
|
||||||
|
"id":"QRA8jF3cF-YJ"
|
||||||
|
},
|
||||||
|
"execution_count":null,
|
||||||
|
"outputs":[
|
||||||
|
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type":"markdown",
|
||||||
|
"source":[
|
||||||
|
"# Restart Runtime"
|
||||||
|
],
|
||||||
|
"metadata":{
|
||||||
|
"id":"vH9KU7SMGDxb"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type":"code",
|
||||||
|
"source":[
|
||||||
|
"!rm ./tortoise-tts/{main,webui}.py\n",
|
||||||
|
"exit()"
|
||||||
|
],
|
||||||
|
"metadata":{
|
||||||
|
"id":"EWeyUPvgGDX5"
|
||||||
|
},
|
||||||
|
"execution_count":null,
|
||||||
|
"outputs":[
|
||||||
|
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type":"markdown",
|
"cell_type":"markdown",
|
||||||
"source":[
|
"source":[
|
||||||
@ -201,6 +250,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type":"code",
|
"cell_type":"code",
|
||||||
"source":[
|
"source":[
|
||||||
|
"# if you're not using drive mounting\n",
|
||||||
"%cd /content/ai-voice-cloning\n",
|
"%cd /content/ai-voice-cloning\n",
|
||||||
"!apt install -y p7zip-full\n",
|
"!apt install -y p7zip-full\n",
|
||||||
"from datetime import datetime\n",
|
"from datetime import datetime\n",
|
||||||
|
|||||||
@ -1,6 +1,3 @@
|
|||||||
git+https://git.ecker.tech/mrq/tortoise-tts.git
|
|
||||||
# git+https://git.ecker.tech/mrq/DL-Art-School.git
|
|
||||||
git+https://github.com/openai/whisper.git
|
|
||||||
more-itertools
|
more-itertools
|
||||||
ffmpeg-python
|
ffmpeg-python
|
||||||
gradio
|
gradio
|
||||||
|
|||||||
@ -1,8 +1,18 @@
|
|||||||
|
git submodule init
|
||||||
|
git submodule update --remote
|
||||||
|
|
||||||
python -m venv venv
|
python -m venv venv
|
||||||
call .\venv\Scripts\activate.bat
|
call .\venv\Scripts\activate.bat
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116
|
python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117
|
||||||
python -m pip install -r ./requirements.txt
|
python -m pip install -r .\requirements.txt
|
||||||
.\setup-training.bat
|
python -m pip install -r .\tortoise-tts\requirements.txt
|
||||||
deactivate
|
python -m pip install -e .\tortoise-tts\
|
||||||
pause
|
python -m pip install -r .\dlas\requirements.txt
|
||||||
|
|
||||||
|
xcopy .\dlas\bitsandbytes_windows\* .\venv\Lib\site-packages\bitsandbytes\. /Y
|
||||||
|
xcopy .\dlas\bitsandbytes_windows\cuda_setup\* .\venv\Lib\site-packages\bitsandbytes\cuda_setup\. /Y
|
||||||
|
xcopy .\dlas\bitsandbytes_windows\nn\* .\venv\Lib\site-packages\bitsandbytes\nn\. /Y
|
||||||
|
|
||||||
|
pause
|
||||||
|
deactivate
|
||||||
@ -1,8 +1,17 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
python -m venv venv
|
# get local dependencies
|
||||||
|
git submodule init
|
||||||
|
git submodule update --remote
|
||||||
|
# setup venv
|
||||||
|
python3 -m venv venv
|
||||||
source ./venv/bin/activate
|
source ./venv/bin/activate
|
||||||
python -m pip install --upgrade pip
|
python3 -m pip install --upgrade pip # just to be safe
|
||||||
pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116
|
# CUDA
|
||||||
python -m pip install -r ./requirements.txt
|
pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu117
|
||||||
./setup-training.sh
|
# install requirements
|
||||||
deactivate
|
python3 -m pip install -r ./requirements.txt # install local requirements
|
||||||
|
python3 -m pip install -r ./tortoise-tts/requirements.txt # install TorToiSe requirements
|
||||||
|
python3 -m pip install -e ./tortoise-tts/ # install TorToiSe
|
||||||
|
python3 -m pip install -r ./dlas/requirements.txt # install DLAS requirements last, because whisperx will break a dependency here
|
||||||
|
|
||||||
|
deactivate
|
||||||
@ -1,8 +1,14 @@
|
|||||||
|
git submodule init
|
||||||
|
git submodule update --remote
|
||||||
|
|
||||||
python -m venv venv
|
python -m venv venv
|
||||||
call .\venv\Scripts\activate.bat
|
call .\venv\Scripts\activate.bat
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
python -m pip install torch torchvision torchaudio torch-directml==0.1.13.1.dev230119
|
python -m pip install torch torchvision torchaudio torch-directml
|
||||||
python -m pip install -r ./requirements.txt
|
python -m pip install -r .\requirements.txt
|
||||||
.\setup-training.bat
|
python -m pip install -r .\tortoise-tts\requirements.txt
|
||||||
deactivate
|
python -m pip install -e .\tortoise-tts\
|
||||||
pause
|
python -m pip install -r .\dlas\requirements.txt
|
||||||
|
|
||||||
|
pause
|
||||||
|
deactivate
|
||||||
54
setup-guided.sh
Executable file
54
setup-guided.sh
Executable file
@ -0,0 +1,54 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
if ! command -v git &> /dev/null; then
|
||||||
|
if [[ "$(read -e -p 'Could not find git. Continue? [y/N]> '; echo $REPLY)" != [Yy]* ]]; then exit 1; fi
|
||||||
|
else
|
||||||
|
printf "git - ok\n"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# TODO: This could be more precise. e.g. checking for python3, then python, etc.
|
||||||
|
if ! command -v python &> /dev/null; then
|
||||||
|
if [[ "$(read -e -p 'Could not find python. Continue? [y/N]> '; echo $REPLY)" != [Yy]* ]]; then exit 1; fi
|
||||||
|
else
|
||||||
|
python -c 'import sys; sys.stderr.write("Wrong python version.\n") if sys.version_info.major != 3 else sys.stderr.write("Python 3 - ok\n")'
|
||||||
|
fi
|
||||||
|
|
||||||
|
printf "Which GPU brand do you have?\n"
|
||||||
|
gpus=(nvidia amd)
|
||||||
|
|
||||||
|
gpu=""
|
||||||
|
while [ "$gpu" = "" ]; do
|
||||||
|
select gpu in $(printf '%s\n' ${gpus[@]}); do break; done
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ $gpu = "nvidia" ]; then
|
||||||
|
./setup-cuda.sh
|
||||||
|
elif [ $gpu = "amd" ]; then
|
||||||
|
./setup-rocm.sh
|
||||||
|
fi
|
||||||
|
|
||||||
|
source ./venv/bin/activate
|
||||||
|
|
||||||
|
printf "Which Whisper backend would you like to use?\n"
|
||||||
|
whisper_backends=("openai/whisper" "m-bain/whisperx" "lightmare/whispercpp")
|
||||||
|
|
||||||
|
whisper_backend=""
|
||||||
|
while [ "$whisper_backend" = "" ]; do
|
||||||
|
select whisper_backend in $(printf '%s\n' ${whisper_backends[@]}); do break; done
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ $whisper_backend = "openai/whisper" ]; then
|
||||||
|
python -m pip install git+https://github.com/openai/whisper.git
|
||||||
|
elif [ $whisper_backend = "m-bain/whisperx" ]; then
|
||||||
|
python -m pip install git+https://github.com/m-bain/whisperx.git
|
||||||
|
elif [ $whisper_backend = "lightmare/whispercpp" ]; then
|
||||||
|
# This depends on SemVer
|
||||||
|
# Git > v2.18 for `--sort`
|
||||||
|
# Git > v2.4 for `versionsort.suffix`
|
||||||
|
# For older versions:
|
||||||
|
# git ls-remote --refs --tags https://git.ecker.tech/lightmare/whispercpp.py | cut --delimiter='/' --fields=3 | tr '-' '~' | sort --version-sort | tail --lines=1
|
||||||
|
WHISPERCPP_LATEST=$(git -c 'versionsort.suffix=-' ls-remote --exit-code --refs --sort='version:refname' --tags https://git.ecker.tech/lightmare/whispercpp.py '*.*.*' | tail -n 1 | cut --delimiter='/' --fields=3)
|
||||||
|
python -m pip install git+https://git.ecker.tech/lightmare/whispercpp.py@$WHISPERCPP_LATEST
|
||||||
|
fi
|
||||||
|
|
||||||
|
deactivate
|
||||||
8
setup-rocm-bnb.sh
Executable file
8
setup-rocm-bnb.sh
Executable file
@ -0,0 +1,8 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
source ./venv/bin/activate
|
||||||
|
git clone https://git.ecker.tech/mrq/bitsandbytes-rocm
|
||||||
|
cd bitsandbytes-rocm
|
||||||
|
make hip
|
||||||
|
CUDA_VERSION=gfx1030 python setup.py install # assumes you're using a 6XXX series card
|
||||||
|
python3 -m bitsandbytes # to validate it works
|
||||||
|
cd ..
|
||||||
@ -1,9 +1,19 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
python -m venv venv
|
# get local dependencies
|
||||||
|
git submodule init
|
||||||
|
git submodule update --remote
|
||||||
|
# setup venv
|
||||||
|
python3 -m venv venv
|
||||||
source ./venv/bin/activate
|
source ./venv/bin/activate
|
||||||
python -m pip install --upgrade pip
|
python3 -m pip install --upgrade pip # just to be safe
|
||||||
# ROCM
|
# ROCM
|
||||||
pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.1.1 # 5.2 does not work for me desu
|
pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.1.1 # 5.2 does not work for me desu
|
||||||
python -m pip install -r ./requirements.txt
|
# install requirements
|
||||||
./setup-training.sh
|
python3 -m pip install -r ./requirements.txt # install local requirements
|
||||||
deactivate
|
python3 -m pip install -r ./tortoise-tts/requirements.txt # install TorToiSe requirements
|
||||||
|
python3 -m pip install -e ./tortoise-tts/ # install TorToiSe
|
||||||
|
python3 -m pip install -r ./dlas/requirements.txt # install DLAS requirements
|
||||||
|
# swap to ROCm version of BitsAndBytes
|
||||||
|
pip3 uninstall -y bitsandbytes
|
||||||
|
./setup-rocm-bnb.sh
|
||||||
|
deactivate
|
||||||
@ -1,2 +0,0 @@
|
|||||||
git clone https://git.ecker.tech/mrq/DL-Art-School dlas
|
|
||||||
python -m pip install -r .\dlas\requirements.txt
|
|
||||||
@ -1,3 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
git clone https://git.ecker.tech/mrq/DL-Art-School dlas
|
|
||||||
python -m pip install -r ./dlas/requirements.txt
|
|
||||||
@ -1,14 +1,14 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from utils import *
|
|
||||||
from webui import *
|
|
||||||
|
|
||||||
if 'TORTOISE_MODELS_DIR' not in os.environ:
|
if 'TORTOISE_MODELS_DIR' not in os.environ:
|
||||||
os.environ['TORTOISE_MODELS_DIR'] = os.path.realpath(os.path.join(os.getcwd(), './models/tortoise/'))
|
os.environ['TORTOISE_MODELS_DIR'] = os.path.realpath(os.path.join(os.getcwd(), './models/tortoise/'))
|
||||||
|
|
||||||
if 'TRANSFORMERS_CACHE' not in os.environ:
|
if 'TRANSFORMERS_CACHE' not in os.environ:
|
||||||
os.environ['TRANSFORMERS_CACHE'] = os.path.realpath(os.path.join(os.getcwd(), './models/transformers/'))
|
os.environ['TRANSFORMERS_CACHE'] = os.path.realpath(os.path.join(os.getcwd(), './models/transformers/'))
|
||||||
|
|
||||||
|
from utils import *
|
||||||
|
from webui import *
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
args = setup_args()
|
args = setup_args()
|
||||||
|
|
||||||
|
|||||||
48
src/train.py
48
src/train.py
@ -1,8 +1,34 @@
|
|||||||
import torch
|
|
||||||
import argparse
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import argparse
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
"""
|
||||||
|
if 'BITSANDBYTES_OVERRIDE_LINEAR' not in os.environ:
|
||||||
|
os.environ['BITSANDBYTES_OVERRIDE_LINEAR'] = '0'
|
||||||
|
if 'BITSANDBYTES_OVERRIDE_EMBEDDING' not in os.environ:
|
||||||
|
os.environ['BITSANDBYTES_OVERRIDE_EMBEDDING'] = '1'
|
||||||
|
if 'BITSANDBYTES_OVERRIDE_ADAM' not in os.environ:
|
||||||
|
os.environ['BITSANDBYTES_OVERRIDE_ADAM'] = '1'
|
||||||
|
if 'BITSANDBYTES_OVERRIDE_ADAMW' not in os.environ:
|
||||||
|
os.environ['BITSANDBYTES_OVERRIDE_ADAMW'] = '1'
|
||||||
|
"""
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_vit_latent.yml', nargs='+') # ugh
|
||||||
|
parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher')
|
||||||
|
args = parser.parse_args()
|
||||||
|
args.opt = " ".join(args.opt) # absolutely disgusting
|
||||||
|
|
||||||
|
with open(args.opt, 'r') as file:
|
||||||
|
opt_config = yaml.safe_load(file)
|
||||||
|
|
||||||
|
if "ext" in opt_config and "bitsandbytes" in opt_config["ext"] and not opt_config["ext"]["bitsandbytes"]:
|
||||||
|
os.environ['BITSANDBYTES_OVERRIDE_LINEAR'] = '0'
|
||||||
|
os.environ['BITSANDBYTES_OVERRIDE_EMBEDDING'] = '0'
|
||||||
|
os.environ['BITSANDBYTES_OVERRIDE_ADAM'] = '0'
|
||||||
|
os.environ['BITSANDBYTES_OVERRIDE_ADAMW'] = '0'
|
||||||
|
|
||||||
# this is some massive kludge that only works if it's called from a shell and not an import/PIP package
|
# this is some massive kludge that only works if it's called from a shell and not an import/PIP package
|
||||||
# it's smart-yet-irritating module-model loader breaks when trying to load something specifically when not from a shell
|
# it's smart-yet-irritating module-model loader breaks when trying to load something specifically when not from a shell
|
||||||
@ -19,6 +45,7 @@ sys.path.insert(0, './dlas/')
|
|||||||
# don't even really bother trying to get DLAS PIP'd
|
# don't even really bother trying to get DLAS PIP'd
|
||||||
# without kludge, it'll have to be accessible as `codes` and not `dlas`
|
# without kludge, it'll have to be accessible as `codes` and not `dlas`
|
||||||
|
|
||||||
|
import torch
|
||||||
from codes import train as tr
|
from codes import train as tr
|
||||||
from utils import util, options as option
|
from utils import util, options as option
|
||||||
|
|
||||||
@ -44,7 +71,7 @@ def train(yaml, launcher='none'):
|
|||||||
print('Disabled distributed training.')
|
print('Disabled distributed training.')
|
||||||
else:
|
else:
|
||||||
opt['dist'] = True
|
opt['dist'] = True
|
||||||
init_dist('nccl')
|
tr.init_dist('nccl')
|
||||||
trainer.world_size = torch.distributed.get_world_size()
|
trainer.world_size = torch.distributed.get_world_size()
|
||||||
trainer.rank = torch.distributed.get_rank()
|
trainer.rank = torch.distributed.get_rank()
|
||||||
torch.cuda.set_device(torch.distributed.get_rank())
|
torch.cuda.set_device(torch.distributed.get_rank())
|
||||||
@ -53,9 +80,14 @@ def train(yaml, launcher='none'):
|
|||||||
trainer.do_training()
|
trainer.do_training()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
# simple check because I'm brain damaged and forgot I can't modify what a module exports by simply changing the booleans that decide what it exports after the fact
|
||||||
parser.add_argument('-opt', type=str, help='Path to option YAML file.', default='../options/train_vit_latent.yml')
|
try:
|
||||||
parser.add_argument('--launcher', choices=['none', 'pytorch'], default='none', help='job launcher')
|
import torch_intermediary
|
||||||
args = parser.parse_args()
|
if torch_intermediary.OVERRIDE_ADAM:
|
||||||
|
print("Using BitsAndBytes ADAMW optimizations")
|
||||||
|
else:
|
||||||
|
print("NOT using BitsAndBytes ADAMW optimizations")
|
||||||
|
except Exception as e:
|
||||||
|
pass
|
||||||
|
|
||||||
train(args.opt, args.launcher)
|
train(args.opt, args.launcher)
|
||||||
1742
src/utils.py
1742
src/utils.py
File diff suppressed because it is too large
Load Diff
550
src/webui.py
550
src/webui.py
@ -16,6 +16,7 @@ from datetime import datetime
|
|||||||
|
|
||||||
import tortoise.api
|
import tortoise.api
|
||||||
from tortoise.utils.audio import get_voice_dir, get_voices
|
from tortoise.utils.audio import get_voice_dir, get_voices
|
||||||
|
from tortoise.utils.device import get_device_count
|
||||||
|
|
||||||
from utils import *
|
from utils import *
|
||||||
|
|
||||||
@ -45,35 +46,40 @@ def run_generation(
|
|||||||
experimental_checkboxes,
|
experimental_checkboxes,
|
||||||
progress=gr.Progress(track_tqdm=True)
|
progress=gr.Progress(track_tqdm=True)
|
||||||
):
|
):
|
||||||
|
if not text:
|
||||||
|
raise gr.Error("Please provide text.")
|
||||||
|
if not voice:
|
||||||
|
raise gr.Error("Please provide a voice.")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sample, outputs, stats = generate(
|
sample, outputs, stats = generate(
|
||||||
text,
|
text=text,
|
||||||
delimiter,
|
delimiter=delimiter,
|
||||||
emotion,
|
emotion=emotion,
|
||||||
prompt,
|
prompt=prompt,
|
||||||
voice,
|
voice=voice,
|
||||||
mic_audio,
|
mic_audio=mic_audio,
|
||||||
voice_latents_chunks,
|
voice_latents_chunks=voice_latents_chunks,
|
||||||
seed,
|
seed=seed,
|
||||||
candidates,
|
candidates=candidates,
|
||||||
num_autoregressive_samples,
|
num_autoregressive_samples=num_autoregressive_samples,
|
||||||
diffusion_iterations,
|
diffusion_iterations=diffusion_iterations,
|
||||||
temperature,
|
temperature=temperature,
|
||||||
diffusion_sampler,
|
diffusion_sampler=diffusion_sampler,
|
||||||
breathing_room,
|
breathing_room=breathing_room,
|
||||||
cvvp_weight,
|
cvvp_weight=cvvp_weight,
|
||||||
top_p,
|
top_p=top_p,
|
||||||
diffusion_temperature,
|
diffusion_temperature=diffusion_temperature,
|
||||||
length_penalty,
|
length_penalty=length_penalty,
|
||||||
repetition_penalty,
|
repetition_penalty=repetition_penalty,
|
||||||
cond_free_k,
|
cond_free_k=cond_free_k,
|
||||||
experimental_checkboxes,
|
experimental_checkboxes=experimental_checkboxes,
|
||||||
progress
|
progress=progress
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
message = str(e)
|
message = str(e)
|
||||||
if message == "Kill signal detected":
|
if message == "Kill signal detected":
|
||||||
reload_tts()
|
unload_tts()
|
||||||
|
|
||||||
raise gr.Error(message)
|
raise gr.Error(message)
|
||||||
|
|
||||||
@ -82,31 +88,9 @@ def run_generation(
|
|||||||
outputs[0],
|
outputs[0],
|
||||||
gr.update(value=sample, visible=sample is not None),
|
gr.update(value=sample, visible=sample is not None),
|
||||||
gr.update(choices=outputs, value=outputs[0], visible=len(outputs) > 1, interactive=True),
|
gr.update(choices=outputs, value=outputs[0], visible=len(outputs) > 1, interactive=True),
|
||||||
gr.update(visible=len(outputs) > 1),
|
|
||||||
gr.update(value=stats, visible=True),
|
gr.update(value=stats, visible=True),
|
||||||
)
|
)
|
||||||
|
|
||||||
def compute_latents(voice, voice_latents_chunks, progress=gr.Progress(track_tqdm=True)):
|
|
||||||
global tts
|
|
||||||
global args
|
|
||||||
|
|
||||||
if not tts:
|
|
||||||
raise Exception("TTS is uninitialized or still initializing...")
|
|
||||||
|
|
||||||
voice_samples, conditioning_latents = load_voice(voice, load_latents=False)
|
|
||||||
|
|
||||||
if voice_samples is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
conditioning_latents = tts.get_conditioning_latents(voice_samples, return_mels=not args.latents_lean_and_mean, progress=progress, slices=voice_latents_chunks, force_cpu=args.force_cpu_for_conditioning_latents)
|
|
||||||
|
|
||||||
if len(conditioning_latents) == 4:
|
|
||||||
conditioning_latents = (conditioning_latents[0], conditioning_latents[1], conditioning_latents[2], None)
|
|
||||||
|
|
||||||
torch.save(conditioning_latents, f'{get_voice_dir()}/{voice}/cond_latents.pth')
|
|
||||||
|
|
||||||
return voice
|
|
||||||
|
|
||||||
def update_presets(value):
|
def update_presets(value):
|
||||||
PRESETS = {
|
PRESETS = {
|
||||||
'Ultra Fast': {'num_autoregressive_samples': 16, 'diffusion_iterations': 30, 'cond_free': False},
|
'Ultra Fast': {'num_autoregressive_samples': 16, 'diffusion_iterations': 30, 'cond_free': False},
|
||||||
@ -146,6 +130,9 @@ history_headers = {
|
|||||||
"Rep Pen": "repetition_penalty",
|
"Rep Pen": "repetition_penalty",
|
||||||
"Cond-Free K": "cond_free_k",
|
"Cond-Free K": "cond_free_k",
|
||||||
"Time": "time",
|
"Time": "time",
|
||||||
|
"Datetime": "datetime",
|
||||||
|
"Model": "model",
|
||||||
|
"Model Hash": "model_hash",
|
||||||
}
|
}
|
||||||
|
|
||||||
def history_view_results( voice ):
|
def history_view_results( voice ):
|
||||||
@ -164,7 +151,7 @@ def history_view_results( voice ):
|
|||||||
for k in history_headers:
|
for k in history_headers:
|
||||||
v = file
|
v = file
|
||||||
if k != "Name":
|
if k != "Name":
|
||||||
v = metadata[headers[k]]
|
v = metadata[history_headers[k]] if history_headers[k] in metadata else '?'
|
||||||
values.append(v)
|
values.append(v)
|
||||||
|
|
||||||
|
|
||||||
@ -193,15 +180,117 @@ def read_generate_settings_proxy(file, saveAs='.temp'):
|
|||||||
|
|
||||||
return (
|
return (
|
||||||
gr.update(value=j, visible=j is not None),
|
gr.update(value=j, visible=j is not None),
|
||||||
gr.update(visible=j is not None),
|
|
||||||
gr.update(value=latents, visible=latents is not None),
|
gr.update(value=latents, visible=latents is not None),
|
||||||
None if j is None else j['voice']
|
None if j is None else j['voice'],
|
||||||
|
gr.update(visible=j is not None),
|
||||||
)
|
)
|
||||||
|
|
||||||
def prepare_dataset_proxy( voice, language, progress=gr.Progress(track_tqdm=True) ):
|
def prepare_dataset_proxy( voice, language, skip_existings, progress=gr.Progress(track_tqdm=True) ):
|
||||||
return prepare_dataset( get_voices(load_latents=False)[voice], outdir=f"./training/{voice}/", language=language, progress=progress )
|
return prepare_dataset( get_voices(load_latents=False)[voice], outdir=f"./training/{voice}/", language=language, skip_existings=skip_existings, progress=progress )
|
||||||
|
|
||||||
def save_training_settings_proxy( iterations, batch_size, learning_rate, print_rate, save_rate, voice ):
|
def optimize_training_settings_proxy( *args, **kwargs ):
|
||||||
|
tup = optimize_training_settings(*args, **kwargs)
|
||||||
|
|
||||||
|
return (
|
||||||
|
gr.update(value=tup[0]),
|
||||||
|
gr.update(value=tup[1]),
|
||||||
|
gr.update(value=tup[2]),
|
||||||
|
gr.update(value=tup[3]),
|
||||||
|
gr.update(value=tup[4]),
|
||||||
|
gr.update(value=tup[5]),
|
||||||
|
gr.update(value=tup[6]),
|
||||||
|
gr.update(value=tup[7]),
|
||||||
|
"\n".join(tup[8])
|
||||||
|
)
|
||||||
|
|
||||||
|
def import_training_settings_proxy( voice ):
|
||||||
|
indir = f'./training/{voice}/'
|
||||||
|
outdir = f'./training/{voice}-finetune/'
|
||||||
|
|
||||||
|
in_config_path = f"{indir}/train.yaml"
|
||||||
|
out_config_path = None
|
||||||
|
out_configs = []
|
||||||
|
if os.path.isdir(outdir):
|
||||||
|
out_configs = sorted([d[:-5] for d in os.listdir(outdir) if d[-5:] == ".yaml" ])
|
||||||
|
if len(out_configs) > 0:
|
||||||
|
out_config_path = f'{outdir}/{out_configs[-1]}.yaml'
|
||||||
|
|
||||||
|
config_path = out_config_path if out_config_path else in_config_path
|
||||||
|
|
||||||
|
messages = []
|
||||||
|
with open(config_path, 'r') as file:
|
||||||
|
config = yaml.safe_load(file)
|
||||||
|
messages.append(f"Importing from: {config_path}")
|
||||||
|
|
||||||
|
dataset_path = f"./training/{voice}/train.txt"
|
||||||
|
with open(dataset_path, 'r', encoding="utf-8") as f:
|
||||||
|
lines = len(f.readlines())
|
||||||
|
messages.append(f"Basing epoch size to {lines} lines")
|
||||||
|
|
||||||
|
batch_size = config['datasets']['train']['batch_size']
|
||||||
|
gradient_accumulation_size = config['train']['mega_batch_factor']
|
||||||
|
|
||||||
|
iterations = config['train']['niter']
|
||||||
|
steps_per_iteration = int(lines / batch_size)
|
||||||
|
epochs = int(iterations / steps_per_iteration)
|
||||||
|
|
||||||
|
|
||||||
|
learning_rate = config['steps']['gpt_train']['optimizer_params']['lr']
|
||||||
|
text_ce_lr_weight = config['steps']['gpt_train']['losses']['text_ce']['weight']
|
||||||
|
learning_rate_schedule = [ int(x / steps_per_iteration) for x in config['train']['gen_lr_steps'] ]
|
||||||
|
|
||||||
|
|
||||||
|
print_rate = int(config['logger']['print_freq'] / steps_per_iteration)
|
||||||
|
save_rate = int(config['logger']['save_checkpoint_freq'] / steps_per_iteration)
|
||||||
|
|
||||||
|
statedir = f'{outdir}/training_state/' # NOOO STOP MIXING YOUR CASES
|
||||||
|
resumes = []
|
||||||
|
resume_path = None
|
||||||
|
source_model = None
|
||||||
|
|
||||||
|
if "pretrain_model_gpt" in config['path']:
|
||||||
|
source_model = config['path']['pretrain_model_gpt']
|
||||||
|
elif "resume_state" in config['path']:
|
||||||
|
resume_path = config['path']['resume_state']
|
||||||
|
|
||||||
|
|
||||||
|
if os.path.isdir(statedir):
|
||||||
|
resumes = sorted([int(d[:-6]) for d in os.listdir(statedir) if d[-6:] == ".state" ])
|
||||||
|
|
||||||
|
if len(resumes) > 0:
|
||||||
|
resume_path = f'{statedir}/{resumes[-1]}.state'
|
||||||
|
messages.append(f"Latest resume found: {resume_path}")
|
||||||
|
|
||||||
|
|
||||||
|
half_p = config['fp16']
|
||||||
|
bnb = True
|
||||||
|
|
||||||
|
if "ext" in config and "bitsandbytes" in config["ext"]:
|
||||||
|
bnb = config["ext"]["bitsandbytes"]
|
||||||
|
|
||||||
|
workers = config['datasets']['train']['n_workers']
|
||||||
|
|
||||||
|
messages = "\n".join(messages)
|
||||||
|
|
||||||
|
return (
|
||||||
|
epochs,
|
||||||
|
learning_rate,
|
||||||
|
text_ce_lr_weight,
|
||||||
|
learning_rate_schedule,
|
||||||
|
batch_size,
|
||||||
|
gradient_accumulation_size,
|
||||||
|
print_rate,
|
||||||
|
save_rate,
|
||||||
|
resume_path,
|
||||||
|
half_p,
|
||||||
|
bnb,
|
||||||
|
workers,
|
||||||
|
source_model,
|
||||||
|
messages
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def save_training_settings_proxy( epochs, learning_rate, text_ce_lr_weight, learning_rate_schedule, batch_size, gradient_accumulation_size, print_rate, save_rate, resume_path, half_p, bnb, workers, source_model, voice ):
|
||||||
name = f"{voice}-finetune"
|
name = f"{voice}-finetune"
|
||||||
dataset_name = f"{voice}-train"
|
dataset_name = f"{voice}-train"
|
||||||
dataset_path = f"./training/{voice}/train.txt"
|
dataset_path = f"./training/{voice}/train.txt"
|
||||||
@ -211,17 +300,47 @@ def save_training_settings_proxy( iterations, batch_size, learning_rate, print_r
|
|||||||
with open(dataset_path, 'r', encoding="utf-8") as f:
|
with open(dataset_path, 'r', encoding="utf-8") as f:
|
||||||
lines = len(f.readlines())
|
lines = len(f.readlines())
|
||||||
|
|
||||||
if batch_size > lines:
|
messages = []
|
||||||
print("Batch size is larger than your dataset, clamping...")
|
|
||||||
batch_size = lines
|
|
||||||
|
|
||||||
out_name = f"{voice}/train.yaml"
|
iterations = calc_iterations(epochs=epochs, lines=lines, batch_size=batch_size)
|
||||||
|
messages.append(f"For {epochs} epochs with {lines} lines, iterating for {iterations} steps")
|
||||||
|
|
||||||
return save_training_settings(iterations, batch_size, learning_rate, print_rate, save_rate, name, dataset_name, dataset_path, validation_name, validation_path, out_name )
|
print_rate = int(print_rate * iterations / epochs)
|
||||||
|
save_rate = int(save_rate * iterations / epochs)
|
||||||
|
|
||||||
|
if not learning_rate_schedule:
|
||||||
|
learning_rate_schedule = EPOCH_SCHEDULE
|
||||||
|
elif isinstance(learning_rate_schedule,str):
|
||||||
|
learning_rate_schedule = json.loads(learning_rate_schedule)
|
||||||
|
|
||||||
|
learning_rate_schedule = schedule_learning_rate( iterations / epochs, learning_rate_schedule )
|
||||||
|
|
||||||
|
messages.append(save_training_settings(
|
||||||
|
iterations=iterations,
|
||||||
|
batch_size=batch_size,
|
||||||
|
learning_rate=learning_rate,
|
||||||
|
text_ce_lr_weight=text_ce_lr_weight,
|
||||||
|
learning_rate_schedule=learning_rate_schedule,
|
||||||
|
gradient_accumulation_size=gradient_accumulation_size,
|
||||||
|
print_rate=print_rate,
|
||||||
|
save_rate=save_rate,
|
||||||
|
name=name,
|
||||||
|
dataset_name=dataset_name,
|
||||||
|
dataset_path=dataset_path,
|
||||||
|
validation_name=validation_name,
|
||||||
|
validation_path=validation_path,
|
||||||
|
output_name=f"{voice}/train.yaml",
|
||||||
|
resume_path=resume_path,
|
||||||
|
half_p=half_p,
|
||||||
|
bnb=bnb,
|
||||||
|
workers=workers,
|
||||||
|
source_model=source_model,
|
||||||
|
))
|
||||||
|
return "\n".join(messages)
|
||||||
|
|
||||||
def update_voices():
|
def update_voices():
|
||||||
return (
|
return (
|
||||||
gr.Dropdown.update(choices=get_voice_list()),
|
gr.Dropdown.update(choices=get_voice_list(append_defaults=True)),
|
||||||
gr.Dropdown.update(choices=get_voice_list()),
|
gr.Dropdown.update(choices=get_voice_list()),
|
||||||
gr.Dropdown.update(choices=get_voice_list("./results/")),
|
gr.Dropdown.update(choices=get_voice_list("./results/")),
|
||||||
)
|
)
|
||||||
@ -229,14 +348,6 @@ def update_voices():
|
|||||||
def history_copy_settings( voice, file ):
|
def history_copy_settings( voice, file ):
|
||||||
return import_generate_settings( f"./results/{voice}/{file}" )
|
return import_generate_settings( f"./results/{voice}/{file}" )
|
||||||
|
|
||||||
def update_model_settings( autoregressive_model, whisper_model ):
|
|
||||||
if args.autoregressive_model != autoregressive_model:
|
|
||||||
update_autoregressive_model(autoregressive_model)
|
|
||||||
|
|
||||||
args.whisper_model = whisper_model
|
|
||||||
|
|
||||||
save_args_settings()
|
|
||||||
|
|
||||||
def setup_gradio():
|
def setup_gradio():
|
||||||
global args
|
global args
|
||||||
global ui
|
global ui
|
||||||
@ -257,41 +368,60 @@ def setup_gradio():
|
|||||||
if args.models_from_local_only:
|
if args.models_from_local_only:
|
||||||
os.environ['TRANSFORMERS_OFFLINE']='1'
|
os.environ['TRANSFORMERS_OFFLINE']='1'
|
||||||
|
|
||||||
|
voice_list_with_defaults = get_voice_list(append_defaults=True)
|
||||||
|
voice_list = get_voice_list()
|
||||||
|
result_voices = get_voice_list("./results/")
|
||||||
|
autoregressive_models = get_autoregressive_models()
|
||||||
|
dataset_list = get_dataset_list()
|
||||||
|
|
||||||
with gr.Blocks() as ui:
|
with gr.Blocks() as ui:
|
||||||
with gr.Tab("Generate"):
|
with gr.Tab("Generate"):
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
text = gr.Textbox(lines=4, label="Prompt")
|
text = gr.Textbox(lines=4, label="Input Prompt")
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
delimiter = gr.Textbox(lines=1, label="Line Delimiter", placeholder="\\n")
|
delimiter = gr.Textbox(lines=1, label="Line Delimiter", placeholder="\\n")
|
||||||
|
|
||||||
emotion = gr.Radio( ["Happy", "Sad", "Angry", "Disgusted", "Arrogant", "Custom"], value="Custom", label="Emotion", type="value", interactive=True )
|
emotion = gr.Radio( ["Happy", "Sad", "Angry", "Disgusted", "Arrogant", "Custom", "None"], value="None", label="Emotion", type="value", interactive=True )
|
||||||
prompt = gr.Textbox(lines=1, label="Custom Emotion + Prompt (if selected)")
|
prompt = gr.Textbox(lines=1, label="Custom Emotion")
|
||||||
voice = gr.Dropdown(get_voice_list(), label="Voice", type="value")
|
voice = gr.Dropdown(choices=voice_list_with_defaults, label="Voice", type="value", value=voice_list_with_defaults[0]) # it'd be very cash money if gradio was able to default to the first value in the list without this shit
|
||||||
mic_audio = gr.Audio( label="Microphone Source", source="microphone", type="filepath" )
|
mic_audio = gr.Audio( label="Microphone Source", source="microphone", type="filepath", visible=False )
|
||||||
refresh_voices = gr.Button(value="Refresh Voice List")
|
voice_latents_chunks = gr.Slider(label="Voice Chunks", minimum=1, maximum=128, value=1, step=1)
|
||||||
voice_latents_chunks = gr.Slider(label="Voice Chunks", minimum=1, maximum=64, value=1, step=1)
|
with gr.Row():
|
||||||
recompute_voice_latents = gr.Button(value="(Re)Compute Voice Latents")
|
refresh_voices = gr.Button(value="Refresh Voice List")
|
||||||
|
recompute_voice_latents = gr.Button(value="(Re)Compute Voice Latents")
|
||||||
|
|
||||||
|
voice.change(
|
||||||
|
fn=update_baseline_for_latents_chunks,
|
||||||
|
inputs=voice,
|
||||||
|
outputs=voice_latents_chunks
|
||||||
|
)
|
||||||
|
voice.change(
|
||||||
|
fn=lambda value: gr.update(visible=value == "microphone"),
|
||||||
|
inputs=voice,
|
||||||
|
outputs=mic_audio,
|
||||||
|
)
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
candidates = gr.Slider(value=1, minimum=1, maximum=6, step=1, label="Candidates")
|
candidates = gr.Slider(value=1, minimum=1, maximum=6, step=1, label="Candidates")
|
||||||
seed = gr.Number(value=0, precision=0, label="Seed")
|
seed = gr.Number(value=0, precision=0, label="Seed")
|
||||||
|
|
||||||
preset = gr.Radio( ["Ultra Fast", "Fast", "Standard", "High Quality"], label="Preset", type="value" )
|
preset = gr.Radio( ["Ultra Fast", "Fast", "Standard", "High Quality"], label="Preset", type="value" )
|
||||||
num_autoregressive_samples = gr.Slider(value=128, minimum=0, maximum=512, step=1, label="Samples")
|
num_autoregressive_samples = gr.Slider(value=128, minimum=2, maximum=512, step=1, label="Samples")
|
||||||
diffusion_iterations = gr.Slider(value=128, minimum=0, maximum=512, step=1, label="Iterations")
|
diffusion_iterations = gr.Slider(value=128, minimum=0, maximum=512, step=1, label="Iterations")
|
||||||
|
|
||||||
temperature = gr.Slider(value=0.2, minimum=0, maximum=1, step=0.1, label="Temperature")
|
temperature = gr.Slider(value=0.2, minimum=0, maximum=1, step=0.1, label="Temperature")
|
||||||
breathing_room = gr.Slider(value=8, minimum=1, maximum=32, step=1, label="Pause Size")
|
|
||||||
diffusion_sampler = gr.Radio(
|
|
||||||
["P", "DDIM"], # + ["K_Euler_A", "DPM++2M"],
|
|
||||||
value="P", label="Diffusion Samplers", type="value" )
|
|
||||||
show_experimental_settings = gr.Checkbox(label="Show Experimental Settings")
|
show_experimental_settings = gr.Checkbox(label="Show Experimental Settings")
|
||||||
reset_generation_settings_button = gr.Button(value="Reset to Default")
|
reset_generation_settings_button = gr.Button(value="Reset to Default")
|
||||||
with gr.Column(visible=False) as col:
|
with gr.Column(visible=False) as col:
|
||||||
experimental_column = col
|
experimental_column = col
|
||||||
|
|
||||||
experimental_checkboxes = gr.CheckboxGroup(["Half Precision", "Conditioning-Free"], value=["Conditioning-Free"], label="Experimental Flags")
|
experimental_checkboxes = gr.CheckboxGroup(["Half Precision", "Conditioning-Free"], value=["Conditioning-Free"], label="Experimental Flags")
|
||||||
|
breathing_room = gr.Slider(value=8, minimum=1, maximum=32, step=1, label="Pause Size")
|
||||||
|
diffusion_sampler = gr.Radio(
|
||||||
|
["P", "DDIM"], # + ["K_Euler_A", "DPM++2M"],
|
||||||
|
value="DDIM", label="Diffusion Samplers", type="value"
|
||||||
|
)
|
||||||
cvvp_weight = gr.Slider(value=0, minimum=0, maximum=1, label="CVVP Weight")
|
cvvp_weight = gr.Slider(value=0, minimum=0, maximum=1, label="CVVP Weight")
|
||||||
top_p = gr.Slider(value=0.8, minimum=0, maximum=1, label="Top P")
|
top_p = gr.Slider(value=0.8, minimum=0, maximum=1, label="Top P")
|
||||||
diffusion_temperature = gr.Slider(value=1.0, minimum=0, maximum=1, label="Diffusion Temperature")
|
diffusion_temperature = gr.Slider(value=1.0, minimum=0, maximum=1, label="Diffusion Temperature")
|
||||||
@ -299,25 +429,34 @@ def setup_gradio():
|
|||||||
repetition_penalty = gr.Slider(value=2.0, minimum=0, maximum=8, label="Repetition Penalty")
|
repetition_penalty = gr.Slider(value=2.0, minimum=0, maximum=8, label="Repetition Penalty")
|
||||||
cond_free_k = gr.Slider(value=2.0, minimum=0, maximum=4, label="Conditioning-Free K")
|
cond_free_k = gr.Slider(value=2.0, minimum=0, maximum=4, label="Conditioning-Free K")
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
submit = gr.Button(value="Generate")
|
with gr.Row():
|
||||||
stop = gr.Button(value="Stop")
|
submit = gr.Button(value="Generate")
|
||||||
|
stop = gr.Button(value="Stop")
|
||||||
|
|
||||||
generation_results = gr.Dataframe(label="Results", headers=["Seed", "Time"], visible=False)
|
generation_results = gr.Dataframe(label="Results", headers=["Seed", "Time"], visible=False)
|
||||||
source_sample = gr.Audio(label="Source Sample", visible=False)
|
source_sample = gr.Audio(label="Source Sample", visible=False)
|
||||||
output_audio = gr.Audio(label="Output")
|
output_audio = gr.Audio(label="Output")
|
||||||
candidates_list = gr.Dropdown(label="Candidates", type="value", visible=False)
|
candidates_list = gr.Dropdown(label="Candidates", type="value", visible=False, choices=[""], value="")
|
||||||
output_pick = gr.Button(value="Select Candidate", visible=False)
|
|
||||||
|
def change_candidate( val ):
|
||||||
|
if not val:
|
||||||
|
return
|
||||||
|
return val
|
||||||
|
|
||||||
|
candidates_list.change(
|
||||||
|
fn=change_candidate,
|
||||||
|
inputs=candidates_list,
|
||||||
|
outputs=output_audio,
|
||||||
|
)
|
||||||
with gr.Tab("History"):
|
with gr.Tab("History"):
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
history_info = gr.Dataframe(label="Results", headers=list(history_headers.keys()))
|
history_info = gr.Dataframe(label="Results", headers=list(history_headers.keys()))
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
history_voices = gr.Dropdown(choices=get_voice_list("./results/"), label="Voice", type="value")
|
history_voices = gr.Dropdown(choices=result_voices, label="Voice", type="value", value=result_voices[0] if len(result_voices) > 0 else "")
|
||||||
history_view_results_button = gr.Button(value="View Files")
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
history_results_list = gr.Dropdown(label="Results",type="value", interactive=True)
|
history_results_list = gr.Dropdown(label="Results",type="value", interactive=True, value="")
|
||||||
history_view_result_button = gr.Button(value="View File")
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
history_audio = gr.Audio()
|
history_audio = gr.Audio()
|
||||||
history_copy_settings_button = gr.Button(value="Copy Settings")
|
history_copy_settings_button = gr.Button(value="Copy Settings")
|
||||||
@ -327,17 +466,20 @@ def setup_gradio():
|
|||||||
audio_in = gr.Files(type="file", label="Audio Input", file_types=["audio"])
|
audio_in = gr.Files(type="file", label="Audio Input", file_types=["audio"])
|
||||||
import_voice_name = gr.Textbox(label="Voice Name")
|
import_voice_name = gr.Textbox(label="Voice Name")
|
||||||
import_voice_button = gr.Button(value="Import Voice")
|
import_voice_button = gr.Button(value="Import Voice")
|
||||||
with gr.Column():
|
with gr.Column(visible=False) as col:
|
||||||
metadata_out = gr.JSON(label="Audio Metadata", visible=False)
|
utilities_metadata_column = col
|
||||||
copy_button = gr.Button(value="Copy Settings", visible=False)
|
|
||||||
latents_out = gr.File(type="binary", label="Voice Latents", visible=False)
|
metadata_out = gr.JSON(label="Audio Metadata")
|
||||||
|
copy_button = gr.Button(value="Copy Settings")
|
||||||
|
latents_out = gr.File(type="binary", label="Voice Latents")
|
||||||
with gr.Tab("Training"):
|
with gr.Tab("Training"):
|
||||||
with gr.Tab("Prepare Dataset"):
|
with gr.Tab("Prepare Dataset"):
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
dataset_settings = [
|
dataset_settings = [
|
||||||
gr.Dropdown( get_voice_list(), label="Dataset Source", type="value" ),
|
gr.Dropdown( choices=voice_list, label="Dataset Source", type="value", value=voice_list[0] if len(voice_list) > 0 else "" ),
|
||||||
gr.Textbox(label="Language", placeholder="English")
|
gr.Textbox(label="Language", value="en"),
|
||||||
|
gr.Checkbox(label="Skip Already Transcribed", value=False)
|
||||||
]
|
]
|
||||||
prepare_dataset_button = gr.Button(value="Prepare")
|
prepare_dataset_button = gr.Button(value="Prepare")
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
@ -346,38 +488,78 @@ def setup_gradio():
|
|||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
training_settings = [
|
training_settings = [
|
||||||
gr.Slider(label="Iterations", minimum=0, maximum=5000, value=500),
|
gr.Number(label="Epochs", value=500, precision=0),
|
||||||
gr.Slider(label="Batch Size", minimum=2, maximum=128, value=64),
|
|
||||||
gr.Slider(label="Learning Rate", value=1e-5, minimum=0, maximum=1e-4, step=1e-6),
|
|
||||||
gr.Number(label="Print Frequency", value=50),
|
|
||||||
gr.Number(label="Save Frequency", value=50),
|
|
||||||
]
|
]
|
||||||
dataset_list = gr.Dropdown( get_dataset_list(), label="Dataset", type="value" )
|
with gr.Row():
|
||||||
|
with gr.Column():
|
||||||
|
training_settings = training_settings + [
|
||||||
|
gr.Slider(label="Learning Rate", value=1e-5, minimum=0, maximum=1e-4, step=1e-6),
|
||||||
|
gr.Slider(label="Text_CE LR Ratio", value=0.01, minimum=0, maximum=1),
|
||||||
|
]
|
||||||
|
training_settings = training_settings + [
|
||||||
|
gr.Textbox(label="Learning Rate Schedule", placeholder=str(EPOCH_SCHEDULE)),
|
||||||
|
]
|
||||||
|
with gr.Row():
|
||||||
|
training_settings = training_settings + [
|
||||||
|
gr.Number(label="Batch Size", value=128, precision=0),
|
||||||
|
gr.Number(label="Gradient Accumulation Size", value=4, precision=0),
|
||||||
|
]
|
||||||
|
with gr.Row():
|
||||||
|
training_settings = training_settings + [
|
||||||
|
gr.Number(label="Print Frequency (in epochs)", value=5, precision=0),
|
||||||
|
gr.Number(label="Save Frequency (in epochs)", value=5, precision=0),
|
||||||
|
]
|
||||||
training_settings = training_settings + [
|
training_settings = training_settings + [
|
||||||
dataset_list
|
gr.Textbox(label="Resume State Path", placeholder="./training/${voice}-finetune/training_state/${last_state}.state"),
|
||||||
]
|
]
|
||||||
refresh_dataset_list = gr.Button(value="Refresh Dataset List")
|
|
||||||
"""
|
with gr.Row():
|
||||||
training_settings = training_settings + [
|
training_halfp = gr.Checkbox(label="Half Precision", value=args.training_default_halfp)
|
||||||
gr.Textbox(label="Training Name", placeholder="finetune"),
|
training_bnb = gr.Checkbox(label="BitsAndBytes", value=args.training_default_bnb)
|
||||||
gr.Textbox(label="Dataset Name", placeholder="finetune"),
|
|
||||||
gr.Textbox(label="Dataset Path", placeholder="./training/finetune/train.txt"),
|
training_workers = gr.Number(label="Worker Processes", value=2, precision=0)
|
||||||
gr.Textbox(label="Validation Name", placeholder="finetune"),
|
|
||||||
gr.Textbox(label="Validation Path", placeholder="./training/finetune/train.txt"),
|
source_model = gr.Dropdown( choices=autoregressive_models, label="Source Model", type="value", value=autoregressive_models[0] )
|
||||||
]
|
dataset_list_dropdown = gr.Dropdown( choices=dataset_list, label="Dataset", type="value", value=dataset_list[0] if len(dataset_list) else "" )
|
||||||
"""
|
training_settings = training_settings + [ training_halfp, training_bnb, training_workers, source_model, dataset_list_dropdown ]
|
||||||
|
|
||||||
|
with gr.Row():
|
||||||
|
refresh_dataset_list = gr.Button(value="Refresh Dataset List")
|
||||||
|
import_dataset_button = gr.Button(value="Reuse/Import Dataset")
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
save_yaml_output = gr.TextArea(label="Console Output", interactive=False, max_lines=8)
|
save_yaml_output = gr.TextArea(label="Console Output", interactive=False, max_lines=8)
|
||||||
save_yaml_button = gr.Button(value="Save Training Configuration")
|
with gr.Row():
|
||||||
|
optimize_yaml_button = gr.Button(value="Validate Training Configuration")
|
||||||
|
save_yaml_button = gr.Button(value="Save Training Configuration")
|
||||||
with gr.Tab("Run Training"):
|
with gr.Tab("Run Training"):
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
training_configs = gr.Dropdown(label="Training Configuration", choices=get_training_list())
|
training_configs = gr.Dropdown(label="Training Configuration", choices=get_training_list())
|
||||||
refresh_configs = gr.Button(value="Refresh Configurations")
|
with gr.Row():
|
||||||
start_training_button = gr.Button(value="Train")
|
refresh_configs = gr.Button(value="Refresh Configurations")
|
||||||
stop_training_button = gr.Button(value="Stop")
|
|
||||||
|
training_loss_graph = gr.LinePlot(label="Training Metrics",
|
||||||
|
x="step",
|
||||||
|
y="value",
|
||||||
|
title="Training Metrics",
|
||||||
|
color="type",
|
||||||
|
tooltip=['step', 'value', 'type'],
|
||||||
|
width=600,
|
||||||
|
height=350,
|
||||||
|
)
|
||||||
|
view_losses = gr.Button(value="View Losses")
|
||||||
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
training_output = gr.TextArea(label="Console Output", interactive=False, max_lines=8)
|
training_output = gr.TextArea(label="Console Output", interactive=False, max_lines=8)
|
||||||
|
verbose_training = gr.Checkbox(label="Verbose Console Output", value=True)
|
||||||
|
|
||||||
|
with gr.Row():
|
||||||
|
training_keep_x_past_datasets = gr.Slider(label="Keep X Previous States", minimum=0, maximum=8, value=0, step=1)
|
||||||
|
training_gpu_count = gr.Number(label="GPUs", value=get_device_count())
|
||||||
|
with gr.Row():
|
||||||
|
start_training_button = gr.Button(value="Train")
|
||||||
|
stop_training_button = gr.Button(value="Stop")
|
||||||
|
reconnect_training_button = gr.Button(value="Reconnect")
|
||||||
with gr.Tab("Settings"):
|
with gr.Tab("Settings"):
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
exec_inputs = []
|
exec_inputs = []
|
||||||
@ -390,31 +572,59 @@ def setup_gradio():
|
|||||||
gr.Checkbox(label="Low VRAM", value=args.low_vram),
|
gr.Checkbox(label="Low VRAM", value=args.low_vram),
|
||||||
gr.Checkbox(label="Embed Output Metadata", value=args.embed_output_metadata),
|
gr.Checkbox(label="Embed Output Metadata", value=args.embed_output_metadata),
|
||||||
gr.Checkbox(label="Slimmer Computed Latents", value=args.latents_lean_and_mean),
|
gr.Checkbox(label="Slimmer Computed Latents", value=args.latents_lean_and_mean),
|
||||||
gr.Checkbox(label="Voice Fixer", value=args.voice_fixer),
|
gr.Checkbox(label="Use Voice Fixer on Generated Output", value=args.voice_fixer),
|
||||||
gr.Checkbox(label="Use CUDA for Voice Fixer", value=args.voice_fixer_use_cuda),
|
gr.Checkbox(label="Use CUDA for Voice Fixer", value=args.voice_fixer_use_cuda),
|
||||||
gr.Checkbox(label="Force CPU for Conditioning Latents", value=args.force_cpu_for_conditioning_latents),
|
gr.Checkbox(label="Force CPU for Conditioning Latents", value=args.force_cpu_for_conditioning_latents),
|
||||||
gr.Checkbox(label="Defer TTS Load", value=args.defer_tts_load),
|
gr.Checkbox(label="Do Not Load TTS On Startup", value=args.defer_tts_load),
|
||||||
|
gr.Checkbox(label="Delete Non-Final Output", value=args.prune_nonfinal_outputs),
|
||||||
|
gr.Checkbox(label="Use BigVGAN Vocoder", value=args.use_bigvgan_vocoder),
|
||||||
gr.Textbox(label="Device Override", value=args.device_override),
|
gr.Textbox(label="Device Override", value=args.device_override),
|
||||||
]
|
]
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
exec_inputs = exec_inputs + [
|
exec_inputs = exec_inputs + [
|
||||||
gr.Number(label="Sample Batch Size", precision=0, value=args.sample_batch_size),
|
gr.Number(label="Sample Batch Size", precision=0, value=args.sample_batch_size),
|
||||||
gr.Number(label="Concurrency Count", precision=0, value=args.concurrency_count),
|
gr.Number(label="Gradio Concurrency Count", precision=0, value=args.concurrency_count),
|
||||||
gr.Number(label="Ouptut Sample Rate", precision=0, value=args.output_sample_rate),
|
gr.Number(label="Auto-Calculate Voice Chunk Duration (in seconds)", precision=0, value=args.autocalculate_voice_chunk_duration_size),
|
||||||
gr.Slider(label="Ouptut Volume", minimum=0, maximum=2, value=args.output_volume),
|
gr.Slider(label="Output Volume", minimum=0, maximum=2, value=args.output_volume),
|
||||||
]
|
]
|
||||||
|
|
||||||
autoregressive_model_dropdown = gr.Dropdown(get_autoregressive_models(), label="Autoregressive Model", value=args.autoregressive_model)
|
autoregressive_model_dropdown = gr.Dropdown(choices=autoregressive_models, label="Autoregressive Model", value=args.autoregressive_model if args.autoregressive_model else autoregressive_models[0])
|
||||||
whisper_model_dropdown = gr.Dropdown(["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large"], label="Whisper Model", value=args.whisper_model)
|
|
||||||
save_settings_button = gr.Button(value="Save Settings")
|
whisper_backend = gr.Dropdown(WHISPER_BACKENDS, label="Whisper Backends", value=args.whisper_backend)
|
||||||
|
whisper_model_dropdown = gr.Dropdown(WHISPER_MODELS, label="Whisper Model", value=args.whisper_model)
|
||||||
|
|
||||||
|
exec_inputs = exec_inputs + [ autoregressive_model_dropdown, whisper_backend, whisper_model_dropdown, training_halfp, training_bnb ]
|
||||||
|
|
||||||
gr.Button(value="Check for Updates").click(check_for_updates)
|
with gr.Row():
|
||||||
gr.Button(value="(Re)Load TTS").click(reload_tts)
|
autoregressive_models_update_button = gr.Button(value="Refresh Model List")
|
||||||
|
gr.Button(value="Check for Updates").click(check_for_updates)
|
||||||
|
gr.Button(value="(Re)Load TTS").click(
|
||||||
|
reload_tts,
|
||||||
|
inputs=autoregressive_model_dropdown,
|
||||||
|
outputs=None
|
||||||
|
)
|
||||||
|
# kill_button = gr.Button(value="Close UI")
|
||||||
|
|
||||||
|
def update_model_list_proxy( val ):
|
||||||
|
autoregressive_models = get_autoregressive_models()
|
||||||
|
if val not in autoregressive_models:
|
||||||
|
val = autoregressive_models[0]
|
||||||
|
return gr.update( choices=autoregressive_models, value=val )
|
||||||
|
|
||||||
|
autoregressive_models_update_button.click(
|
||||||
|
update_model_list_proxy,
|
||||||
|
inputs=autoregressive_model_dropdown,
|
||||||
|
outputs=autoregressive_model_dropdown,
|
||||||
|
)
|
||||||
|
|
||||||
for i in exec_inputs:
|
for i in exec_inputs:
|
||||||
i.change( fn=update_args, inputs=exec_inputs )
|
i.change( fn=update_args, inputs=exec_inputs )
|
||||||
|
|
||||||
# console_output = gr.TextArea(label="Console Output", interactive=False, max_lines=8)
|
autoregressive_model_dropdown.change(
|
||||||
|
fn=update_autoregressive_model,
|
||||||
|
inputs=autoregressive_model_dropdown,
|
||||||
|
outputs=None
|
||||||
|
)
|
||||||
|
|
||||||
input_settings = [
|
input_settings = [
|
||||||
text,
|
text,
|
||||||
@ -440,7 +650,7 @@ def setup_gradio():
|
|||||||
experimental_checkboxes,
|
experimental_checkboxes,
|
||||||
]
|
]
|
||||||
|
|
||||||
history_view_results_button.click(
|
history_voices.change(
|
||||||
fn=history_view_results,
|
fn=history_view_results,
|
||||||
inputs=history_voices,
|
inputs=history_voices,
|
||||||
outputs=[
|
outputs=[
|
||||||
@ -448,7 +658,7 @@ def setup_gradio():
|
|||||||
history_results_list,
|
history_results_list,
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
history_view_result_button.click(
|
history_results_list.change(
|
||||||
fn=lambda voice, file: f"./results/{voice}/{file}",
|
fn=lambda voice, file: f"./results/{voice}/{file}",
|
||||||
inputs=[
|
inputs=[
|
||||||
history_voices,
|
history_voices,
|
||||||
@ -461,9 +671,9 @@ def setup_gradio():
|
|||||||
inputs=audio_in,
|
inputs=audio_in,
|
||||||
outputs=[
|
outputs=[
|
||||||
metadata_out,
|
metadata_out,
|
||||||
copy_button,
|
|
||||||
latents_out,
|
latents_out,
|
||||||
import_voice_name
|
import_voice_name,
|
||||||
|
utilities_metadata_column,
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -496,9 +706,10 @@ def setup_gradio():
|
|||||||
outputs=voice,
|
outputs=voice,
|
||||||
)
|
)
|
||||||
|
|
||||||
prompt.change(fn=lambda value: gr.update(value="Custom"),
|
emotion.change(
|
||||||
inputs=prompt,
|
fn=lambda value: gr.update(visible=value == "Custom"),
|
||||||
outputs=emotion
|
inputs=emotion,
|
||||||
|
outputs=prompt
|
||||||
)
|
)
|
||||||
mic_audio.change(fn=lambda value: gr.update(value="microphone"),
|
mic_audio.change(fn=lambda value: gr.update(value="microphone"),
|
||||||
inputs=mic_audio,
|
inputs=mic_audio,
|
||||||
@ -514,20 +725,15 @@ def setup_gradio():
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
output_pick.click(
|
|
||||||
lambda x: x,
|
|
||||||
inputs=candidates_list,
|
|
||||||
outputs=output_audio,
|
|
||||||
)
|
|
||||||
|
|
||||||
submit.click(
|
submit.click(
|
||||||
lambda: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)),
|
lambda: (gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)),
|
||||||
outputs=[source_sample, candidates_list, output_pick, generation_results],
|
outputs=[source_sample, candidates_list, generation_results],
|
||||||
)
|
)
|
||||||
|
|
||||||
submit_event = submit.click(run_generation,
|
submit_event = submit.click(run_generation,
|
||||||
inputs=input_settings,
|
inputs=input_settings,
|
||||||
outputs=[output_audio, source_sample, candidates_list, output_pick, generation_results],
|
outputs=[output_audio, source_sample, candidates_list, generation_results],
|
||||||
|
api_name="generate",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -556,13 +762,45 @@ def setup_gradio():
|
|||||||
outputs=training_configs
|
outputs=training_configs
|
||||||
)
|
)
|
||||||
start_training_button.click(run_training,
|
start_training_button.click(run_training,
|
||||||
inputs=training_configs,
|
inputs=[
|
||||||
outputs=training_output #console_output
|
training_configs,
|
||||||
|
verbose_training,
|
||||||
|
training_gpu_count,
|
||||||
|
training_keep_x_past_datasets,
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
training_output,
|
||||||
|
],
|
||||||
)
|
)
|
||||||
|
training_output.change(
|
||||||
|
fn=update_training_dataplot,
|
||||||
|
inputs=None,
|
||||||
|
outputs=[
|
||||||
|
training_loss_graph,
|
||||||
|
],
|
||||||
|
show_progress=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
view_losses.click(
|
||||||
|
fn=update_training_dataplot,
|
||||||
|
inputs=[
|
||||||
|
training_configs
|
||||||
|
],
|
||||||
|
outputs=[
|
||||||
|
training_loss_graph,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
stop_training_button.click(stop_training,
|
stop_training_button.click(stop_training,
|
||||||
inputs=None,
|
inputs=None,
|
||||||
outputs=training_output #console_output
|
outputs=training_output #console_output
|
||||||
)
|
)
|
||||||
|
reconnect_training_button.click(reconnect_training,
|
||||||
|
inputs=[
|
||||||
|
verbose_training,
|
||||||
|
],
|
||||||
|
outputs=training_output #console_output
|
||||||
|
)
|
||||||
prepare_dataset_button.click(
|
prepare_dataset_button.click(
|
||||||
prepare_dataset_proxy,
|
prepare_dataset_proxy,
|
||||||
inputs=dataset_settings,
|
inputs=dataset_settings,
|
||||||
@ -571,20 +809,32 @@ def setup_gradio():
|
|||||||
refresh_dataset_list.click(
|
refresh_dataset_list.click(
|
||||||
lambda: gr.update(choices=get_dataset_list()),
|
lambda: gr.update(choices=get_dataset_list()),
|
||||||
inputs=None,
|
inputs=None,
|
||||||
outputs=dataset_list,
|
outputs=dataset_list_dropdown,
|
||||||
|
)
|
||||||
|
optimize_yaml_button.click(optimize_training_settings_proxy,
|
||||||
|
inputs=training_settings,
|
||||||
|
outputs=training_settings[1:9] + [save_yaml_output] #console_output
|
||||||
|
)
|
||||||
|
import_dataset_button.click(import_training_settings_proxy,
|
||||||
|
inputs=dataset_list_dropdown,
|
||||||
|
outputs=training_settings[:13] + [save_yaml_output] #console_output
|
||||||
)
|
)
|
||||||
save_yaml_button.click(save_training_settings_proxy,
|
save_yaml_button.click(save_training_settings_proxy,
|
||||||
inputs=training_settings,
|
inputs=training_settings,
|
||||||
outputs=save_yaml_output #console_output
|
outputs=save_yaml_output #console_output
|
||||||
)
|
)
|
||||||
|
|
||||||
save_settings_button.click(update_model_settings,
|
"""
|
||||||
inputs=[
|
def kill_process():
|
||||||
autoregressive_model_dropdown,
|
ui.close()
|
||||||
whisper_model_dropdown,
|
exit()
|
||||||
],
|
|
||||||
|
kill_button.click(
|
||||||
|
kill_process,
|
||||||
|
inputs=None,
|
||||||
outputs=None
|
outputs=None
|
||||||
)
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
if os.path.isfile('./config/generate.json'):
|
if os.path.isfile('./config/generate.json'):
|
||||||
ui.load(import_generate_settings, inputs=None, outputs=input_settings)
|
ui.load(import_generate_settings, inputs=None, outputs=input_settings)
|
||||||
@ -592,7 +842,7 @@ def setup_gradio():
|
|||||||
if args.check_for_updates:
|
if args.check_for_updates:
|
||||||
ui.load(check_for_updates)
|
ui.load(check_for_updates)
|
||||||
|
|
||||||
stop.click(fn=cancel_generate, inputs=None, outputs=None, cancels=[submit_event])
|
stop.click(fn=cancel_generate, inputs=None, outputs=None)
|
||||||
|
|
||||||
|
|
||||||
ui.queue(concurrency_count=args.concurrency_count)
|
ui.queue(concurrency_count=args.concurrency_count)
|
||||||
|
|||||||
@ -1,5 +1,4 @@
|
|||||||
call .\venv\Scripts\activate.bat
|
call .\venv\Scripts\activate.bat
|
||||||
set PATH=.\bin\;%PATH%
|
set PATH=.\bin\;%PATH%
|
||||||
python .\src\main.py
|
python .\src\main.py %*
|
||||||
deactivate
|
|
||||||
pause
|
pause
|
||||||
4
start.sh
4
start.sh
@ -1,4 +1,6 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
if [ ! -d "venv" ]; then ./setup-guided.sh; fi
|
||||||
|
|
||||||
source ./venv/bin/activate
|
source ./venv/bin/activate
|
||||||
python3 ./src/main.py
|
python3 ./src/main.py "$@"
|
||||||
deactivate
|
deactivate
|
||||||
|
|||||||
1
tortoise-tts
Submodule
1
tortoise-tts
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 6fcd8c604f066e4e346da522bd14e6670395025f
|
||||||
@ -1,4 +1,4 @@
|
|||||||
call .\venv\Scripts\activate.bat
|
call .\venv\Scripts\activate.bat
|
||||||
python ./src/train.py -opt "%1"
|
python ./src/train.py -opt "%1"
|
||||||
deactivate
|
pause
|
||||||
pause
|
deactivate
|
||||||
69
train.ipynb
69
train.ipynb
@ -1,69 +0,0 @@
|
|||||||
{
|
|
||||||
"nbformat":4,
|
|
||||||
"nbformat_minor":0,
|
|
||||||
"metadata":{
|
|
||||||
"colab":{
|
|
||||||
"private_outputs":true,
|
|
||||||
"provenance":[
|
|
||||||
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"kernelspec":{
|
|
||||||
"name":"python3",
|
|
||||||
"display_name":"Python 3"
|
|
||||||
},
|
|
||||||
"language_info":{
|
|
||||||
"name":"python"
|
|
||||||
},
|
|
||||||
"accelerator":"GPU",
|
|
||||||
"gpuClass":"standard"
|
|
||||||
},
|
|
||||||
"cells":[
|
|
||||||
{
|
|
||||||
"cell_type":"code",
|
|
||||||
"execution_count":null,
|
|
||||||
"metadata":{
|
|
||||||
"id":"AaKpV3rCI3Eo"
|
|
||||||
},
|
|
||||||
"outputs":[
|
|
||||||
|
|
||||||
],
|
|
||||||
"source":[
|
|
||||||
"!git clone https://git.ecker.tech/mrq/DL-Art-School\n",
|
|
||||||
"%cd DL-Art-School\n",
|
|
||||||
"!rm -r experiments\n",
|
|
||||||
"!pip install -r requirements.txt"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type":"code",
|
|
||||||
"source":[
|
|
||||||
"from google.colab import drive\n",
|
|
||||||
"drive.mount('/content/drive')",
|
|
||||||
"%cd /content/DL-Art-School/\n",
|
|
||||||
"!ln -s /content/drive/MyDrive/experiments/\n",
|
|
||||||
],
|
|
||||||
"metadata":{
|
|
||||||
"id":"8eV92cjGI4XL"
|
|
||||||
},
|
|
||||||
"execution_count":null,
|
|
||||||
"outputs":[
|
|
||||||
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type":"code",
|
|
||||||
"source":[
|
|
||||||
"%cd /content/DL-Art-School/\n",
|
|
||||||
"!python ./codes/train.py -opt ./experiments/ar.yml"
|
|
||||||
],
|
|
||||||
"metadata":{
|
|
||||||
"id":"7lcRGqglX2FC"
|
|
||||||
},
|
|
||||||
"execution_count":null,
|
|
||||||
"outputs":[
|
|
||||||
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
11
train.sh
11
train.sh
@ -1,4 +1,13 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
source ./venv/bin/activate
|
source ./venv/bin/activate
|
||||||
python3 ./src/train.py -opt "$1"
|
|
||||||
|
GPUS=$1
|
||||||
|
CONFIG=$2
|
||||||
|
PORT=1234
|
||||||
|
|
||||||
|
if (( $GPUS > 1 )); then
|
||||||
|
torchrun --nproc_per_node=$GPUS --master_port=$PORT ./src/train.py -opt "$CONFIG" --launcher=pytorch
|
||||||
|
else
|
||||||
|
python3 ./src/train.py -opt "$CONFIG"
|
||||||
|
fi
|
||||||
deactivate
|
deactivate
|
||||||
|
|||||||
@ -1,3 +1,15 @@
|
|||||||
git fetch --all
|
git fetch --all
|
||||||
git reset --hard origin/master
|
git reset --hard origin/master
|
||||||
call .\update.bat
|
call .\update.bat
|
||||||
|
|
||||||
|
python -m venv venv
|
||||||
|
call .\venv\Scripts\activate.bat
|
||||||
|
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
python -m pip install -U -r .\requirements.txt
|
||||||
|
python -m pip install -U -r .\tortoise-tts\requirements.txt
|
||||||
|
python -m pip install -U -e .\tortoise-tts
|
||||||
|
python -m pip install -U -r .\dlas\requirements.txt
|
||||||
|
|
||||||
|
pause
|
||||||
|
deactivate
|
||||||
@ -1,4 +1,17 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
git fetch --all
|
git fetch --all
|
||||||
git reset --hard origin/master
|
git reset --hard origin/master
|
||||||
./update.sh
|
|
||||||
|
./update.sh
|
||||||
|
|
||||||
|
# force install requirements
|
||||||
|
python3 -m venv venv
|
||||||
|
source ./venv/bin/activate
|
||||||
|
|
||||||
|
python3 -m pip install --upgrade pip
|
||||||
|
python3 -m pip install -r ./requirements.txt
|
||||||
|
python3 -m pip install -r ./tortoise-tts/requirements.txt
|
||||||
|
python3 -m pip install -e ./tortoise-tts
|
||||||
|
python3 -m pip install -r ./dlas/requirements.txt
|
||||||
|
|
||||||
|
deactivate
|
||||||
13
update.bat
13
update.bat
@ -1,13 +1,2 @@
|
|||||||
git pull
|
git pull
|
||||||
python -m venv venv
|
git submodule update --remote
|
||||||
call .\venv\Scripts\activate.bat
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
python -m pip install -r ./requirements.txt
|
|
||||||
python -m pip install -r ./dlas/requirements.txt
|
|
||||||
|
|
||||||
cd dlas
|
|
||||||
git pull
|
|
||||||
cd ..
|
|
||||||
|
|
||||||
deactivate
|
|
||||||
pause
|
|
||||||
11
update.sh
11
update.sh
@ -1,13 +1,8 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
git pull
|
git pull
|
||||||
python -m venv venv
|
git submodule update --remote
|
||||||
source ./venv/bin/activate
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
python -m pip install -r ./requirements.txt
|
|
||||||
python -m pip install -r ./dlas/requirements.txt
|
|
||||||
|
|
||||||
cd dlas
|
if python -m pip show whispercpp &>/dev/null; then python -m pip install -U git+https://git.ecker.tech/lightmare/whispercpp.py; fi
|
||||||
git pull
|
if python -m pip show whisperx &>/dev/null; then python -m pip install -U git+https://github.com/m-bain/whisperx.git; fi
|
||||||
cd ..
|
|
||||||
|
|
||||||
deactivate
|
deactivate
|
||||||
Loading…
Reference in New Issue
Block a user