updated setup script

master
mrq 2023-10-06 20:08:28 +07:00
parent 1fd91b6437
commit 2deb995cc9
3 changed files with 20 additions and 20 deletions

@@ -13,12 +13,11 @@ dataset:
workers: 2
cache: True
phones_range: [4, 256]
duration_range: [1.0, 16.0]
min_utterances: 32
phones_range: [4, 512]
duration_range: [1.0, 32.0]
random_utterance: 1.0
max_prompts: 6
max_prompts: 3
prompt_duration: 6.0
sample_type: speaker
@@ -31,27 +30,22 @@ models:
_models:
- name: "ar+nar"
size: "double"
size: "full"
resp_levels: 8
prom_levels: 8
tasks: 8
arch_type: "retnet"
training: True
version: 2
version: 3
hyperparameters:
batch_size: 8
gradient_accumulation_steps: 16
gradient_accumulation_steps: 32
gradient_clipping: 100
# prodigyopt is nicer, but requires even more VRAM
#optimizer: Prodigy
#learning_rate: 1.0 # e-4
optimizer: AdamW
learning_rate: 1.0e-4
optimizer: Prodigy
torch_optimizer: True
learning_rate: 0.0625
scheduler_type: ""
#scheduler_type: OneCycle
@@ -118,8 +112,12 @@ inference:
use_vocos: True
normalize: False
weight_dtype: bfloat16
amp: False
bitsandbytes:
enabled: False
injects: False
linear: False
embedding: False
injects: True
linear: True
embedding: True

@@ -1,6 +1,8 @@
#!/bin/bash
python3 -m venv venv
source ./venv/bin/activate
pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
pip3 install -e .
mkdir -p ./training/valle/ckpt/ar+nar-retnet-8/

@@ -389,9 +389,9 @@ class Base(nn.Module):
dropout=p_dropout,
checkpoint_activations=self.activation_checkpointing,
activation_fn="gelu",
use_layernorm=True,
use_biases=True,
use_glu=False,
use_layernorm=True, # self.version < 3,
use_biases=True, # self.version < 3,
use_glu=False, # self.version >= 3,
chunkwise_recurrent=self.causal and self.recurrent_chunk_size > 0,
recurrent_chunkwise_size=self.recurrent_chunk_size if self.causal else 0,