updated setup script

mrq 2023-10-06 20:08:28 -05:00
parent 1fd91b6437
commit 2deb995cc9
3 changed files with 20 additions and 20 deletions


@@ -13,12 +13,11 @@ dataset:
   workers: 2
   cache: True
-  phones_range: [4, 256]
-  duration_range: [1.0, 16.0]
-  min_utterances: 32
+  phones_range: [4, 512]
+  duration_range: [1.0, 32.0]
   random_utterance: 1.0
-  max_prompts: 6
+  max_prompts: 3
   prompt_duration: 6.0
   sample_type: speaker
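
The widened phones_range and duration_range act as admission filters on training utterances. A rough sketch of that filtering, using a hypothetical keep_utterance helper rather than the repo's actual dataloader logic:

# Hypothetical filter mirroring the widened bounds above; the actual
# dataloader code may differ.
def keep_utterance(phone_count: int, duration_s: float) -> bool:
    phones_range = (4, 512)
    duration_range = (1.0, 32.0)
    return (phones_range[0] <= phone_count <= phones_range[1]
            and duration_range[0] <= duration_s <= duration_range[1])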
@@ -31,27 +30,22 @@ models:
   _models:
   - name: "ar+nar"
-    size: "double"
+    size: "full"
     resp_levels: 8
     prom_levels: 8
     tasks: 8
     arch_type: "retnet"
     training: True
-    version: 2
+    version: 3

 hyperparameters:
   batch_size: 8
-  gradient_accumulation_steps: 16
+  gradient_accumulation_steps: 32
   gradient_clipping: 100

-  # prodigyopt is nicer, but requires even more VRAM
-  #optimizer: Prodigy
-  #learning_rate: 1.0 # e-4
-  optimizer: AdamW
-  learning_rate: 1.0e-4
+  optimizer: Prodigy
   torch_optimizer: True
+  learning_rate: 0.0625

   scheduler_type: ""
   #scheduler_type: OneCycle
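
With this change the previously commented-out Prodigy path becomes the default optimizer, and learning_rate: 0.0625 is presumably handed to it as lr. Prodigy adapts its own step size, so lr behaves more like a multiplier than an absolute learning rate. A minimal sketch, assuming the prodigyopt package and a hypothetical build_optimizer helper rather than the repo's actual wiring:

import torch
from prodigyopt import Prodigy

def build_optimizer(model: torch.nn.Module, learning_rate: float = 0.0625):
    # Prodigy estimates the step size itself; the configured
    # learning_rate scales that estimate.
    return Prodigy(model.parameters(), lr=learning_rate)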
@@ -118,8 +112,12 @@ inference:
   use_vocos: True
   normalize: False

+  weight_dtype: bfloat16
+  amp: False

 bitsandbytes:
   enabled: False
-  injects: False
-  linear: False
-  embedding: False
+  injects: True
+  linear: True
+  embedding: True
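
Flipping injects, linear, and embedding to True suggests bitsandbytes modules get substituted for their stock torch counterparts. An illustrative sketch of that kind of swap, using real bitsandbytes layers (Linear8bitLt, StableEmbedding) but a hypothetical inject_bnb helper rather than the repo's actual injection code (weight copying omitted for brevity):

import torch.nn as nn
import bitsandbytes as bnb

def inject_bnb(model: nn.Module, linear: bool = True, embedding: bool = True) -> nn.Module:
    # Recursively replace Linear/Embedding children with 8-bit variants.
    for name, module in model.named_children():
        if linear and isinstance(module, nn.Linear):
            setattr(model, name, bnb.nn.Linear8bitLt(
                module.in_features, module.out_features,
                bias=module.bias is not None,
                has_fp16_weights=False,
            ))
        elif embedding and isinstance(module, nn.Embedding):
            setattr(model, name, bnb.nn.StableEmbedding(
                module.num_embeddings, module.embedding_dim,
            ))
        else:
            inject_bnb(module, linear=linear, embedding=embedding)
    return model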


@@ -1,6 +1,8 @@
 #!/bin/bash
 python3 -m venv venv
+source ./venv/bin/activate
+pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
 pip3 install -e .
 mkdir -p ./training/valle/ckpt/ar+nar-retnet-8/
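
The added lines activate the virtualenv before installing anything and pull a nightly CUDA 12.1 PyTorch build, so the later pip3 install -e . resolves against that torch. A quick post-setup sanity check (assumes a CUDA 12.1-capable host):

import torch

print(torch.__version__)          # nightly builds report a .dev version
print(torch.cuda.is_available())  # should be True once the cu121 wheel is in use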


@@ -389,9 +389,9 @@ class Base(nn.Module):
 			dropout=p_dropout,
 			checkpoint_activations=self.activation_checkpointing,
 			activation_fn="gelu",
-			use_layernorm=True,
-			use_biases=True,
-			use_glu=False,
+			use_layernorm=True, # self.version < 3,
+			use_biases=True, # self.version < 3,
+			use_glu=False, # self.version >= 3,
 			chunkwise_recurrent=self.causal and self.recurrent_chunk_size > 0,
 			recurrent_chunkwise_size=self.recurrent_chunk_size if self.causal else 0,
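
The inline comments added here read like a note-to-self: once version-3 models land, LayerNorm and biases should be disabled and GLU enabled based on self.version. A hypothetical sketch of that gating, mirroring the comments rather than any committed code:

def retnet_norm_kwargs(version: int) -> dict:
    # Pre-v3 checkpoints keep LayerNorm + biases; v3+ would switch to GLU.
    return {
        "use_layernorm": version < 3,
        "use_biases": version < 3,
        "use_glu": version >= 3,
    }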