updated setup script
parent 1fd91b6437
commit 2deb995cc9
@@ -13,12 +13,11 @@ dataset:
   workers: 2
   cache: True
 
-  phones_range: [4, 256]
-  duration_range: [1.0, 16.0]
-  min_utterances: 32
+  phones_range: [4, 512]
+  duration_range: [1.0, 32.0]
 
   random_utterance: 1.0
-  max_prompts: 6
+  max_prompts: 3
   prompt_duration: 6.0
 
   sample_type: speaker
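The dataset hunk above widens the sampling bounds: utterances may now span 4 to 512 phonemes and 1 to 32 seconds, the min_utterances floor is dropped, and at most 3 prompts are drawn per sample. A minimal Python sketch of how such bounds could be applied when filtering utterances follows; the Utterance fields and filter_utterances helper are hypothetical illustrations, not this repo's code.

from dataclasses import dataclass

# Hypothetical sketch: applying the phones_range / duration_range bounds
# from the dataset config when selecting training utterances.
@dataclass
class Utterance:
    phones: int       # number of phoneme tokens
    duration: float   # length in seconds

def filter_utterances(utterances, phones_range=(4, 512), duration_range=(1.0, 32.0)):
    min_p, max_p = phones_range
    min_d, max_d = duration_range
    return [
        u for u in utterances
        if min_p <= u.phones <= max_p and min_d <= u.duration <= max_d
    ]

# Example: only the middle utterance satisfies both bounds.
print(filter_utterances([Utterance(2, 0.5), Utterance(40, 3.2), Utterance(600, 40.0)]))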
@@ -31,27 +30,22 @@ models:
 
   _models:
   - name: "ar+nar"
-    size: "double"
+    size: "full"
     resp_levels: 8
     prom_levels: 8
     tasks: 8
     arch_type: "retnet"
     training: True
-    version: 2
-
+    version: 3
 
 hyperparameters:
   batch_size: 8
-  gradient_accumulation_steps: 16
+  gradient_accumulation_steps: 32
   gradient_clipping: 100
 
-  # prodigyopt is nicer, but requires even more VRAM
-  #optimizer: Prodigy
-  #learning_rate: 1.0 # e-4
-
-  optimizer: AdamW
-  learning_rate: 1.0e-4
+  optimizer: Prodigy
+  torch_optimizer: True
+  learning_rate: 0.0625
 
   scheduler_type: ""
   #scheduler_type: OneCycle
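The hyperparameters hunk swaps AdamW (lr 1.0e-4) for Prodigy at a learning rate of 0.0625, doubles gradient accumulation to 32 steps (an effective batch of 8 * 32 = 256), and keeps gradient clipping at 100. A hedged sketch of that combination using the prodigyopt package is below; the model and dataloader are placeholders and this is not the trainer's actual loop.

import torch
from prodigyopt import Prodigy  # pip install prodigyopt

accum_steps = 32      # hyperparameters.gradient_accumulation_steps
clip_norm   = 100.0   # hyperparameters.gradient_clipping

model = torch.nn.Linear(16, 16)                      # placeholder model
optimizer = Prodigy(model.parameters(), lr=0.0625)   # config's learning_rate

# placeholder batches of size 8, matching hyperparameters.batch_size
dataloader = [(torch.randn(8, 16), torch.randn(8, 16)) for _ in range(64)]

for step, (x, y) in enumerate(dataloader):
    loss = torch.nn.functional.mse_loss(model(x), y) / accum_steps
    loss.backward()
    if (step + 1) % accum_steps == 0:   # effective batch = 8 * 32 = 256
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)
        optimizer.step()
        optimizer.zero_grad()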
@@ -118,8 +112,12 @@ inference:
   use_vocos: True
   normalize: False
 
+  weight_dtype: bfloat16
+  amp: False
+
 bitsandbytes:
   enabled: False
-  injects: False
-  linear: False
-  embedding: False
+  injects: True
+  linear: True
+  embedding: True
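The inference hunk pins weights to bfloat16 with AMP left off, and the bitsandbytes block now enables injection of its linear and embedding replacements. A minimal sketch of the weight-dtype/AMP half of that, using plain PyTorch on a placeholder module, is below; it is not the repo's inference path.

import torch

weight_dtype = torch.bfloat16   # inference.weight_dtype: bfloat16
use_amp = False                 # inference.amp: False

model = torch.nn.Linear(16, 16).to(dtype=weight_dtype).eval()  # placeholder model
x = torch.randn(1, 16, dtype=weight_dtype)

with torch.inference_mode():
    if use_amp:
        # with amp enabled, weights could stay fp32 and autocast would cast on the fly
        with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
            y = model(x)
    else:
        # amp: False -> run directly in the stored weight dtype
        y = model(x)

print(y.dtype)  # torch.bfloat16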
@@ -1,6 +1,8 @@
#!/bin/bash

python3 -m venv venv
source ./venv/bin/activate
pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
pip3 install -e .

mkdir -p ./training/valle/ckpt/ar+nar-retnet-8/
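After the setup script's venv creation and nightly cu121 PyTorch install, a quick sanity check that the expected build is active could look like the following; it uses only standard torch attributes and is not part of the script itself.

import torch

# Confirm the nightly cu121 wheel is the one installed inside ./venv
print("torch version:", torch.__version__)          # expect a nightly/dev version string
print("built with CUDA:", torch.version.cuda)       # expect "12.1" for the cu121 index
print("CUDA available:", torch.cuda.is_available())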
@@ -389,9 +389,9 @@ class Base(nn.Module):
     dropout=p_dropout,
     checkpoint_activations=self.activation_checkpointing,
     activation_fn="gelu",
-    use_layernorm=True,
-    use_biases=True,
-    use_glu=False,
+    use_layernorm=True, # self.version < 3,
+    use_biases=True, # self.version < 3,
+    use_glu=False, # self.version >= 3,
 
     chunkwise_recurrent=self.causal and self.recurrent_chunk_size > 0,
     recurrent_chunkwise_size=self.recurrent_chunk_size if self.causal else 0,
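The base-model hunk annotates the RetNet feed-forward flags with the version gates they are meant to follow (layernorm and biases for version < 3, GLU for version >= 3) while still hard-coding the old values. A small sketch of deriving those kwargs from the model version, mirroring only what the diff itself shows, is below; the helper name is illustrative and the surrounding RetNet config is assumed to accept these keys.

# Illustrative helper (not the repo's code): build the RetNet kwargs the diff
# passes, gating the feed-forward flags on the model version as the inline
# comments suggest.
def retnet_ffn_kwargs(version: int, causal: bool, recurrent_chunk_size: int) -> dict:
    return dict(
        activation_fn="gelu",
        use_layernorm=version < 3,   # pre-v3 models keep the extra layernorm
        use_biases=version < 3,      # pre-v3 models keep biases
        use_glu=version >= 3,        # v3 models switch to a GLU feed-forward
        chunkwise_recurrent=causal and recurrent_chunk_size > 0,
        recurrent_chunkwise_size=recurrent_chunk_size if causal else 0,
    )

print(retnet_ffn_kwargs(version=3, causal=True, recurrent_chunk_size=0))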