diff --git a/models/.gitkeep b/models/.gitkeep new file mode 100755 index 0000000..e69de29 diff --git a/models/tortoise/bpe_lowercase_asr_256.json b/models/tortoise/bpe_lowercase_asr_256.json new file mode 100755 index 0000000..1f32162 --- /dev/null +++ b/models/tortoise/bpe_lowercase_asr_256.json @@ -0,0 +1,527 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": + [ + { + "id": 0, + "special": true, + "content": "[STOP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 1, + "special": true, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 2, + "special": true, + "content": "[SPACE]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + } + ], + "normalizer": null, + "pre_tokenizer": + { + "type": "Whitespace" + }, + "post_processor": null, + "decoder": null, + "model": + { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "vocab": + { + "[STOP]": 0, + "[UNK]": 1, + "[SPACE]": 2, + "!": 3, + "'": 4, + "(": 5, + ")": 6, + ",": 7, + "-": 8, + ".": 9, + "/": 10, + ":": 11, + ";": 12, + "?": 13, + "a": 14, + "b": 15, + "c": 16, + "d": 17, + "e": 18, + "f": 19, + "g": 20, + "h": 21, + "i": 22, + "j": 23, + "k": 24, + "l": 25, + "m": 26, + "n": 27, + "o": 28, + "p": 29, + "q": 30, + "r": 31, + "s": 32, + "t": 33, + "u": 34, + "v": 35, + "w": 36, + "x": 37, + "y": 38, + "z": 39, + "th": 40, + "in": 41, + "the": 42, + "an": 43, + "er": 44, + "ou": 45, + "re": 46, + "on": 47, + "at": 48, + "ed": 49, + "en": 50, + "to": 51, + "ing": 52, + "and": 53, + "is": 54, + "as": 55, + "al": 56, + "or": 57, + "of": 58, + "ar": 59, + "it": 60, + "es": 61, + "he": 62, + "st": 63, + "le": 64, + "om": 65, + "se": 66, + "be": 67, + "ad": 68, + "ow": 69, + "ly": 70, + "ch": 71, + "wh": 72, + "that": 73, 
+ "you": 74, + "li": 75, + "ve": 76, + "ac": 77, + "ti": 78, + "ld": 79, + "me": 80, + "was": 81, + "gh": 82, + "id": 83, + "ll": 84, + "wi": 85, + "ent": 86, + "for": 87, + "ay": 88, + "ro": 89, + "ver": 90, + "ic": 91, + "her": 92, + "ke": 93, + "his": 94, + "no": 95, + "ut": 96, + "un": 97, + "ir": 98, + "lo": 99, + "we": 100, + "ri": 101, + "ha": 102, + "with": 103, + "ght": 104, + "out": 105, + "im": 106, + "ion": 107, + "all": 108, + "ab": 109, + "one": 110, + "ne": 111, + "ge": 112, + "ould": 113, + "ter": 114, + "mo": 115, + "had": 116, + "ce": 117, + "she": 118, + "go": 119, + "sh": 120, + "ur": 121, + "am": 122, + "so": 123, + "pe": 124, + "my": 125, + "de": 126, + "are": 127, + "but": 128, + "ome": 129, + "fr": 130, + "ther": 131, + "fe": 132, + "su": 133, + "do": 134, + "con": 135, + "te": 136, + "ain": 137, + "ere": 138, + "po": 139, + "if": 140, + "they": 141, + "us": 142, + "ag": 143, + "tr": 144, + "now": 145, + "oun": 146, + "this": 147, + "have": 148, + "not": 149, + "sa": 150, + "il": 151, + "up": 152, + "thing": 153, + "from": 154, + "ap": 155, + "him": 156, + "ack": 157, + "ation": 158, + "ant": 159, + "our": 160, + "op": 161, + "like": 162, + "ust": 163, + "ess": 164, + "bo": 165, + "ok": 166, + "ul": 167, + "ind": 168, + "ex": 169, + "com": 170, + "some": 171, + "there": 172, + "ers": 173, + "co": 174, + "res": 175, + "man": 176, + "ard": 177, + "pl": 178, + "wor": 179, + "way": 180, + "tion": 181, + "fo": 182, + "ca": 183, + "were": 184, + "by": 185, + "ate": 186, + "pro": 187, + "ted": 188, + "ound": 189, + "own": 190, + "would": 191, + "ts": 192, + "what": 193, + "qu": 194, + "ally": 195, + "ight": 196, + "ck": 197, + "gr": 198, + "when": 199, + "ven": 200, + "can": 201, + "ough": 202, + "ine": 203, + "end": 204, + "per": 205, + "ous": 206, + "od": 207, + "ide": 208, + "know": 209, + "ty": 210, + "very": 211, + "si": 212, + "ak": 213, + "who": 214, + "about": 215, + "ill": 216, + "them": 217, + "est": 218, + "red": 219, + "ye": 220, + 
"could": 221, + "ong": 222, + "your": 223, + "their": 224, + "em": 225, + "just": 226, + "other": 227, + "into": 228, + "any": 229, + "whi": 230, + "um": 231, + "tw": 232, + "ast": 233, + "der": 234, + "did": 235, + "ie": 236, + "been": 237, + "ace": 238, + "ink": 239, + "ity": 240, + "back": 241, + "ting": 242, + "br": 243, + "more": 244, + "ake": 245, + "pp": 246, + "then": 247, + "sp": 248, + "el": 249, + "use": 250, + "bl": 251, + "said": 252, + "over": 253, + "get": 254 + }, + "merges": + [ + "t h", + "i n", + "th e", + "a n", + "e r", + "o u", + "r e", + "o n", + "a t", + "e d", + "e n", + "t o", + "in g", + "an d", + "i s", + "a s", + "a l", + "o r", + "o f", + "a r", + "i t", + "e s", + "h e", + "s t", + "l e", + "o m", + "s e", + "b e", + "a d", + "o w", + "l y", + "c h", + "w h", + "th at", + "y ou", + "l i", + "v e", + "a c", + "t i", + "l d", + "m e", + "w as", + "g h", + "i d", + "l l", + "w i", + "en t", + "f or", + "a y", + "r o", + "v er", + "i c", + "h er", + "k e", + "h is", + "n o", + "u t", + "u n", + "i r", + "l o", + "w e", + "r i", + "h a", + "wi th", + "gh t", + "ou t", + "i m", + "i on", + "al l", + "a b", + "on e", + "n e", + "g e", + "ou ld", + "t er", + "m o", + "h ad", + "c e", + "s he", + "g o", + "s h", + "u r", + "a m", + "s o", + "p e", + "m y", + "d e", + "a re", + "b ut", + "om e", + "f r", + "the r", + "f e", + "s u", + "d o", + "c on", + "t e", + "a in", + "er e", + "p o", + "i f", + "the y", + "u s", + "a g", + "t r", + "n ow", + "ou n", + "th is", + "ha ve", + "no t", + "s a", + "i l", + "u p", + "th ing", + "fr om", + "a p", + "h im", + "ac k", + "at ion", + "an t", + "ou r", + "o p", + "li ke", + "u st", + "es s", + "b o", + "o k", + "u l", + "in d", + "e x", + "c om", + "s ome", + "the re", + "er s", + "c o", + "re s", + "m an", + "ar d", + "p l", + "w or", + "w ay", + "ti on", + "f o", + "c a", + "w ere", + "b y", + "at e", + "p ro", + "t ed", + "oun d", + "ow n", + "w ould", + "t s", + "wh at", + "q u", + "al ly", + "i 
ght", + "c k", + "g r", + "wh en", + "v en", + "c an", + "ou gh", + "in e", + "en d", + "p er", + "ou s", + "o d", + "id e", + "k now", + "t y", + "ver y", + "s i", + "a k", + "wh o", + "ab out", + "i ll", + "the m", + "es t", + "re d", + "y e", + "c ould", + "on g", + "you r", + "the ir", + "e m", + "j ust", + "o ther", + "in to", + "an y", + "wh i", + "u m", + "t w", + "as t", + "d er", + "d id", + "i e", + "be en", + "ac e", + "in k", + "it y", + "b ack", + "t ing", + "b r", + "mo re", + "a ke", + "p p", + "the n", + "s p", + "e l", + "u se", + "b l", + "sa id", + "o ver", + "ge t" + ] + } +} \ No newline at end of file diff --git a/models/tortoise/clips_mel_norms.pth b/models/tortoise/clips_mel_norms.pth new file mode 100755 index 0000000..d8c7321 Binary files /dev/null and b/models/tortoise/clips_mel_norms.pth differ diff --git a/models/tortoise/train_diffusion_vocoder_22k_level.yml b/models/tortoise/train_diffusion_vocoder_22k_level.yml new file mode 100755 index 0000000..f532349 --- /dev/null +++ b/models/tortoise/train_diffusion_vocoder_22k_level.yml @@ -0,0 +1,18 @@ +path: + pretrain_model_dvae: './models/tortoise/dvae.pth' + strict_load: true + #resume_state: ../experiments/train_imgnet_vqvae_stage1/training_state/0.state # <-- Set this to resume from a previous training state. 
+networks: + dvae: + type: generator + which_model_G: lucidrains_dvae + kwargs: + channels: 80 + codebook_dim: 512 + hidden_dim: 512 + kernel_size: 3 + num_layers: 2 + num_resnet_blocks: 3 + num_tokens: 8192 + positional_dims: 1 + use_transposed_convs: false diff --git a/setup-cuda.bat b/setup-cuda.bat index 5488cb5..bb0a17c 100755 --- a/setup-cuda.bat +++ b/setup-cuda.bat @@ -4,4 +4,5 @@ python -m pip install --upgrade pip python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 python -m pip install -r ./requirements.txt deactivate +call .\setup-training.bat pause \ No newline at end of file diff --git a/setup-cuda.sh b/setup-cuda.sh index 1701169..7c7d20b 100755 --- a/setup-cuda.sh +++ b/setup-cuda.sh @@ -3,4 +3,5 @@ source ./venv/bin/activate python -m pip install --upgrade pip pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 python -m pip install -r ./requirements.txt +./setup-training.sh deactivate diff --git a/setup-directml.bat b/setup-directml.bat index 57f9448..bdd154f 100755 --- a/setup-directml.bat +++ b/setup-directml.bat @@ -4,4 +4,5 @@ python -m pip install --upgrade pip python -m pip install torch torchvision torchaudio torch-directml==0.1.13.1.dev230119 python -m pip install -r ./requirements.txt deactivate +call .\setup-training.bat pause \ No newline at end of file diff --git a/setup-rocm.sh b/setup-rocm.sh index 4a42342..d79ae28 100755 --- a/setup-rocm.sh +++ b/setup-rocm.sh @@ -4,4 +4,5 @@ python -m pip install --upgrade pip # ROCM pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.1.1 # 5.2 does not work for me desu python -m pip install -r ./requirements.txt +./setup-training.sh deactivate diff --git a/setup-training.sh b/setup-training.sh new file mode 100755 index 0000000..d7a813c --- /dev/null +++ b/setup-training.sh @@ -0,0 +1 @@ +git clone https://git.ecker.tech/mrq/DL-Art-School dlas \ No 
newline at end of file diff --git a/setup-training.bat b/setup-training.bat new file mode 100755 index 0000000..d7a813c --- /dev/null +++ b/setup-training.bat @@ -0,0 +1 @@ +git clone https://git.ecker.tech/mrq/DL-Art-School dlas \ No newline at end of file