ai-voice-cloning/models/tortoise/bpe_lowercase_asr_256.json

{
    "version": "1.0",
    "truncation": null,
    "padding": null,
    "added_tokens":
    [
        {
            "id": 0,
            "special": true,
            "content": "[STOP]",
            "single_word": false,
            "lstrip": false,
            "rstrip": false,
            "normalized": false
        },
        {
            "id": 1,
            "special": true,
            "content": "[UNK]",
            "single_word": false,
            "lstrip": false,
            "rstrip": false,
            "normalized": false
        },
        {
            "id": 2,
            "special": true,
            "content": "[SPACE]",
            "single_word": false,
            "lstrip": false,
            "rstrip": false,
            "normalized": false
        }
    ],
    "normalizer": null,
    "pre_tokenizer":
    {
        "type": "Whitespace"
    },
    "post_processor": null,
    "decoder": null,
    "model":
    {
        "type": "BPE",
        "dropout": null,
        "unk_token": "[UNK]",
        "continuing_subword_prefix": null,
        "end_of_word_suffix": null,
        "fuse_unk": false,
        "vocab":
        {
            "[STOP]": 0,
            "[UNK]": 1,
            "[SPACE]": 2,
            "!": 3,
            "'": 4,
            "(": 5,
            ")": 6,
            ",": 7,
            "-": 8,
            ".": 9,
            "/": 10,
            ":": 11,
            ";": 12,
            "?": 13,
            "a": 14,
            "b": 15,
            "c": 16,
            "d": 17,
            "e": 18,
            "f": 19,
            "g": 20,
            "h": 21,
            "i": 22,
            "j": 23,
            "k": 24,
            "l": 25,
            "m": 26,
            "n": 27,
            "o": 28,
            "p": 29,
            "q": 30,
            "r": 31,
            "s": 32,
            "t": 33,
            "u": 34,
            "v": 35,
            "w": 36,
            "x": 37,
            "y": 38,
            "z": 39,
            "th": 40,
            "in": 41,
            "the": 42,
            "an": 43,
            "er": 44,
            "ou": 45,
            "re": 46,
            "on": 47,
            "at": 48,
            "ed": 49,
            "en": 50,
            "to": 51,
            "ing": 52,
            "and": 53,
            "is": 54,
            "as": 55,
            "al": 56,
            "or": 57,
            "of": 58,
            "ar": 59,
            "it": 60,
            "es": 61,
            "he": 62,
            "st": 63,
            "le": 64,
            "om": 65,
            "se": 66,
            "be": 67,
            "ad": 68,
            "ow": 69,
            "ly": 70,
            "ch": 71,
            "wh": 72,
            "that": 73,
            "you": 74,
            "li": 75,
            "ve": 76,
            "ac": 77,
            "ti": 78,
            "ld": 79,
            "me": 80,
            "was": 81,
            "gh": 82,
            "id": 83,
            "ll": 84,
            "wi": 85,
            "ent": 86,
            "for": 87,
            "ay": 88,
            "ro": 89,
            "ver": 90,
            "ic": 91,
            "her": 92,
            "ke": 93,
            "his": 94,
            "no": 95,
            "ut": 96,
            "un": 97,
            "ir": 98,
            "lo": 99,
            "we": 100,
            "ri": 101,
            "ha": 102,
            "with": 103,
            "ght": 104,
            "out": 105,
            "im": 106,
            "ion": 107,
            "all": 108,
            "ab": 109,
            "one": 110,
            "ne": 111,
            "ge": 112,
            "ould": 113,
            "ter": 114,
            "mo": 115,
            "had": 116,
            "ce": 117,
            "she": 118,
            "go": 119,
            "sh": 120,
            "ur": 121,
            "am": 122,
            "so": 123,
            "pe": 124,
            "my": 125,
            "de": 126,
            "are": 127,
            "but": 128,
            "ome": 129,
            "fr": 130,
            "ther": 131,
            "fe": 132,
            "su": 133,
            "do": 134,
            "con": 135,
            "te": 136,
            "ain": 137,
            "ere": 138,
            "po": 139,
            "if": 140,
            "they": 141,
            "us": 142,
            "ag": 143,
            "tr": 144,
            "now": 145,
            "oun": 146,
            "this": 147,
            "have": 148,
            "not": 149,
            "sa": 150,
            "il": 151,
            "up": 152,
            "thing": 153,
            "from": 154,
            "ap": 155,
            "him": 156,
            "ack": 157,
            "ation": 158,
            "ant": 159,
            "our": 160,
            "op": 161,
            "like": 162,
            "ust": 163,
            "ess": 164,
            "bo": 165,
            "ok": 166,
            "ul": 167,
            "ind": 168,
            "ex": 169,
            "com": 170,
            "some": 171,
            "there": 172,
            "ers": 173,
            "co": 174,
            "res": 175,
            "man": 176,
            "ard": 177,
            "pl": 178,
            "wor": 179,
            "way": 180,
            "tion": 181,
            "fo": 182,
            "ca": 183,
            "were": 184,
            "by": 185,
            "ate": 186,
            "pro": 187,
            "ted": 188,
            "ound": 189,
            "own": 190,
            "would": 191,
            "ts": 192,
            "what": 193,
            "qu": 194,
            "ally": 195,
            "ight": 196,
            "ck": 197,
            "gr": 198,
            "when": 199,
            "ven": 200,
            "can": 201,
            "ough": 202,
            "ine": 203,
            "end": 204,
            "per": 205,
            "ous": 206,
            "od": 207,
            "ide": 208,
            "know": 209,
            "ty": 210,
            "very": 211,
            "si": 212,
            "ak": 213,
            "who": 214,
            "about": 215,
            "ill": 216,
            "them": 217,
            "est": 218,
            "red": 219,
            "ye": 220,
            "could": 221,
            "ong": 222,
            "your": 223,
            "their": 224,
            "em": 225,
            "just": 226,
            "other": 227,
            "into": 228,
            "any": 229,
            "whi": 230,
            "um": 231,
            "tw": 232,
            "ast": 233,
            "der": 234,
            "did": 235,
            "ie": 236,
            "been": 237,
            "ace": 238,
            "ink": 239,
            "ity": 240,
            "back": 241,
            "ting": 242,
            "br": 243,
            "more": 244,
            "ake": 245,
            "pp": 246,
            "then": 247,
            "sp": 248,
            "el": 249,
            "use": 250,
            "bl": 251,
            "said": 252,
            "over": 253,
            "get": 254
        },
        "merges":
        [
            "t h",
            "i n",
            "th e",
            "a n",
            "e r",
            "o u",
            "r e",
            "o n",
            "a t",
            "e d",
            "e n",
            "t o",
            "in g",
            "an d",
            "i s",
            "a s",
            "a l",
            "o r",
            "o f",
            "a r",
            "i t",
            "e s",
            "h e",
            "s t",
            "l e",
            "o m",
            "s e",
            "b e",
            "a d",
            "o w",
            "l y",
            "c h",
            "w h",
            "th at",
            "y ou",
            "l i",
            "v e",
            "a c",
            "t i",
            "l d",
            "m e",
            "w as",
            "g h",
            "i d",
            "l l",
            "w i",
            "en t",
            "f or",
            "a y",
            "r o",
            "v er",
            "i c",
            "h er",
            "k e",
            "h is",
            "n o",
            "u t",
            "u n",
            "i r",
            "l o",
            "w e",
            "r i",
            "h a",
            "wi th",
            "gh t",
            "ou t",
            "i m",
            "i on",
            "al l",
            "a b",
            "on e",
            "n e",
            "g e",
            "ou ld",
            "t er",
            "m o",
            "h ad",
            "c e",
            "s he",
            "g o",
            "s h",
            "u r",
            "a m",
            "s o",
            "p e",
            "m y",
            "d e",
            "a re",
            "b ut",
            "om e",
            "f r",
            "the r",
            "f e",
            "s u",
            "d o",
            "c on",
            "t e",
            "a in",
            "er e",
            "p o",
            "i f",
            "the y",
            "u s",
            "a g",
            "t r",
            "n ow",
            "ou n",
            "th is",
            "ha ve",
            "no t",
            "s a",
            "i l",
            "u p",
            "th ing",
            "fr om",
            "a p",
            "h im",
            "ac k",
            "at ion",
            "an t",
            "ou r",
            "o p",
            "li ke",
            "u st",
            "es s",
            "b o",
            "o k",
            "u l",
            "in d",
            "e x",
            "c om",
            "s ome",
            "the re",
            "er s",
            "c o",
            "re s",
            "m an",
            "ar d",
            "p l",
            "w or",
            "w ay",
            "ti on",
            "f o",
            "c a",
            "w ere",
            "b y",
            "at e",
            "p ro",
            "t ed",
            "oun d",
            "ow n",
            "w ould",
            "t s",
            "wh at",
            "q u",
            "al ly",
            "i ght",
            "c k",
            "g r",
            "wh en",
            "v en",
            "c an",
            "ou gh",
            "in e",
            "en d",
            "p er",
            "ou s",
            "o d",
            "id e",
            "k now",
            "t y",
            "ver y",
            "s i",
            "a k",
            "wh o",
            "ab out",
            "i ll",
            "the m",
            "es t",
            "re d",
            "y e",
            "c ould",
            "on g",
            "you r",
            "the ir",
            "e m",
            "j ust",
            "o ther",
            "in to",
            "an y",
            "wh i",
            "u m",
            "t w",
            "as t",
            "d er",
            "d id",
            "i e",
            "be en",
            "ac e",
            "in k",
            "it y",
            "b ack",
            "t ing",
            "b r",
            "mo re",
            "a ke",
            "p p",
            "the n",
            "s p",
            "e l",
            "u se",
            "b l",
            "sa id",
            "o ver",
            "ge t"
        ]
    }
}
almost 2023-02-17 15:53:50 +00:00			`{`
			`"version": "1.0",`
			`"truncation": null,`
			`"padding": null,`
			`"added_tokens":`
			`[`
			`{`
			`"id": 0,`
			`"special": true,`
			`"content": "[STOP]",`
			`"single_word": false,`
			`"lstrip": false,`
			`"rstrip": false,`
			`"normalized": false`
			`},`
			`{`
			`"id": 1,`
			`"special": true,`
			`"content": "[UNK]",`
			`"single_word": false,`
			`"lstrip": false,`
			`"rstrip": false,`
			`"normalized": false`
			`},`
			`{`
			`"id": 2,`
			`"special": true,`
			`"content": "[SPACE]",`
			`"single_word": false,`
			`"lstrip": false,`
			`"rstrip": false,`
			`"normalized": false`
			`}`
			`],`
			`"normalizer": null,`
			`"pre_tokenizer":`
			`{`
			`"type": "Whitespace"`
			`},`
			`"post_processor": null,`
			`"decoder": null,`
			`"model":`
			`{`
			`"type": "BPE",`
			`"dropout": null,`
			`"unk_token": "[UNK]",`
			`"continuing_subword_prefix": null,`
			`"end_of_word_suffix": null,`
			`"fuse_unk": false,`
			`"vocab":`
			`{`
			`"[STOP]": 0,`
			`"[UNK]": 1,`
			`"[SPACE]": 2,`
			`"!": 3,`
			`"'": 4,`
			`"(": 5,`
			`")": 6,`
			`",": 7,`
			`"-": 8,`
			`".": 9,`
			`"/": 10,`
			`":": 11,`
			`";": 12,`
			`"?": 13,`
			`"a": 14,`
			`"b": 15,`
			`"c": 16,`
			`"d": 17,`
			`"e": 18,`
			`"f": 19,`
			`"g": 20,`
			`"h": 21,`
			`"i": 22,`
			`"j": 23,`
			`"k": 24,`
			`"l": 25,`
			`"m": 26,`
			`"n": 27,`
			`"o": 28,`
			`"p": 29,`
			`"q": 30,`
			`"r": 31,`
			`"s": 32,`
			`"t": 33,`
			`"u": 34,`
			`"v": 35,`
			`"w": 36,`
			`"x": 37,`
			`"y": 38,`
			`"z": 39,`
			`"th": 40,`
			`"in": 41,`
			`"the": 42,`
			`"an": 43,`
			`"er": 44,`
			`"ou": 45,`
			`"re": 46,`
			`"on": 47,`
			`"at": 48,`
			`"ed": 49,`
			`"en": 50,`
			`"to": 51,`
			`"ing": 52,`
			`"and": 53,`
			`"is": 54,`
			`"as": 55,`
			`"al": 56,`
			`"or": 57,`
			`"of": 58,`
			`"ar": 59,`
			`"it": 60,`
			`"es": 61,`
			`"he": 62,`
			`"st": 63,`
			`"le": 64,`
			`"om": 65,`
			`"se": 66,`
			`"be": 67,`
			`"ad": 68,`
			`"ow": 69,`
			`"ly": 70,`
			`"ch": 71,`
			`"wh": 72,`
			`"that": 73,`
			`"you": 74,`
			`"li": 75,`
			`"ve": 76,`
			`"ac": 77,`
			`"ti": 78,`
			`"ld": 79,`
			`"me": 80,`
			`"was": 81,`
			`"gh": 82,`
			`"id": 83,`
			`"ll": 84,`
			`"wi": 85,`
			`"ent": 86,`
			`"for": 87,`
			`"ay": 88,`
			`"ro": 89,`
			`"ver": 90,`
			`"ic": 91,`
			`"her": 92,`
			`"ke": 93,`
			`"his": 94,`
			`"no": 95,`
			`"ut": 96,`
			`"un": 97,`
			`"ir": 98,`
			`"lo": 99,`
			`"we": 100,`
			`"ri": 101,`
			`"ha": 102,`
			`"with": 103,`
			`"ght": 104,`
			`"out": 105,`
			`"im": 106,`
			`"ion": 107,`
			`"all": 108,`
			`"ab": 109,`
			`"one": 110,`
			`"ne": 111,`
			`"ge": 112,`
			`"ould": 113,`
			`"ter": 114,`
			`"mo": 115,`
			`"had": 116,`
			`"ce": 117,`
			`"she": 118,`
			`"go": 119,`
			`"sh": 120,`
			`"ur": 121,`
			`"am": 122,`
			`"so": 123,`
			`"pe": 124,`
			`"my": 125,`
			`"de": 126,`
			`"are": 127,`
			`"but": 128,`
			`"ome": 129,`
			`"fr": 130,`
			`"ther": 131,`
			`"fe": 132,`
			`"su": 133,`
			`"do": 134,`
			`"con": 135,`
			`"te": 136,`
			`"ain": 137,`
			`"ere": 138,`
			`"po": 139,`
			`"if": 140,`
			`"they": 141,`
			`"us": 142,`
			`"ag": 143,`
			`"tr": 144,`
			`"now": 145,`
			`"oun": 146,`
			`"this": 147,`
			`"have": 148,`
			`"not": 149,`
			`"sa": 150,`
			`"il": 151,`
			`"up": 152,`
			`"thing": 153,`
			`"from": 154,`
			`"ap": 155,`
			`"him": 156,`
			`"ack": 157,`
			`"ation": 158,`
			`"ant": 159,`
			`"our": 160,`
			`"op": 161,`
			`"like": 162,`
			`"ust": 163,`
			`"ess": 164,`
			`"bo": 165,`
			`"ok": 166,`
			`"ul": 167,`
			`"ind": 168,`
			`"ex": 169,`
			`"com": 170,`
			`"some": 171,`
			`"there": 172,`
			`"ers": 173,`
			`"co": 174,`
			`"res": 175,`
			`"man": 176,`
			`"ard": 177,`
			`"pl": 178,`
			`"wor": 179,`
			`"way": 180,`
			`"tion": 181,`
			`"fo": 182,`
			`"ca": 183,`
			`"were": 184,`
			`"by": 185,`
			`"ate": 186,`
			`"pro": 187,`
			`"ted": 188,`
			`"ound": 189,`
			`"own": 190,`
			`"would": 191,`
			`"ts": 192,`
			`"what": 193,`
			`"qu": 194,`
			`"ally": 195,`
			`"ight": 196,`
			`"ck": 197,`
			`"gr": 198,`
			`"when": 199,`
			`"ven": 200,`
			`"can": 201,`
			`"ough": 202,`
			`"ine": 203,`
			`"end": 204,`
			`"per": 205,`
			`"ous": 206,`
			`"od": 207,`
			`"ide": 208,`
			`"know": 209,`
			`"ty": 210,`
			`"very": 211,`
			`"si": 212,`
			`"ak": 213,`
			`"who": 214,`
			`"about": 215,`
			`"ill": 216,`
			`"them": 217,`
			`"est": 218,`
			`"red": 219,`
			`"ye": 220,`
			`"could": 221,`
			`"ong": 222,`
			`"your": 223,`
			`"their": 224,`
			`"em": 225,`
			`"just": 226,`
			`"other": 227,`
			`"into": 228,`
			`"any": 229,`
			`"whi": 230,`
			`"um": 231,`
			`"tw": 232,`
			`"ast": 233,`
			`"der": 234,`
			`"did": 235,`
			`"ie": 236,`
			`"been": 237,`
			`"ace": 238,`
			`"ink": 239,`
			`"ity": 240,`
			`"back": 241,`
			`"ting": 242,`
			`"br": 243,`
			`"more": 244,`
			`"ake": 245,`
			`"pp": 246,`
			`"then": 247,`
			`"sp": 248,`
			`"el": 249,`
			`"use": 250,`
			`"bl": 251,`
			`"said": 252,`
			`"over": 253,`
			`"get": 254`
			`},`
			`"merges":`
			`[`
			`"t h",`
			`"i n",`
			`"th e",`
			`"a n",`
			`"e r",`
			`"o u",`
			`"r e",`
			`"o n",`
			`"a t",`
			`"e d",`
			`"e n",`
			`"t o",`
			`"in g",`
			`"an d",`
			`"i s",`
			`"a s",`
			`"a l",`
			`"o r",`
			`"o f",`
			`"a r",`
			`"i t",`
			`"e s",`
			`"h e",`
			`"s t",`
			`"l e",`
			`"o m",`
			`"s e",`
			`"b e",`
			`"a d",`
			`"o w",`
			`"l y",`
			`"c h",`
			`"w h",`
			`"th at",`
			`"y ou",`
			`"l i",`
			`"v e",`
			`"a c",`
			`"t i",`
			`"l d",`
			`"m e",`
			`"w as",`
			`"g h",`
			`"i d",`
			`"l l",`
			`"w i",`
			`"en t",`
			`"f or",`
			`"a y",`
			`"r o",`
			`"v er",`
			`"i c",`
			`"h er",`
			`"k e",`
			`"h is",`
			`"n o",`
			`"u t",`
			`"u n",`
			`"i r",`
			`"l o",`
			`"w e",`
			`"r i",`
			`"h a",`
			`"wi th",`
			`"gh t",`
			`"ou t",`
			`"i m",`
			`"i on",`
			`"al l",`
			`"a b",`
			`"on e",`
			`"n e",`
			`"g e",`
			`"ou ld",`
			`"t er",`
			`"m o",`
			`"h ad",`
			`"c e",`
			`"s he",`
			`"g o",`
			`"s h",`
			`"u r",`
			`"a m",`
			`"s o",`
			`"p e",`
			`"m y",`
			`"d e",`
			`"a re",`
			`"b ut",`
			`"om e",`
			`"f r",`
			`"the r",`
			`"f e",`
			`"s u",`
			`"d o",`
			`"c on",`
			`"t e",`
			`"a in",`
			`"er e",`
			`"p o",`
			`"i f",`
			`"the y",`
			`"u s",`
			`"a g",`
			`"t r",`
			`"n ow",`
			`"ou n",`
			`"th is",`
			`"ha ve",`
			`"no t",`
			`"s a",`
			`"i l",`
			`"u p",`
			`"th ing",`
			`"fr om",`
			`"a p",`
			`"h im",`
			`"ac k",`
			`"at ion",`
			`"an t",`
			`"ou r",`
			`"o p",`
			`"li ke",`
			`"u st",`
			`"es s",`
			`"b o",`
			`"o k",`
			`"u l",`
			`"in d",`
			`"e x",`
			`"c om",`
			`"s ome",`
			`"the re",`
			`"er s",`
			`"c o",`
			`"re s",`
			`"m an",`
			`"ar d",`
			`"p l",`
			`"w or",`
			`"w ay",`
			`"ti on",`
			`"f o",`
			`"c a",`
			`"w ere",`
			`"b y",`
			`"at e",`
			`"p ro",`
			`"t ed",`
			`"oun d",`
			`"ow n",`
			`"w ould",`
			`"t s",`
			`"wh at",`
			`"q u",`
			`"al ly",`
			`"i ght",`
			`"c k",`
			`"g r",`
			`"wh en",`
			`"v en",`
			`"c an",`
			`"ou gh",`
			`"in e",`
			`"en d",`
			`"p er",`
			`"ou s",`
			`"o d",`
			`"id e",`
			`"k now",`
			`"t y",`
			`"ver y",`
			`"s i",`
			`"a k",`
			`"wh o",`
			`"ab out",`
			`"i ll",`
			`"the m",`
			`"es t",`
			`"re d",`
			`"y e",`
			`"c ould",`
			`"on g",`
			`"you r",`
			`"the ir",`
			`"e m",`
			`"j ust",`
			`"o ther",`
			`"in to",`
			`"an y",`
			`"wh i",`
			`"u m",`
			`"t w",`
			`"as t",`
			`"d er",`
			`"d id",`
			`"i e",`
			`"be en",`
			`"ac e",`
			`"in k",`
			`"it y",`
			`"b ack",`
			`"t ing",`
			`"b r",`
			`"mo re",`
			`"a ke",`
			`"p p",`
			`"the n",`
			`"s p",`
			`"e l",`
			`"u se",`
			`"b l",`
			`"sa id",`
			`"o ver",`
			`"ge t"`
			`]`
			`}`
			`}`