{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": null, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 1 ], "tokens": [ "" ] }, "": { "id": "", "ids": [ 2 ], "tokens": [ "" ] } } }, "decoder": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": false, "use_regex": false }, "model": { "type": "BPE", "dropout": null, "unk_token": "", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": true, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, " ": 4, "ᵝ": 4, "!": 5, "\"": 6, "(": 7, "{": 7, "[": 7, ")": 8, "}": 8, "]": 8, ",": 9, "-": 10, ".": 11, "1": 211, "—": 10, "“": 6, "”": 81, "ˇ": 6, "ˉ": 12, "ˊ": 79, "ˋ": 80, "_": 81, ":": 13, ";": 14, "?": 15, "a": 16, "ä": 16, "ɒ": 16, "b": 17, "c": 18, "d": 19, "e": 20, "f": 21, "h": 22, "i": 23, "ĩ": 23, "j": 24, "k": 25, "l": 26, "m": 27, "n": 28, "ɴ": 28, "ɲ": 28, "o": 29, "̞": 29, "p": 30, "ɸ": 30, "q": 31, "r": 32, "ɽ": 32, "ʁ": 32, "s": 33, "t": 34, "u": 35, "ø": 35, "œ": 35, "y": 35, "ɣ": 35, "ũ": 35, "v": 36, "w": 37, "ʍ": 37, "x": 38, "z": 39, "¡": 40, "«": 41, "»": 42, "¿": 43, "æ": 44, "ç": 45, "ð": 46, "ŋ": 47, "ɐ": 48, "ɑ": 49, "ɔ": 50, "ɕ": 51, "ə": 52, "ɚ": 53, "ɛ": 54, "ɜ": 55, "ɟ": 56, "ɡ": 57, "ɪ": 58, "ɬ": 59, "ɯ": 60, "ɹ": 61, "ɾ": 62, "ʃ": 63, "ʈ": 64, "ʊ": 65, "ʋ": 66, "ʌ": 67, "ʑ": 68, "ʒ": 69, "ʔ": 70, "ʲ": 71, "ˈ": 72, "ˌ": 73, "ˌ": 73, "ː": 74, "̃": 75, "̩": 76, "θ": 77, "ᵻ": 78, "…": 82, "ˈɛ": 83, "iː": 84, "aɪ": 85, "nd": 86, "ˈɪ": 87, "eɪ": 88, "ˈæ": 89, "ðə": 90, "oʊ": 91, "ɑː": 92, "ˈeɪ": 93, "ən": 94, "uː": 95, "ˈʌ": 96, "ˈaɪ": 97, "st": 98, "ˈɔ": 99, "ˈoʊ": 100, "ˈiː": 101, "ˈɑː": 102, "ænd": 103, "ːɹ": 104, "ɪŋ": 105, "ɜː": 106, "ɪn": 107, "tə": 108, "ʌv": 109, "aʊ": 110, "əl": 111, "ˈuː": 112, "tʃ": 113, "ɪz": 114, "ˈɜː": 115, "ˌʌ": 116, "æt": 117, "dʒ": 118, "ˈɔː": 119, "ɪt": 120, "ˈaʊ": 121, "ɚɹ": 122, "ˈɛn": 123, "wʌ": 124, "li": 125, "hiː": 126, "ˌɛ": 127, "wɪ": 128, "wʌz": 129, "ðæt": 130, "juː": 131, "oːɹ": 132, "ðɪ": 133, "sˈɛ": 134, "ˌɪ": 135, "ˈɑːɹ": 136, "nt": 137, "ˈʊ": 138, "ənt": 139, "hɪz": 140, "ˌɑː": 141, "hæ": 142, "ɔːɹ": 143, "ˈɛɹ": 144, "wɪð": 145, "ᵻd": 146, "ˈoːɹ": 147, "pɹ": 148, "ˈɔːl": 149, "mˌ": 150, "ʃən": 151, "kt": 152, "ˌoʊ": 153, "ˈɔːɹ": 154, "fɹ": 155, "æz": 156, "ˌʌt": 157, "ʃiː": 158, "ˈɛl": 159, "ˌaʊ": 160, "ˈʌn": 161, "əs": 162, "hɜː": 163, "lˈaɪ": 164, "ˈæn": 165, "ˈɪɹ": 166, "ʊd": 167, "ɹᵻ": 168, "ld": 169, "bˌʌt": 170, "ks": 171, "nˈoʊ": 172, "hæd": 173, "ɾɚ": 174, "ɛɹ": 175, "ˈɪŋ": 176, "ɡɹ": 177, "nˌɑː": 178, "ɔn": 179, "vɚ": 180, "maɪ": 181, "fɔːɹ": 182, "ðɚ": 183, "tʊ": 184, "ðɛɹ": 185, "nˌɑːt": 186, "ˈʌm": 187, "tɹ": 188, "sˈiː": 189, "ʌvðə": 190, "mˈɪ": 191, "hˈæ": 192, "ˌɪm": 193, "lˈeɪ": 194, "ɪk": 195, "sp": 196, "hˌɪm": 197, "ɐn": 198, "ðeɪ": 199, "lˈɪ": 200, "ɾi": 201, "lˈɛ": 202, "bɹ": 203, "kɹ": 204, "lˈæ": 205, "ˈɪl": 206, "jˈuː": 207, "ʌm": 208, "mˌiː": 209, "bᵻ": 210, "wˈʌn": 211, "ˌɪn": 212, "ˈɪn": 213, "ˈoʊn": 214, "sˈɛd": 215, "biː": 216, "ˈɛd": 217, "ˈaɪt": 218, "baɪ": 219, "fɹʌm": 220, "ɪs": 221, "ɚz": 222, "ðɪs": 223, "əns": 224, "bəl": 225, "ɪf": 226, "ɪnðə": 227, "əm": 228, "ᵻz": 229, "ˌuː": 230, "wˈeɪ": 231, "ft": 232, "wiː": 233, "stɹ": 234, "lˈiː": 235, "iːz": 236, "pt": 237, "jʊ": 238, "ɚd": 239, "ˌaɪ": 240, "kw": 241, "ˌɔn": 242, "ˈaɪd": 243, "ɪm": 244, "ˈʌst": 245, "ˈoʊld": 246, "ts": 247, "ˌɪtʃ": 248, "sˌoʊ": 249, "dˈɪ": 250, "ɑːɹ": 251, "hɐ": 252, "sˈeɪ": 253, "ɾᵻd": 254, "wˌɪtʃ": 255 }, "merges": [ "ˈ ɛ", "i ː", "a ɪ", "n d", "ˈ ɪ", "e ɪ", "ˈ æ", "ð ə", "o ʊ", "ɑ ː", "ˈ eɪ", "ə n", "u ː", "ˈ ʌ", "ˈ aɪ", "s t", "ˈ ɔ", "ˈ oʊ", "ˈ iː", "ˈ ɑː", "æ nd", "ː ɹ", "ɪ ŋ", "ɜ ː", "ɪ n", "t ə", "ʌ v", "a ʊ", "ə l", "ˈ uː", "t ʃ", "ɪ z", "ˈ ɜː", "ˌ ʌ", "æ t", "d ʒ", "ˈɔ ː", "ɪ t", "ˈ aʊ", "ɚ ɹ", "ˈɛ n", "w ʌ", "l i", "h iː", "ˌ ɛ", "w ɪ", "wʌ z", "ð æt", "j uː", "o ːɹ", "ð ɪ", "s ˈɛ", "ˌ ɪ", "ˈɑː ɹ", "n t", "ˈ ʊ", "ən t", "h ɪz", "ˌ ɑː", "h æ", "ɔ ːɹ", "ˈɛ ɹ", "wɪ ð", "ᵻ d", "ˈ oːɹ", "p ɹ", "ˈɔː l", "m ˌ", "ʃ ən", "k t", "ˌ oʊ", "ˈɔ ːɹ", "f ɹ", "æ z", "ˌʌ t", "ʃ iː", "ˈɛ l", "ˌ aʊ", "ˈʌ n", "ə s", "h ɜː", "l ˈaɪ", "ˈæ n", "ˈɪ ɹ", "ʊ d", "ɹ ᵻ", "l d", "b ˌʌt", "k s", "n ˈoʊ", "hæ d", "ɾ ɚ", "ɛ ɹ", "ˈɪ ŋ", "ɡ ɹ", "n ˌɑː", "ɔ n", "v ɚ", "m aɪ", "f ɔːɹ", "ð ɚ", "t ʊ", "ð ɛɹ", "nˌɑː t", "ˈʌ m", "t ɹ", "s ˈiː", "ʌv ðə", "m ˈɪ", "h ˈæ", "ˌɪ m", "l ˈeɪ", "ɪ k", "s p", "h ˌɪm", "ɐ n", "ð eɪ", "l ˈɪ", "ɾ i", "l ˈɛ", "b ɹ", "k ɹ", "l ˈæ", "ˈɪ l", "j ˈuː", "ʌ m", "mˌ iː", "b ᵻ", "w ˈʌn", "ˌ ɪn", "ˈɪ n", "ˈoʊ n", "sˈɛ d", "b iː", "ˈɛ d", "ˈaɪ t", "b aɪ", "fɹ ʌm", "ɪ s", "ɚ z", "ðɪ s", "ən s", "b əl", "ɪ f", "ɪn ðə", "ə m", "ᵻ z", "ˌ uː", "w ˈeɪ", "f t", "w iː", "st ɹ", "l ˈiː", "iː z", "p t", "j ʊ", "ɚ d", "ˌ aɪ", "k w", "ˌ ɔn", "ˈaɪ d", "ɪ m", "ˈʌ st", "ˈoʊ ld", "t s", "ˌɪ tʃ", "s ˌoʊ", "d ˈɪ", "ɑː ɹ", "h ɐ", "s ˈeɪ", "ɾ ᵻd", "w ˌɪtʃ" ] } }