{ "version": "1.0", "truncation": null, "padding": null, "normalizer": null, "pre_tokenizer": null, "post_processor": null, "decoder": null, "added_tokens": [ { "id": 0, "special": true, "content": "[STOP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 1, "special": true, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 2, "special": true, "content": "[SPACE]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false } ], "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "vocab": { "[STOP]": 0, "[UNK]": 1, "[SPACE]": 2, "!": 3, "'": 4, "(": 5, ")": 6, ",": 7, "-": 8, ".": 9, "/": 10, ":": 11, ";": 12, "?": 13, "a": 14, "b": 15, "c": 16, "d": 17, "e": 18, "f": 19, "g": 20, "h": 21, "i": 22, "j": 23, "k": 24, "l": 25, "m": 26, "n": 27, "o": 28, "p": 29, "q": 30, "r": 31, "s": 32, "t": 33, "u": 34, "v": 35, "w": 36, "x": 37, "y": 38, "z": 39, "d͡": 40, "t͡": 41, "|": 42, "æ": 43, "ð": 44, "ŋ": 45, "ɑ": 46, "ɔ": 47, "ə": 48, "ɚ": 49, "ɛ": 50, "ɡ": 51, "ɪ": 52, "ɹ": 53, "ʃ": 54, "ʊ": 55, "ʌ": 56, "ʒ": 57, "θ": 58, "ɐ": 59, "ɜ": 60, "ᵻ": 61, "ɾ": 62, "n\u0329": 63, "ː": 64, "ˈ": 65, "ˌ": 66, "ʔ": 67, "d͡ʒ": 68, "aɪ": 69, "aʊ": 70, "eɪ": 71, "oʊ": 72, "t͡ʃ": 73, "ɔɪ": 74, "ɔː": 75, "uː": 76, "iː": 77, "ɑː": 78, "oː": 79, "ɜː": 80 }, "merges": [ "a ɪ", "e ɪ", "ɔ ɪ", "a ʊ", "o ʊ", "d͡ ʒ", "t͡ ʃ", "i ː", "o ː", "u ː", "ɑ ː", "ɔ ː", "ɜ ː" ] } }