{ "version": "1.0", "truncation": null, "padding": null, "normalizer": null, "pre_tokenizer": null, "post_processor": null, "decoder": null, "added_tokens": [ { "id": 0, "special": true, "content": "[STOP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 1, "special": true, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 2, "special": true, "content": "[SPACE]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false } ], "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "vocab": { "[STOP]": 0, "[UNK]": 1, "[SPACE]": 2, "!": 3, "'": 4, "(": 5, ")": 6, ",": 7, "-": 8, ".": 9, "/": 10, ":": 11, ";": 12, "?": 13, "a": 14, "b": 15, "c": 16, "d": 17, "e": 18, "f": 19, "g": 20, "h": 21, "i": 22, "j": 23, "k": 24, "l": 25, "m": 26, "n": 27, "o": 28, "p": 29, "q": 30, "r": 31, "s": 32, "t": 33, "u": 34, "v": 35, "w": 36, "x": 37, "y": 38, "z": 39, "d͡": 41, "t͡": 42, "|": 43, "æ": 44, "ð": 45, "ŋ": 46, "ɑ": 47, "ɔ": 48, "ə": 49, "ɚ": 50, "ɛ": 51, "ɡ": 52, "ɪ": 53, "ɹ": 54, "ʃ": 55, "ʊ": 56, "ʌ": 57, "ʒ": 58, "θ": 59, "ɐ": 60, "ɜ": 61, "ᵻ": 62, "ɾ": 63, "n\u0329": 64, "ː": 65, "ˈ": 66, "ˌ": 67, "ʔ": 68, "d͡ʒ": 69, "aɪ": 70, "aʊ": 71, "eɪ": 72, "oʊ": 73, "t͡ʃ": 74, "ɔɪ": 75, "ɔː": 76, "uː": 77, "iː": 78, "ɑː": 79, "oː": 80, "ɜː": 81 }, "merges": [ "a ɪ", "e ɪ", "ɔ ɪ", "a ʊ", "o ʊ", "d͡ ʒ", "t͡ ʃ", "i ː", "o ː", "u ː", "ɑ ː", "ɔ ː", "ɜ ː" ] } }