{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "special": true, "content": "[STOP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 1, "special": true, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 2, "special": true, "content": "[SPACE]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "vocab": { "[STOP]": 0, "[UNK]": 1, "[SPACE]": 2, "!": 3, "'": 4, "(": 5, ")": 6, ",": 7, "-": 8, ".": 9, "/": 10, ":": 11, ";": 12, "?": 13, "a": 14, "b": 15, "c": 16, "d": 17, "e": 18, "f": 19, "g": 20, "h": 21, "i": 22, "j": 23, "k": 24, "l": 25, "m": 26, "n": 27, "o": 28, "p": 29, "q": 30, "r": 31, "s": 32, "t": 33, "u": 34, "v": 35, "w": 36, "x": 37, "y": 38, "z": 39, "th": 40, "in": 41, "the": 42, "an": 43, "er": 44, "ou": 45, "re": 46, "on": 47, "at": 48, "ed": 49, "en": 50, "to": 51, "ing": 52, "and": 53, "is": 54, "as": 55, "al": 56, "or": 57, "of": 58, "ar": 59, "it": 60, "es": 61, "he": 62, "st": 63, "le": 64, "om": 65, "se": 66, "be": 67, "ad": 68, "ow": 69, "ly": 70, "ch": 71, "wh": 72, "that": 73, "you": 74, "li": 75, "ve": 76, "ac": 77, "ti": 78, "ld": 79, "me": 80, "was": 81, "gh": 82, "id": 83, "ll": 84, "wi": 85, "ent": 86, "for": 87, "ay": 88, "ro": 89, "ver": 90, "ic": 91, "her": 92, "ke": 93, "his": 94, "no": 95, "ut": 96, "un": 97, "ir": 98, "lo": 99, "we": 100, "ri": 101, "ha": 102, "with": 103, "ght": 104, "out": 105, "im": 106, "ion": 107, "all": 108, "ab": 109, "one": 110, "ne": 111, "ge": 112, "ould": 113, "ter": 114, "mo": 115, "had": 116, "ce": 117, "she": 118, "go": 119, "sh": 120, "ur": 121, "am": 122, "so": 123, "pe": 124, "my": 125, "de": 126, "are": 127, "but": 128, "ome": 129, "fr": 130, "ther": 131, "fe": 132, "su": 133, "do": 134, "con": 135, "te": 136, "ain": 137, "ere": 138, "po": 139, "if": 140, "they": 141, "us": 142, "ag": 143, "tr": 144, "now": 145, "oun": 146, "this": 147, "have": 148, "not": 149, "sa": 150, "il": 151, "up": 152, "thing": 153, "from": 154, "ap": 155, "him": 156, "ack": 157, "ation": 158, "ant": 159, "our": 160, "op": 161, "like": 162, "ust": 163, "ess": 164, "bo": 165, "ok": 166, "ul": 167, "ind": 168, "ex": 169, "com": 170, "some": 171, "there": 172, "ers": 173, "co": 174, "res": 175, "man": 176, "ard": 177, "pl": 178, "wor": 179, "way": 180, "tion": 181, "fo": 182, "ca": 183, "were": 184, "by": 185, "ate": 186, "pro": 187, "ted": 188, "ound": 189, "own": 190, "would": 191, "ts": 192, "what": 193, "qu": 194, "ally": 195, "ight": 196, "ck": 197, "gr": 198, "when": 199, "ven": 200, "can": 201, "ough": 202, "ine": 203, "end": 204, "per": 205, "ous": 206, "od": 207, "ide": 208, "know": 209, "ty": 210, "very": 211, "si": 212, "ak": 213, "who": 214, "about": 215, "ill": 216, "them": 217, "est": 218, "red": 219, "ye": 220, "could": 221, "ong": 222, "your": 223, "their": 224, "em": 225, "just": 226, "other": 227, "into": 228, "any": 229, "whi": 230, "um": 231, "tw": 232, "ast": 233, "der": 234, "did": 235, "ie": 236, "been": 237, "ace": 238, "ink": 239, "ity": 240, "back": 241, "ting": 242, "br": 243, "more": 244, "ake": 245, "pp": 246, "then": 247, "sp": 248, "el": 249, "use": 250, "bl": 251, "said": 252, "over": 253, "get": 254 }, "merges": [ "t h", "i n", "th e", "a n", "e r", "o u", "r e", "o n", "a t", "e d", "e n", "t o", "in g", "an d", "i s", "a s", "a l", "o r", "o f", "a r", "i t", "e s", "h e", "s t", "l e", "o m", "s e", "b e", "a d", "o w", "l y", "c h", "w h", "th at", "y ou", "l i", "v e", "a c", "t i", "l d", "m e", "w as", "g h", "i d", "l l", "w i", "en t", "f or", "a y", "r o", "v er", "i c", "h er", "k e", "h is", "n o", "u t", "u n", "i r", "l o", "w e", "r i", "h a", "wi th", "gh t", "ou t", "i m", "i on", "al l", "a b", "on e", "n e", "g e", "ou ld", "t er", "m o", "h ad", "c e", "s he", "g o", "s h", "u r", "a m", "s o", "p e", "m y", "d e", "a re", "b ut", "om e", "f r", "the r", "f e", "s u", "d o", "c on", "t e", "a in", "er e", "p o", "i f", "the y", "u s", "a g", "t r", "n ow", "ou n", "th is", "ha ve", "no t", "s a", "i l", "u p", "th ing", "fr om", "a p", "h im", "ac k", "at ion", "an t", "ou r", "o p", "li ke", "u st", "es s", "b o", "o k", "u l", "in d", "e x", "c om", "s ome", "the re", "er s", "c o", "re s", "m an", "ar d", "p l", "w or", "w ay", "ti on", "f o", "c a", "w ere", "b y", "at e", "p ro", "t ed", "oun d", "ow n", "w ould", "t s", "wh at", "q u", "al ly", "i ght", "c k", "g r", "wh en", "v en", "c an", "ou gh", "in e", "en d", "p er", "ou s", "o d", "id e", "k now", "t y", "ver y", "s i", "a k", "wh o", "ab out", "i ll", "the m", "es t", "re d", "y e", "c ould", "on g", "you r", "the ir", "e m", "j ust", "o ther", "in to", "an y", "wh i", "u m", "t w", "as t", "d er", "d id", "i e", "be en", "ac e", "in k", "it y", "b ack", "t ing", "b r", "mo re", "a ke", "p p", "the n", "s p", "e l", "u se", "b l", "sa id", "o ver", "ge t" ] } }