unk hunting

This commit is contained in:
mrq 2023-03-16 14:59:12 +00:00
parent 46ff3c476a
commit 1a8c5de517
2 changed files with 20 additions and 3 deletions

View File

@@ -106,7 +106,18 @@
"ʌ": 57, "ʌ": 57,
"ʒ": 58, "ʒ": 58,
"θ": 59, "θ": 59,
"ː": 60 "ɐ": 60,
"ɜ": 61,
"ᵻ": 62,
"ɾ": 63,
"n̩": 64,
"ː": 65,
"ɔː": 66,
"uː": 67,
"iː": 68,
"ɑː": 69,
"oː": 70,
"ɜː": 71
}, },
"merges": "merges":
[ [
@@ -116,7 +127,13 @@
"e ɪ", "e ɪ",
"o ʊ", "o ʊ",
"t͡ ʃ", "t͡ ʃ",
"ɔ ɪ" "ɔ ɪ",
"ɔ ː",
"u ː",
"i ː",
"ɑ ː",
"o ː",
"ɜ ː"
] ]
} }
} }

View File

@@ -1781,7 +1781,7 @@ def tokenize_text( text ):
load_tts() load_tts()
encoded = tts.tokenizer.encode(text) encoded = tts.tokenizer.encode(text)
decoded = tts.tokenizer.tokenizer.decode(encoded, skip_special_tokens=False).replace(" ", "") decoded = tts.tokenizer.tokenizer.decode(encoded, skip_special_tokens=False).replace(" ", "").replace("[SPACE]", " ")
return "\n".join([ str(encoded), decoded ]) return "\n".join([ str(encoded), decoded ])