unk hunting

This commit is contained in:
mrq 2023-03-16 14:59:12 +00:00
parent 46ff3c476a
commit 1a8c5de517
2 changed files with 20 additions and 3 deletions

View File

@@ -106,7 +106,18 @@
"ʌ": 57,
"ʒ": 58,
"θ": 59,
"ː": 60
"ɐ": 60,
"ɜ": 61,
"ᵻ": 62,
"ɾ": 63,
"n̩": 64,
"ː": 65,
"ɔː": 66,
"uː": 67,
"iː": 68,
"ɑː": 69,
"oː": 70,
"ɜː": 71
},
"merges":
[
@@ -116,7 +127,13 @@
"e ɪ",
"o ʊ",
"t͡ ʃ",
"ɔ ɪ"
"ɔ ɪ",
"ɔ ː",
"u ː",
"i ː",
"ɑ ː",
"o ː",
"ɜ ː"
]
}
}

View File

@@ -1781,7 +1781,7 @@ def tokenize_text( text ):
load_tts()
encoded = tts.tokenizer.encode(text)
decoded = tts.tokenizer.tokenizer.decode(encoded, skip_special_tokens=False).replace(" ", "")
decoded = tts.tokenizer.tokenizer.decode(encoded, skip_special_tokens=False).replace(" ", "").replace("[SPACE]", " ")
return "\n".join([ str(encoded), decoded ])