Forgot the other things that were in the tortoise implementation but not here

This commit is contained in:
mrq 2023-03-17 20:24:17 +00:00
parent 64a41fde24
commit efd038c076

View File

@@ -29,6 +29,27 @@ def remove_extraneous_punctuation(word):
word = extraneous.sub('', word)
return word
def expand_numbers(text):
    '''Return the result of running ``text`` through ``normalize_numbers``.'''
    # NOTE(review): normalize_numbers is defined outside this view; presumably
    # it spells out numerals as words -- confirm against its definition.
    expanded = normalize_numbers(text)
    return expanded
def lowercase(text):
    '''Return a copy of ``text`` with all cased characters lower-cased.'''
    lowered = text.lower()
    return lowered
# Matches any run of one or more consecutive whitespace characters.
_whitespace_re = re.compile(r'\s+')


def collapse_whitespace(text):
    '''Replace every run of whitespace in ``text`` with a single space.

    Leading and trailing whitespace is not stripped; each such run is
    reduced to one space like any other.
    '''
    return _whitespace_re.sub(' ', text)
def convert_to_ascii(text):
    '''Return the result of passing ``text`` through ``unidecode``.'''
    # NOTE(review): unidecode is imported outside this view; presumably it
    # transliterates non-ASCII characters to ASCII -- confirm the import.
    ascii_text = unidecode(text)
    return ascii_text
def basic_cleaners(text):
    '''Minimal cleaning pipeline: lower-case, then collapse whitespace.

    No transliteration is applied, so non-ASCII characters pass through
    unchanged apart from lower-casing.
    '''
    # Order matches the original pipeline: lowercase first, then collapse.
    return collapse_whitespace(lowercase(text))
class VoiceBpeTokenizer:
def __init__(self, vocab_file, preprocess=None):