Add the other helpers that were in the Tortoise implementation but had been forgotten here
This commit is contained in:
parent
64a41fde24
commit
efd038c076
|
@ -29,6 +29,27 @@ def remove_extraneous_punctuation(word):
|
|||
word = extraneous.sub('', word)
|
||||
return word
|
||||
|
||||
def expand_numbers(text):
    '''Pass `text` through `normalize_numbers` and return the result.'''
    expanded = normalize_numbers(text)
    return expanded
|
||||
|
||||
|
||||
def lowercase(text):
    '''Return `text` with all cased characters converted to lower case.'''
    lowered = text.lower()
    return lowered
|
||||
|
||||
# Matches any run of one or more whitespace characters.
_whitespace_re = re.compile(r'\s+')


def collapse_whitespace(text):
    '''Replace every run of whitespace in `text` with a single space.

    Leading and trailing whitespace runs are collapsed to one space each;
    they are not stripped.
    '''
    # Call the compiled pattern's own method rather than routing it back
    # through the module-level re.sub().
    return _whitespace_re.sub(' ', text)
|
||||
|
||||
|
||||
def convert_to_ascii(text):
    '''Return the result of running `text` through `unidecode`.'''
    transliterated = unidecode(text)
    return transliterated
|
||||
|
||||
def basic_cleaners(text):
    '''Minimal cleaning pipeline: lowercase, then collapse whitespace.

    No transliteration step is applied.
    '''
    return collapse_whitespace(lowercase(text))
|
||||
|
||||
class VoiceBpeTokenizer:
|
||||
def __init__(self, vocab_file, preprocess=None):
|
||||
|
|
Loading…
Reference in New Issue
Block a user