default text_to_sequence cleaners

This commit is contained in:
James Betker 2022-02-21 19:14:22 -07:00
parent ba7f54c162
commit 7201b4500c

View File

@ -15,7 +15,7 @@ _id_to_symbol = {i: s for i, s in enumerate(symbols)}
_curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)') _curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)')
def text_to_sequence(text, cleaner_names): def text_to_sequence(text, cleaner_names=['english_cleaners']):
'''Converts a string of text to a sequence of IDs corresponding to the symbols in the text. '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
The text can optionally have ARPAbet sequences enclosed in curly braces embedded The text can optionally have ARPAbet sequences enclosed in curly braces embedded
@ -58,6 +58,10 @@ def sequence_to_text(sequence):
return result.replace('}{', ' ') return result.replace('}{', ' ')
def tacotron_symbols():
    """Return the keys of the module's symbol-to-ID table as a new list."""
    known_symbols = _symbol_to_id.keys()
    return list(known_symbols)
def _clean_text(text, cleaner_names): def _clean_text(text, cleaner_names):
for name in cleaner_names: for name in cleaner_names:
cleaner = getattr(cleaners, name) cleaner = getattr(cleaners, name)