From 69f140ba45a1599223a3440905dce64ca0690f79 Mon Sep 17 00:00:00 2001 From: mrq Date: Fri, 13 Sep 2024 12:53:36 -0500 Subject: [PATCH] fix oversight with phonemizing french because espeak defines french as fr-fr instead of fr (even though spain spanish is es and not es-sp or some shit, but portugal portuguese is pt-pt) --- vall_e/emb/g2p.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/vall_e/emb/g2p.py b/vall_e/emb/g2p.py index 96d5f15..c06eba1 100755 --- a/vall_e/emb/g2p.py +++ b/vall_e/emb/g2p.py @@ -26,6 +26,15 @@ def romanize( runes, sep="" ): result = kks.convert( runes ) return sep.join([ res['hira'] for res in result ]) +# to-do: fill out this table +# although realistically the only entries that might be needed are en-uk/en-gb, es-la, pt-br, and pt-pt +def coerce_language( lang ): + if lang == "en": + lang = "en-us" + if lang == "fr": + return "fr-fr" + return lang + cached_backends = {} def _get_backend( language="en-us", backend="espeak", punctuation=True, stress=True, strip=True ): key = f'{language}_{backend}' @@ -44,8 +53,7 @@ def _get_backend( language="en-us", backend="espeak", punctuation=True, stress=T def encode(text: str, language="en-us", backend="auto", punctuation=True, stress=True, strip=True) -> list[str]: - if language == "en": - language = "en-us" + language = coerce_language( language ) # Convert to kana because espeak does not like kanji... if language[:2] == "ja" and backend == "auto":