# vall-e/vall_e/emb/g2p.py
#
# Repository-viewer metadata: 70 lines, 1.8 KiB, Python.
# Blame timestamp: 2023-08-02 21:53:35 +00:00
import argparse
import random
import string
import torch
from functools import cache
from pathlib import Path
from phonemizer import phonemize
from phonemizer.backend import BACKENDS
from tqdm import tqdm
try:
import pykakasi
except Exception as e:
pass
# Blame timestamp: 2023-08-02 21:53:35 +00:00
@cache
def _get_graphs(path):
with open(path, "r") as f:
graphs = f.read()
return graphs
def romanize( runes, sep="" ):
    """Convert ``runes`` with pykakasi and join the ``'hira'`` readings.

    A new ``pykakasi.kakasi()`` converter is created on every call. Each
    entry returned by ``convert`` contributes its ``'hira'`` field, and the
    fields are concatenated with ``sep`` between them.

    NOTE: despite the name, the value returned is built from the ``'hira'``
    field of each entry, not from a Latin-alphabet field.
    """
    converter = pykakasi.kakasi()
    readings = ( entry['hira'] for entry in converter.convert( runes ) )
    return sep.join( readings )
# Blame timestamp: 2023-08-02 21:53:35 +00:00
cached_backends = {}
def _get_backend( language="en-us", backend="espeak", punctuation=True, stress=True, strip=True ):
2023-08-02 21:53:35 +00:00
key = f'{language}_{backend}'
if key in cached_backends:
return cached_backends[key]
if backend == 'espeak':
phonemizer = BACKENDS[backend]( language, preserve_punctuation=punctuation, with_stress=stress)
2023-08-02 21:53:35 +00:00
elif backend == 'espeak-mbrola':
phonemizer = BACKENDS[backend]( language )
else:
phonemizer = BACKENDS[backend]( language, preserve_punctuation=punctuation )
2023-08-02 21:53:35 +00:00
cached_backends[key] = phonemizer
return phonemizer
def encode(text: str, language="en-us", backend="auto", punctuation=True, stress=True, strip=True) -> list[str]:
    """Phonemize ``text`` and return the result split into single characters.

    Args:
        text: the text to phonemize.
        language: language code; ``"en"`` is treated as ``"en-us"``.
        backend: phonemizer backend name. ``"auto"``, ``None`` and ``""`` all
            mean "pick automatically", which currently selects ``"espeak"``.
        punctuation: whether punctuation is preserved in the output.
        stress: whether stress marks are included in the output.
        strip: forwarded to the phonemizer's ``strip`` option.

    Returns:
        A list holding every character of the phonemized string, or an empty
        list when the phonemizer returned nothing.
    """
    if language == "en":
        language = "en-us"

    # Every "unspecified" spelling of the backend is handled identically, so
    # that None / "" get the same Japanese pre-processing as "auto" does.
    auto = not backend or backend == "auto"

    # Convert to kana because espeak does not like kanji...
    if auto and language[:2] == "ja":
        text = romanize( text )

    if auto:
        backend = "espeak" # if language[:2] != "en" else "festival"

    # Keep the backend *name* and the backend *instance* in separate names.
    phonemizer = _get_backend(language=language, backend=backend, stress=stress, strip=strip, punctuation=punctuation)

    if phonemizer is not None:
        phonemized = phonemizer.phonemize( [ text ], strip=strip )
    else:
        # Defensive fallback to the module-level phonemize() helper in case no
        # backend instance is available.
        phonemized = phonemize( [ text ], language=language, strip=strip, preserve_punctuation=punctuation, with_stress=stress )

    if not phonemized:
        return []

    # One input string was submitted, so only the first result is relevant.
    return list(phonemized[0])