actually pass language into dataset process script, fix coercing japanese into hiragana because espeak does not like kanji
parent 3e5ca3a201
commit ad024f400f
@@ -185,7 +185,7 @@ for dataset_name in sorted(os.listdir(f'./{input_audio}/')):
 		try:
 			outpath, text, language, waveform, sample_rate = job

-			phones = valle_phonemize(text)
+			phones = valle_phonemize( text, language=language )
 			qnt = valle_quantize(waveform, sr=sample_rate, device=device)

 			if cfg.audio_backend == "dac":
@@ -25,7 +25,7 @@ def main():
 	parser.add_argument("--input-prompt-length", type=float, default=3.0)

 	parser.add_argument("--top-p", type=float, default=1.0)
-	parser.add_argument("--top-k", type=int, default=16)
+	parser.add_argument("--top-k", type=int, default=0)
 	parser.add_argument("--repetition-penalty", type=float, default=1.0)
 	parser.add_argument("--repetition-penalty-decay", type=float, default=0.0)
 	parser.add_argument("--length-penalty", type=float, default=0.0)
@@ -56,7 +56,7 @@ def main():
 	parser.add_argument("--input-prompt-length", type=float, default=0.0)

 	parser.add_argument("--top-p", type=float, default=1.0)
-	parser.add_argument("--top-k", type=int, default=16)
+	parser.add_argument("--top-k", type=int, default=0)
 	parser.add_argument("--repetition-penalty", type=float, default=1.0)
 	parser.add_argument("--repetition-penalty-decay", type=float, default=0.0)
 	parser.add_argument("--length-penalty", type=float, default=0.0)
@@ -108,6 +108,8 @@ def main():

 	# pull from dataset samples
 	if args.sample_from_dataset:
+		cfg.dataset.cache = False
+
 		samples_dirs["dataset"] = args.demo_dir / "dataset"

 		print("Loading dataloader...")
@@ -157,13 +159,14 @@ def main():
 			text = open(dir / "prompt.txt").read()
+			language = open(dir / "language.txt").read() if (dir / "language.txt").exists() else "en"
 			prompt = dir / "prompt.wav"
 			reference = dir / "reference.wav"
 			out_path = dir / "out" / "ours.wav"

 			extra_sources = [ dir / "out" / f"{source}.wav" for source in sources ] if k == "librispeech" else []

 			samples.append((
 				text,
-				[ prompt, dir / "reference.wav", out_path ] + extra_sources
+				[ prompt, reference, out_path ] + extra_sources
 			))

 			if args.skip_existing and out_path.exists():
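For reference, the per-sample layout the loop above reads would look roughly like this (a sketch inferred from the paths in the diff; language.txt is optional and falls back to "en", and the extra out/<source>.wav comparisons only apply to the librispeech group):

<demo_dir>/<group>/<sample>/
    prompt.txt        # text fed to the model
    language.txt      # optional language code, e.g. "ja"; missing file means "en"
    prompt.wav
    reference.wav
    out/ours.wav      # synthesized output, checked by --skip-existing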
@@ -10,12 +10,22 @@ from phonemizer.backend import BACKENDS

 from tqdm import tqdm

+try:
+	import pykakasi
+except Exception as e:
+	pass
+
 @cache
 def _get_graphs(path):
 	with open(path, "r") as f:
 		graphs = f.read()
 	return graphs

+def romanize( runes, sep="" ):
+	kks = pykakasi.kakasi()
+	result = kks.convert( runes )
+	return sep.join([ res['hira'] for res in result ])
+
 cached_backends = {}
 def _get_backend( language="en-us", backend="espeak" ):
 	key = f'{language}_{backend}'
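As a standalone sketch of what romanize() above does, assuming pykakasi 2.x is installed: kakasi().convert() splits the input into segments and returns a dict per segment, and joining the 'hira' fields coerces kanji into a hiragana-only string.

import pykakasi

kks = pykakasi.kakasi()
result = kks.convert("日本語")  # one dict per segment, with 'orig', 'hira', 'kana', 'hepburn', ... keys
print("".join(segment["hira"] for segment in result))  # -> "にほんご"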
@@ -37,6 +47,10 @@ def encode(text: str, language="en-us", backend="auto") -> list[str]:
 	if language == "en":
 		language = "en-us"

+	# Convert to kana because espeak does not like kanji...
+	if language[:2] == "ja" and backend == "auto":
+		text = romanize( text )
+
 	if not backend or backend == "auto":
 		backend = "espeak" # if language[:2] != "en" else "festival"

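A minimal sketch of why that branch exists, assuming phonemizer plus espeak-ng with its Japanese voice are installed: espeak's "ja" voice only handles kana, so the hiragana produced by romanize() phonemizes cleanly, while raw kanji is the failure case being worked around.

from phonemizer import phonemize

# kana-only input, as romanize() would produce for "日本語"
phones = phonemize("にほんご", language="ja", backend="espeak", strip=True)
print(phones)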
@@ -48,14 +62,7 @@ def encode(text: str, language="en-us", backend="auto") -> list[str]:
 	else:
 		tokens = phonemize( text, language=language, strip=True, preserve_punctuation=True, with_stress=True )

+
 	tokens = list(tokens[0])
+	return tokens
-	"""
-	tokenized = " ".join( tokens )
-
-	merges = [ "\u02C8", "\u02CC", "\u02D0" ]
-	for merge in merges:
-		tokenized = tokenized.replace( f' {merge}', merge )
-
-	return tokenized.split(" ")
-	"""
 	return tokens
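And a sketch of why encode() ends with list(tokens[0]), assuming phonemizer with an espeak backend is installed: phonemize() returns one phonemized string per input, and list() splits that string into per-character IPA tokens (stress marks and spaces included), which is what the rest of the pipeline consumes.

from phonemizer import phonemize

tokens = phonemize(["hello world"], language="en-us", strip=True, preserve_punctuation=True, with_stress=True)
print(list(tokens[0]))  # e.g. ['h', 'ə', 'l', 'ˈ', 'o', 'ʊ', ' ', 'w', ...]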