forked from mrq/ai-voice-cloning
forgot to separate phonemes by spaces for [redacted]
This commit is contained in:
parent
d4c50967a6
commit
1b72d0bba0
|
@ -106,22 +106,24 @@
|
||||||
"ɜ": 61,
|
"ɜ": 61,
|
||||||
"ᵻ": 62,
|
"ᵻ": 62,
|
||||||
"ɾ": 63,
|
"ɾ": 63,
|
||||||
"n̩": 64,
|
"n\u0329": 64,
|
||||||
"ː": 65,
|
"ː": 65,
|
||||||
"ˈ": 66,
|
"ˈ": 66,
|
||||||
"d͡ʒ": 67,
|
"ˌ": 67,
|
||||||
"aɪ": 68,
|
"ʔ": 68,
|
||||||
"aʊ": 69,
|
"d͡ʒ": 69,
|
||||||
"eɪ": 70,
|
"aɪ": 70,
|
||||||
"oʊ": 71,
|
"aʊ": 71,
|
||||||
"t͡ʃ": 72,
|
"eɪ": 72,
|
||||||
"ɔɪ": 73,
|
"oʊ": 73,
|
||||||
"ɔː": 74,
|
"t͡ʃ": 74,
|
||||||
"uː": 75,
|
"ɔɪ": 75,
|
||||||
"iː": 76,
|
"ɔː": 76,
|
||||||
"ɑː": 77,
|
"uː": 77,
|
||||||
"oː": 78,
|
"iː": 78,
|
||||||
"ɜː": 79
|
"ɑː": 79,
|
||||||
|
"oː": 80,
|
||||||
|
"ɜː": 81
|
||||||
},
|
},
|
||||||
"merges": [
|
"merges": [
|
||||||
"a ɪ",
|
"a ɪ",
|
||||||
|
|
42
src/utils.py
42
src/utils.py
|
@ -1372,12 +1372,20 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
|
||||||
|
|
||||||
# implicitly segment
|
# implicitly segment
|
||||||
if use_segment and not use_segments:
|
if use_segment and not use_segments:
|
||||||
tmp = {}
|
exists = True
|
||||||
tmp[filename] = result
|
for segment in result['segments']:
|
||||||
print(f"Audio not segmented, segmenting: {filename}")
|
if os.path.exists(filename.replace(".wav", f"_{pad(segment['id'], 4)}.wav")):
|
||||||
message = slice_dataset( voice, results=tmp )
|
continue
|
||||||
print(message)
|
exists = False
|
||||||
messages = messages + message.split("\n")
|
break
|
||||||
|
|
||||||
|
if not exists:
|
||||||
|
tmp = {}
|
||||||
|
tmp[filename] = result
|
||||||
|
print(f"Audio not segmented, segmenting: {filename}")
|
||||||
|
message = slice_dataset( voice, results=tmp )
|
||||||
|
print(message)
|
||||||
|
messages = messages + message.split("\n")
|
||||||
|
|
||||||
if not use_segment:
|
if not use_segment:
|
||||||
segments[filename] = {
|
segments[filename] = {
|
||||||
|
@ -1444,10 +1452,11 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
|
||||||
# from vall_e.emb.g2p import encode as phonemize
|
# from vall_e.emb.g2p import encode as phonemize
|
||||||
|
|
||||||
quantized = quantize( waveform, sample_rate ).cpu()
|
quantized = quantize( waveform, sample_rate ).cpu()
|
||||||
|
torch.save(quantized, f'{indir}/valle/{file.replace(".wav",".qnt.pt")}')
|
||||||
print("Quantized:", file)
|
print("Quantized:", file)
|
||||||
|
|
||||||
torch.save(quantized, f'{indir}/valle/{file.replace(".wav",".qnt.pt")}')
|
tokens = tokenize_text(text, stringed=False, skip_specials=True)
|
||||||
open(f'{indir}/valle/{file.replace(".wav",".phn.txt")}', 'w', encoding='utf-8').write(text)
|
open(f'{indir}/valle/{file.replace(".wav",".phn.txt")}', 'w', encoding='utf-8').write(" ".join( tokens ).replace(" \u02C8", "\u02C8"))
|
||||||
|
|
||||||
training_joined = "\n".join(lines['training'])
|
training_joined = "\n".join(lines['training'])
|
||||||
validation_joined = "\n".join(lines['validation'])
|
validation_joined = "\n".join(lines['validation'])
|
||||||
|
@ -1815,18 +1824,21 @@ def get_tokenizer_jsons( dir="./models/tokenizers/" ):
|
||||||
additionals = sorted([ f'{dir}/{d}' for d in os.listdir(dir) if d[-5:] == ".json" ]) if os.path.isdir(dir) else []
|
additionals = sorted([ f'{dir}/{d}' for d in os.listdir(dir) if d[-5:] == ".json" ]) if os.path.isdir(dir) else []
|
||||||
return relative_paths([ "./modules/tortoise-tts/tortoise/data/tokenizer.json" ] + additionals)
|
return relative_paths([ "./modules/tortoise-tts/tortoise/data/tokenizer.json" ] + additionals)
|
||||||
|
|
||||||
def tokenize_text( text, stringed=True, skip_specials=False ):
    """Tokenize ``text`` and return the resulting tokens.

    The tokenizer attached to the module-level ``tts`` object is used when
    ``tts`` is truthy; otherwise a standalone ``VoiceBpeTokenizer`` is built
    from ``args.tokenizer_json`` (falling back to the first entry returned by
    ``get_tokenizer_jsons()``).

    Args:
        text: The string to tokenize.
        stringed: When true (default), return a single string made of the
            ``str()`` of the encoded ids and the ``str()`` of the decoded
            token list, joined by a newline. When false, return the decoded
            token list itself.
        skip_specials: Forwarded as ``skip_special_tokens`` to the underlying
            tokenizer's ``decode`` call.

    Returns:
        A ``str`` when ``stringed`` is true, otherwise the list produced by
        splitting the decoded text on single spaces.
    """
    from tortoise.utils.tokenizer import VoiceBpeTokenizer

    if not tts:
        # No usable ``tts`` object: build a tokenizer directly from the
        # configured JSON instead.
        tokenizer = VoiceBpeTokenizer(args.tokenizer_json if args.tokenizer_json else get_tokenizer_jsons()[0])
    else:
        # Must be an assignment; a bare ``tts.tokenizer`` expression would
        # leave ``tokenizer`` unbound for the calls below.
        tokenizer = tts.tokenizer

    encoded = tokenizer.encode(text)
    # ``skip_specials`` is the caller-facing name of this flag.
    decoded = tokenizer.tokenizer.decode(encoded, skip_special_tokens=skip_specials).split(" ")

    if stringed:
        return "\n".join([ str(encoded), str(decoded) ])

    return decoded
|
||||||
|
|
||||||
def get_dataset_list(dir="./training/"):
    """Return the sorted names of sub-directories of ``dir`` that contain a ``train.txt`` entry."""
    datasets = []
    for entry in os.listdir(dir):
        candidate = os.path.join(dir, entry)
        # Only directories can be datasets; skip plain files.
        if not os.path.isdir(candidate):
            continue
        # A dataset is recognised by a "train.txt" entry directly inside it.
        if "train.txt" not in os.listdir(candidate):
            continue
        datasets.append(entry)
    datasets.sort()
    return datasets
|
||||||
|
|
Loading…
Reference in New Issue
Block a user