diff --git a/tortoise/models/arch_util.py b/tortoise/models/arch_util.py index 5d8c36e..6a79194 100644 --- a/tortoise/models/arch_util.py +++ b/tortoise/models/arch_util.py @@ -1,3 +1,4 @@ +import os import functools import math @@ -288,9 +289,12 @@ class AudioMiniEncoder(nn.Module): return h[:, :, 0] +DEFAULT_MEL_NORM_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/mel_norms.pth') + + class TorchMelSpectrogram(nn.Module): def __init__(self, filter_length=1024, hop_length=256, win_length=1024, n_mel_channels=80, mel_fmin=0, mel_fmax=8000, - sampling_rate=22050, normalize=False, mel_norm_file='tortoise/data/mel_norms.pth'): + sampling_rate=22050, normalize=False, mel_norm_file=DEFAULT_MEL_NORM_FILE): super().__init__() # These are the default tacotron values for the MEL spectrogram. self.filter_length = filter_length diff --git a/tortoise/utils/audio.py b/tortoise/utils/audio.py index 7d5390c..b125258 100644 --- a/tortoise/utils/audio.py +++ b/tortoise/utils/audio.py @@ -10,6 +10,9 @@ from scipy.io.wavfile import read from tortoise.utils.stft import STFT +BUILTIN_VOICES_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../voices') + + def load_wav_to_torch(full_path): sampling_rate, data = read(full_path) if data.dtype == np.int32: @@ -83,7 +86,7 @@ def dynamic_range_decompression(x, C=1): def get_voices(extra_voice_dirs=[]): - dirs = ['tortoise/voices'] + extra_voice_dirs + dirs = [BUILTIN_VOICES_DIR] + extra_voice_dirs voices = {} for d in dirs: subs = os.listdir(d) diff --git a/tortoise/utils/tokenizer.py b/tortoise/utils/tokenizer.py index a8959d8..3ab1c31 100644 --- a/tortoise/utils/tokenizer.py +++ b/tortoise/utils/tokenizer.py @@ -1,3 +1,4 @@ +import os import re import inflect @@ -165,8 +166,11 @@ def lev_distance(s1, s2): return distances[-1] +DEFAULT_VOCAB_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/tokenizer.json') + + class VoiceBpeTokenizer: - def __init__(self, vocab_file='tortoise/data/tokenizer.json'): + def __init__(self, vocab_file=DEFAULT_VOCAB_FILE): if vocab_file is not None: self.tokenizer = Tokenizer.from_file(vocab_file)