diff --git a/.gitignore b/.gitignore index d632a14..82504f8 100644 --- a/.gitignore +++ b/.gitignore @@ -129,7 +129,6 @@ dmypy.json .pyre/ .idea/* -tortoise/.models/* -tortoise/random_voices/* +.models/* .custom/* results/* \ No newline at end of file diff --git a/tortoise/models/arch_util.py b/tortoise/models/arch_util.py index 3a004af..5d8c36e 100644 --- a/tortoise/models/arch_util.py +++ b/tortoise/models/arch_util.py @@ -290,7 +290,7 @@ class AudioMiniEncoder(nn.Module): class TorchMelSpectrogram(nn.Module): def __init__(self, filter_length=1024, hop_length=256, win_length=1024, n_mel_channels=80, mel_fmin=0, mel_fmax=8000, - sampling_rate=22050, normalize=False, mel_norm_file='data/mel_norms.pth'): + sampling_rate=22050, normalize=False, mel_norm_file='tortoise/data/mel_norms.pth'): super().__init__() # These are the default tacotron values for the MEL spectrogram. self.filter_length = filter_length diff --git a/tortoise/read.py b/tortoise/read.py index b5d6efe..5212e3b 100644 --- a/tortoise/read.py +++ b/tortoise/read.py @@ -28,7 +28,7 @@ def split_and_recombine_text(texts, desired_length=200, max_len=300): if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--textfile', type=str, help='A file containing the text to read.', default="data/riding_hood.txt") + parser.add_argument('--textfile', type=str, help='A file containing the text to read.', default="tortoise/data/riding_hood.txt") parser.add_argument('--voice', type=str, help='Selects the voice to use for generation. See options in voices/ directory (and add your own!) ' 'Use the & character to join two voices together. Use a comma to perform inference on multiple voices.', default='pat') parser.add_argument('--output_path', type=str, help='Where to store outputs.', default='../results/longform/') diff --git a/tortoise/utils/audio.py b/tortoise/utils/audio.py index c76f7a6..d857da9 100644 --- a/tortoise/utils/audio.py +++ b/tortoise/utils/audio.py @@ -82,10 +82,10 @@ def dynamic_range_decompression(x, C=1): def get_voices(): - subs = os.listdir('voices') + subs = os.listdir('tortoise/voices') voices = {} for sub in subs: - subj = os.path.join('voices', sub) + subj = os.path.join('tortoise/voices', sub) if os.path.isdir(subj): voices[sub] = list(glob(f'{subj}/*.wav')) + list(glob(f'{subj}/*.mp3')) + list(glob(f'{subj}/*.pth')) return voices diff --git a/tortoise/utils/tokenizer.py b/tortoise/utils/tokenizer.py index ed7e4cd..2f36a06 100644 --- a/tortoise/utils/tokenizer.py +++ b/tortoise/utils/tokenizer.py @@ -164,7 +164,7 @@ def lev_distance(s1, s2): return distances[-1] class VoiceBpeTokenizer: - def __init__(self, vocab_file='data/tokenizer.json'): + def __init__(self, vocab_file='tortoise/data/tokenizer.json'): if vocab_file is not None: self.tokenizer = Tokenizer.from_file(vocab_file)