diff --git a/utils/__init__.py b/tortoise/__init__.py similarity index 100% rename from utils/__init__.py rename to tortoise/__init__.py diff --git a/api.py b/tortoise/api.py similarity index 97% rename from api.py rename to tortoise/api.py index 92e82be..8377b35 100644 --- a/api.py +++ b/tortoise/api.py @@ -1,4 +1,3 @@ -import argparse import os import random from urllib import request @@ -8,19 +7,18 @@ import torch.nn.functional as F import progressbar import torchaudio -from models.classifier import AudioMiniEncoderWithClassifierHead -from models.cvvp import CVVP -from models.diffusion_decoder import DiffusionTts -from models.autoregressive import UnifiedVoice +from tortoise.models.classifier import AudioMiniEncoderWithClassifierHead +from tortoise.models.cvvp import CVVP +from tortoise.models.diffusion_decoder import DiffusionTts +from tortoise.models.autoregressive import UnifiedVoice from tqdm import tqdm -from models.arch_util import TorchMelSpectrogram -from models.clvp import CLVP -from models.vocoder import UnivNetGenerator -from utils.audio import load_audio, wav_to_univnet_mel, denormalize_tacotron_mel -from utils.diffusion import SpacedDiffusion, space_timesteps, get_named_beta_schedule -from utils.tokenizer import VoiceBpeTokenizer, lev_distance - +from tortoise.models.arch_util import TorchMelSpectrogram +from tortoise.models.clvp import CLVP +from tortoise.models.vocoder import UnivNetGenerator +from tortoise.utils.audio import wav_to_univnet_mel, denormalize_tacotron_mel +from tortoise.utils.diffusion import SpacedDiffusion, space_timesteps, get_named_beta_schedule +from tortoise.utils.tokenizer import VoiceBpeTokenizer pbar = None diff --git a/do_tts.py b/tortoise/do_tts.py similarity index 96% rename from do_tts.py rename to tortoise/do_tts.py index fa0347e..12e1356 100644 --- a/do_tts.py +++ b/tortoise/do_tts.py @@ -4,7 +4,7 @@ import os import torchaudio from api import TextToSpeech -from utils.audio import load_audio, get_voices +from tortoise.utils.audio import load_audio, get_voices if __name__ == '__main__': parser = argparse.ArgumentParser() diff --git a/eval_multiple.py b/tortoise/eval_multiple.py similarity index 96% rename from eval_multiple.py rename to tortoise/eval_multiple.py index 9defa52..baaaaed 100644 --- a/eval_multiple.py +++ b/tortoise/eval_multiple.py @@ -3,7 +3,7 @@ import os import torchaudio from api import TextToSpeech -from utils.audio import load_audio +from tortoise.utils.audio import load_audio if __name__ == '__main__': fname = 'Y:\\clips\\books2\\subset512-oco.tsv' diff --git a/is_this_from_tortoise.py b/tortoise/is_this_from_tortoise.py similarity index 91% rename from is_this_from_tortoise.py rename to tortoise/is_this_from_tortoise.py index 550b33e..4bd6dbe 100644 --- a/is_this_from_tortoise.py +++ b/tortoise/is_this_from_tortoise.py @@ -1,7 +1,7 @@ import argparse from api import classify_audio_clip -from utils.audio import load_audio +from tortoise.utils.audio import load_audio if __name__ == '__main__': parser = argparse.ArgumentParser() diff --git a/models/arch_util.py b/tortoise/models/arch_util.py similarity index 99% rename from models/arch_util.py rename to tortoise/models/arch_util.py index 832315c..3a004af 100644 --- a/models/arch_util.py +++ b/tortoise/models/arch_util.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn import torch.nn.functional as F import torchaudio -from models.xtransformers import ContinuousTransformerWrapper, RelativePositionBias +from tortoise.models.xtransformers import ContinuousTransformerWrapper, RelativePositionBias def zero_module(module): diff --git a/models/autoregressive.py b/tortoise/models/autoregressive.py similarity index 99% rename from models/autoregressive.py rename to tortoise/models/autoregressive.py index 6a91748..28ed39b 100644 --- a/models/autoregressive.py +++ b/tortoise/models/autoregressive.py @@ -6,8 +6,8 @@ import torch.nn.functional as F from transformers import GPT2Config, GPT2PreTrainedModel, LogitsProcessorList from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions from transformers.utils.model_parallel_utils import get_device_map, assert_device_map -from models.arch_util import AttentionBlock -from utils.typical_sampling import TypicalLogitsWarper +from tortoise.models.arch_util import AttentionBlock +from tortoise.utils.typical_sampling import TypicalLogitsWarper def null_position_embeddings(range, dim): diff --git a/models/classifier.py b/tortoise/models/classifier.py similarity index 97% rename from models/classifier.py rename to tortoise/models/classifier.py index c899773..ce574ea 100644 --- a/models/classifier.py +++ b/tortoise/models/classifier.py @@ -1,9 +1,8 @@ import torch import torch.nn as nn -import torch.nn.functional as F from torch.utils.checkpoint import checkpoint -from models.arch_util import Upsample, Downsample, normalization, zero_module, AttentionBlock +from tortoise.models.arch_util import Upsample, Downsample, normalization, zero_module, AttentionBlock class ResBlock(nn.Module): diff --git a/models/clvp.py b/tortoise/models/clvp.py similarity index 97% rename from models/clvp.py rename to tortoise/models/clvp.py index 1eec06a..00f5011 100644 --- a/models/clvp.py +++ b/tortoise/models/clvp.py @@ -3,9 +3,9 @@ import torch.nn as nn import torch.nn.functional as F from torch import einsum -from models.arch_util import CheckpointedXTransformerEncoder -from models.transformer import Transformer -from models.xtransformers import Encoder +from tortoise.models.arch_util import CheckpointedXTransformerEncoder +from tortoise.models.transformer import Transformer +from tortoise.models.xtransformers import Encoder def exists(val): diff --git a/models/cvvp.py b/tortoise/models/cvvp.py similarity index 97% rename from models/cvvp.py rename to tortoise/models/cvvp.py index 0c9fd35..d094649 100644 --- a/models/cvvp.py +++ b/tortoise/models/cvvp.py @@ -4,8 +4,8 @@ import torch.nn.functional as F from torch import einsum from torch.utils.checkpoint import checkpoint -from models.arch_util import AttentionBlock -from models.xtransformers import ContinuousTransformerWrapper, Encoder +from tortoise.models.arch_util import AttentionBlock +from tortoise.models.xtransformers import ContinuousTransformerWrapper, Encoder def exists(val): diff --git a/models/diffusion_decoder.py b/tortoise/models/diffusion_decoder.py similarity index 99% rename from models/diffusion_decoder.py rename to tortoise/models/diffusion_decoder.py index 5fdf7ad..b779324 100644 --- a/models/diffusion_decoder.py +++ b/tortoise/models/diffusion_decoder.py @@ -7,7 +7,7 @@ import torch.nn as nn import torch.nn.functional as F from torch import autocast -from models.arch_util import normalization, AttentionBlock +from tortoise.models.arch_util import normalization, AttentionBlock def is_latent(t): diff --git a/models/transformer.py b/tortoise/models/transformer.py similarity index 100% rename from models/transformer.py rename to tortoise/models/transformer.py diff --git a/models/vocoder.py b/tortoise/models/vocoder.py similarity index 100% rename from models/vocoder.py rename to tortoise/models/vocoder.py diff --git a/models/xtransformers.py b/tortoise/models/xtransformers.py similarity index 100% rename from models/xtransformers.py rename to tortoise/models/xtransformers.py diff --git a/read.py b/tortoise/read.py similarity index 95% rename from read.py rename to tortoise/read.py index 9e4e04c..bd9ea11 100644 --- a/read.py +++ b/tortoise/read.py @@ -2,12 +2,10 @@ import argparse import os import torch -import torch.nn.functional as F import torchaudio -from api import TextToSpeech, format_conditioning -from utils.audio import load_audio, get_voices -from utils.tokenizer import VoiceBpeTokenizer +from api import TextToSpeech +from tortoise.utils.audio import load_audio, get_voices def split_and_recombine_text(texts, desired_length=200, max_len=300): diff --git a/samples_generator.py b/tortoise/samples_generator.py similarity index 95% rename from samples_generator.py rename to tortoise/samples_generator.py index e2b36d3..937ba2f 100644 --- a/samples_generator.py +++ b/tortoise/samples_generator.py @@ -4,7 +4,7 @@ import os if __name__ == '__main__': result = "These words were never spoken.

Handpicked results

" - for fv in os.listdir('results/favorites'): + for fv in os.listdir('../results/favorites'): url = f'https://github.com/neonbjb/tortoise-tts/raw/main/results/favorites/{fv}' result = result + f'
\n' @@ -30,7 +30,7 @@ if __name__ == '__main__': line = line + f'' line = line + "" lines.append(line) - for txt in os.listdir('results/various/'): + for txt in os.listdir('../results/various/'): if 'desktop' in txt: continue line = f'{txt}' @@ -42,7 +42,7 @@ if __name__ == '__main__': result = result + '\n'.join(lines) + "" result = result + "

Longform result for all voices:

" - for lf in os.listdir('results/riding_hood'): + for lf in os.listdir('../results/riding_hood'): url = f'https://github.com/neonbjb/tortoise-tts/raw/main/results/riding_hood/{lf}' result = result + f'
\n' diff --git a/sweep.py b/tortoise/sweep.py similarity index 98% rename from sweep.py rename to tortoise/sweep.py index bc72fec..cfe0de9 100644 --- a/sweep.py +++ b/tortoise/sweep.py @@ -4,7 +4,7 @@ from random import shuffle import torchaudio from api import TextToSpeech -from utils.audio import load_audio +from tortoise.utils.audio import load_audio def permutations(args): diff --git a/tortoise/utils/__init__.py b/tortoise/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/audio.py b/tortoise/utils/audio.py similarity index 99% rename from utils/audio.py rename to tortoise/utils/audio.py index cb86566..3fe558a 100644 --- a/utils/audio.py +++ b/tortoise/utils/audio.py @@ -6,7 +6,7 @@ import torchaudio import numpy as np from scipy.io.wavfile import read -from utils.stft import STFT +from tortoise.utils.stft import STFT def load_wav_to_torch(full_path): diff --git a/utils/diffusion.py b/tortoise/utils/diffusion.py similarity index 100% rename from utils/diffusion.py rename to tortoise/utils/diffusion.py diff --git a/utils/stft.py b/tortoise/utils/stft.py similarity index 100% rename from utils/stft.py rename to tortoise/utils/stft.py diff --git a/utils/tokenizer.py b/tortoise/utils/tokenizer.py similarity index 100% rename from utils/tokenizer.py rename to tortoise/utils/tokenizer.py diff --git a/utils/typical_sampling.py b/tortoise/utils/typical_sampling.py similarity index 100% rename from utils/typical_sampling.py rename to tortoise/utils/typical_sampling.py