From 3b19328886c80c7307b50b4f544af9172ed9ad32 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Wed, 27 Apr 2022 15:04:15 +0200 Subject: [PATCH] Package everything --- requirements.txt | 4 +++- setup.py | 22 +++++++++++++++++++ tortoise_tts/__init__.py | 1 + api.py => tortoise_tts/api.py | 20 ++++++++--------- {utils => tortoise_tts/data}/__init__.py | 0 {data => tortoise_tts/data}/riding_hood.txt | 0 {data => tortoise_tts/data}/tokenizer.json | 0 do_tts.py => tortoise_tts/do_tts.py | 0 .../eval_multiple.py | 0 .../is_this_from_tortoise.py | 0 tortoise_tts/models/__init__.py | 0 {models => tortoise_tts/models}/arch_util.py | 2 +- .../models}/autoregressive.py | 4 ++-- {models => tortoise_tts/models}/classifier.py | 2 +- {models => tortoise_tts/models}/clvp.py | 6 ++--- {models => tortoise_tts/models}/cvvp.py | 4 ++-- .../models}/diffusion_decoder.py | 2 +- .../models}/transformer.py | 0 {models => tortoise_tts/models}/vocoder.py | 0 .../models}/xtransformers.py | 0 read.py => tortoise_tts/read.py | 0 .../results}/.gitattributes | 0 tortoise_tts/results/__init__.py | 0 .../results}/various/desktop.ini | 0 .../samples_generator.py | 0 sweep.py => tortoise_tts/sweep.py | 0 tortoise_tts/utils/__init__.py | 4 ++++ {utils => tortoise_tts/utils}/audio.py | 2 +- {utils => tortoise_tts/utils}/diffusion.py | 0 {utils => tortoise_tts/utils}/stft.py | 0 {utils => tortoise_tts/utils}/tokenizer.py | 3 +++ .../utils}/typical_sampling.py | 0 tortoise_tts/voices/__init__.py | 0 33 files changed, 54 insertions(+), 22 deletions(-) create mode 100644 setup.py create mode 100644 tortoise_tts/__init__.py rename api.py => tortoise_tts/api.py (96%) rename {utils => tortoise_tts/data}/__init__.py (100%) rename {data => tortoise_tts/data}/riding_hood.txt (100%) rename {data => tortoise_tts/data}/tokenizer.json (100%) rename do_tts.py => tortoise_tts/do_tts.py (100%) rename eval_multiple.py => tortoise_tts/eval_multiple.py (100%) rename is_this_from_tortoise.py => tortoise_tts/is_this_from_tortoise.py (100%) create mode 100644 tortoise_tts/models/__init__.py rename {models => tortoise_tts/models}/arch_util.py (99%) rename {models => tortoise_tts/models}/autoregressive.py (99%) rename {models => tortoise_tts/models}/classifier.py (97%) rename {models => tortoise_tts/models}/clvp.py (96%) rename {models => tortoise_tts/models}/cvvp.py (97%) rename {models => tortoise_tts/models}/diffusion_decoder.py (99%) rename {models => tortoise_tts/models}/transformer.py (100%) rename {models => tortoise_tts/models}/vocoder.py (100%) rename {models => tortoise_tts/models}/xtransformers.py (100%) rename read.py => tortoise_tts/read.py (100%) rename {results => tortoise_tts/results}/.gitattributes (100%) create mode 100644 tortoise_tts/results/__init__.py rename {results => tortoise_tts/results}/various/desktop.ini (100%) rename samples_generator.py => tortoise_tts/samples_generator.py (100%) rename sweep.py => tortoise_tts/sweep.py (100%) create mode 100644 tortoise_tts/utils/__init__.py rename {utils => tortoise_tts/utils}/audio.py (99%) rename {utils => tortoise_tts/utils}/diffusion.py (100%) rename {utils => tortoise_tts/utils}/stft.py (100%) rename {utils => tortoise_tts/utils}/tokenizer.py (97%) rename {utils => tortoise_tts/utils}/typical_sampling.py (100%) create mode 100644 tortoise_tts/voices/__init__.py diff --git a/requirements.txt b/requirements.txt index b971e61..91503ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,6 @@ inflect progressbar einops unidecode -entmax \ No newline at end of file +entmax +scipy +librosa \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..3162754 --- /dev/null +++ b/setup.py @@ -0,0 +1,22 @@ +from setuptools import setup, find_packages + +install_requires = [ + "torch", + "torchaudio", + "rotary_embedding_torch", + "transformers", + "tokenizers", + "inflect", + "progressbar", + "einops", + "unidecode", + "entmax", + "scipy", + "librosa" +] + +setup( + name="tortoise_tts", + packages=['tortoise_tts'], + install_requires=install_requires, +) \ No newline at end of file diff --git a/tortoise_tts/__init__.py b/tortoise_tts/__init__.py new file mode 100644 index 0000000..c9cc7ce --- /dev/null +++ b/tortoise_tts/__init__.py @@ -0,0 +1 @@ +from .api import TextToSpeech diff --git a/api.py b/tortoise_tts/api.py similarity index 96% rename from api.py rename to tortoise_tts/api.py index 6aa94cf..aa1f358 100644 --- a/api.py +++ b/tortoise_tts/api.py @@ -8,18 +8,18 @@ import torch.nn.functional as F import progressbar import torchaudio -from models.classifier import AudioMiniEncoderWithClassifierHead -from models.cvvp import CVVP -from models.diffusion_decoder import DiffusionTts -from models.autoregressive import UnifiedVoice +from tortoise_tts.models.classifier import AudioMiniEncoderWithClassifierHead +from tortoise_tts.models.cvvp import CVVP +from tortoise_tts.models.diffusion_decoder import DiffusionTts +from tortoise_tts.models.autoregressive import UnifiedVoice from tqdm import tqdm -from models.arch_util import TorchMelSpectrogram -from models.clvp import CLVP -from models.vocoder import UnivNetGenerator -from utils.audio import load_audio, wav_to_univnet_mel, denormalize_tacotron_mel -from utils.diffusion import SpacedDiffusion, space_timesteps, get_named_beta_schedule -from utils.tokenizer import VoiceBpeTokenizer, lev_distance +from tortoise_tts.models.arch_util import TorchMelSpectrogram +from tortoise_tts.models.clvp import CLVP +from tortoise_tts.models.vocoder import UnivNetGenerator +from tortoise_tts.utils.audio import load_audio, wav_to_univnet_mel, denormalize_tacotron_mel +from tortoise_tts.utils.diffusion import SpacedDiffusion, space_timesteps, get_named_beta_schedule +from tortoise_tts.utils.tokenizer import VoiceBpeTokenizer, lev_distance pbar = None diff --git a/utils/__init__.py b/tortoise_tts/data/__init__.py similarity index 100% rename from utils/__init__.py rename to tortoise_tts/data/__init__.py diff --git a/data/riding_hood.txt b/tortoise_tts/data/riding_hood.txt similarity index 100% rename from data/riding_hood.txt rename to tortoise_tts/data/riding_hood.txt diff --git a/data/tokenizer.json b/tortoise_tts/data/tokenizer.json similarity index 100% rename from data/tokenizer.json rename to tortoise_tts/data/tokenizer.json diff --git a/do_tts.py b/tortoise_tts/do_tts.py similarity index 100% rename from do_tts.py rename to tortoise_tts/do_tts.py diff --git a/eval_multiple.py b/tortoise_tts/eval_multiple.py similarity index 100% rename from eval_multiple.py rename to tortoise_tts/eval_multiple.py diff --git a/is_this_from_tortoise.py b/tortoise_tts/is_this_from_tortoise.py similarity index 100% rename from is_this_from_tortoise.py rename to tortoise_tts/is_this_from_tortoise.py diff --git a/tortoise_tts/models/__init__.py b/tortoise_tts/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/arch_util.py b/tortoise_tts/models/arch_util.py similarity index 99% rename from models/arch_util.py rename to tortoise_tts/models/arch_util.py index 832315c..3390153 100644 --- a/models/arch_util.py +++ b/tortoise_tts/models/arch_util.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn import torch.nn.functional as F import torchaudio -from models.xtransformers import ContinuousTransformerWrapper, RelativePositionBias +from tortoise_tts.models.xtransformers import ContinuousTransformerWrapper, RelativePositionBias def zero_module(module): diff --git a/models/autoregressive.py b/tortoise_tts/models/autoregressive.py similarity index 99% rename from models/autoregressive.py rename to tortoise_tts/models/autoregressive.py index 6a91748..aa2393a 100644 --- a/models/autoregressive.py +++ b/tortoise_tts/models/autoregressive.py @@ -6,8 +6,8 @@ import torch.nn.functional as F from transformers import GPT2Config, GPT2PreTrainedModel, LogitsProcessorList from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions from transformers.utils.model_parallel_utils import get_device_map, assert_device_map -from models.arch_util import AttentionBlock -from utils.typical_sampling import TypicalLogitsWarper +from tortoise_tts.models.arch_util import AttentionBlock +from tortoise_tts.utils.typical_sampling import TypicalLogitsWarper def null_position_embeddings(range, dim): diff --git a/models/classifier.py b/tortoise_tts/models/classifier.py similarity index 97% rename from models/classifier.py rename to tortoise_tts/models/classifier.py index c899773..abd0ec8 100644 --- a/models/classifier.py +++ b/tortoise_tts/models/classifier.py @@ -3,7 +3,7 @@ import torch.nn as nn import torch.nn.functional as F from torch.utils.checkpoint import checkpoint -from models.arch_util import Upsample, Downsample, normalization, zero_module, AttentionBlock +from tortoise_tts.models.arch_util import Upsample, Downsample, normalization, zero_module, AttentionBlock class ResBlock(nn.Module): diff --git a/models/clvp.py b/tortoise_tts/models/clvp.py similarity index 96% rename from models/clvp.py rename to tortoise_tts/models/clvp.py index 1eec06a..c054cc6 100644 --- a/models/clvp.py +++ b/tortoise_tts/models/clvp.py @@ -3,9 +3,9 @@ import torch.nn as nn import torch.nn.functional as F from torch import einsum -from models.arch_util import CheckpointedXTransformerEncoder -from models.transformer import Transformer -from models.xtransformers import Encoder +from tortoise_tts.models.arch_util import CheckpointedXTransformerEncoder +from tortoise_tts.models.transformer import Transformer +from tortoise_tts.models.xtransformers import Encoder def exists(val): diff --git a/models/cvvp.py b/tortoise_tts/models/cvvp.py similarity index 97% rename from models/cvvp.py rename to tortoise_tts/models/cvvp.py index 0c9fd35..fe441f9 100644 --- a/models/cvvp.py +++ b/tortoise_tts/models/cvvp.py @@ -4,8 +4,8 @@ import torch.nn.functional as F from torch import einsum from torch.utils.checkpoint import checkpoint -from models.arch_util import AttentionBlock -from models.xtransformers import ContinuousTransformerWrapper, Encoder +from tortoise_tts.models.arch_util import AttentionBlock +from tortoise_tts.models.xtransformers import ContinuousTransformerWrapper, Encoder def exists(val): diff --git a/models/diffusion_decoder.py b/tortoise_tts/models/diffusion_decoder.py similarity index 99% rename from models/diffusion_decoder.py rename to tortoise_tts/models/diffusion_decoder.py index 5fdf7ad..d72315f 100644 --- a/models/diffusion_decoder.py +++ b/tortoise_tts/models/diffusion_decoder.py @@ -7,7 +7,7 @@ import torch.nn as nn import torch.nn.functional as F from torch import autocast -from models.arch_util import normalization, AttentionBlock +from tortoise_tts.models.arch_util import normalization, AttentionBlock def is_latent(t): diff --git a/models/transformer.py b/tortoise_tts/models/transformer.py similarity index 100% rename from models/transformer.py rename to tortoise_tts/models/transformer.py diff --git a/models/vocoder.py b/tortoise_tts/models/vocoder.py similarity index 100% rename from models/vocoder.py rename to tortoise_tts/models/vocoder.py diff --git a/models/xtransformers.py b/tortoise_tts/models/xtransformers.py similarity index 100% rename from models/xtransformers.py rename to tortoise_tts/models/xtransformers.py diff --git a/read.py b/tortoise_tts/read.py similarity index 100% rename from read.py rename to tortoise_tts/read.py diff --git a/results/.gitattributes b/tortoise_tts/results/.gitattributes similarity index 100% rename from results/.gitattributes rename to tortoise_tts/results/.gitattributes diff --git a/tortoise_tts/results/__init__.py b/tortoise_tts/results/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/results/various/desktop.ini b/tortoise_tts/results/various/desktop.ini similarity index 100% rename from results/various/desktop.ini rename to tortoise_tts/results/various/desktop.ini diff --git a/samples_generator.py b/tortoise_tts/samples_generator.py similarity index 100% rename from samples_generator.py rename to tortoise_tts/samples_generator.py diff --git a/sweep.py b/tortoise_tts/sweep.py similarity index 100% rename from sweep.py rename to tortoise_tts/sweep.py diff --git a/tortoise_tts/utils/__init__.py b/tortoise_tts/utils/__init__.py new file mode 100644 index 0000000..fa1f97f --- /dev/null +++ b/tortoise_tts/utils/__init__.py @@ -0,0 +1,4 @@ +from .audio import ( + load_audio, + get_voices +) diff --git a/utils/audio.py b/tortoise_tts/utils/audio.py similarity index 99% rename from utils/audio.py rename to tortoise_tts/utils/audio.py index aad3a0f..e560c45 100644 --- a/utils/audio.py +++ b/tortoise_tts/utils/audio.py @@ -6,7 +6,7 @@ import torchaudio import numpy as np from scipy.io.wavfile import read -from utils.stft import STFT +from tortoise_tts.utils.stft import STFT def load_wav_to_torch(full_path): diff --git a/utils/diffusion.py b/tortoise_tts/utils/diffusion.py similarity index 100% rename from utils/diffusion.py rename to tortoise_tts/utils/diffusion.py diff --git a/utils/stft.py b/tortoise_tts/utils/stft.py similarity index 100% rename from utils/stft.py rename to tortoise_tts/utils/stft.py diff --git a/utils/tokenizer.py b/tortoise_tts/utils/tokenizer.py similarity index 97% rename from utils/tokenizer.py rename to tortoise_tts/utils/tokenizer.py index ed7e4cd..f018abd 100644 --- a/utils/tokenizer.py +++ b/tortoise_tts/utils/tokenizer.py @@ -3,6 +3,7 @@ import re import inflect import torch from tokenizers import Tokenizer +from pathlib import Path # Regular expression matching whitespace: @@ -165,6 +166,8 @@ def lev_distance(s1, s2): class VoiceBpeTokenizer: def __init__(self, vocab_file='data/tokenizer.json'): + vocab_file = str(Path(__file__).parent.parent / Path(vocab_file)) + print(vocab_file) if vocab_file is not None: self.tokenizer = Tokenizer.from_file(vocab_file) diff --git a/utils/typical_sampling.py b/tortoise_tts/utils/typical_sampling.py similarity index 100% rename from utils/typical_sampling.py rename to tortoise_tts/utils/typical_sampling.py diff --git a/tortoise_tts/voices/__init__.py b/tortoise_tts/voices/__init__.py new file mode 100644 index 0000000..e69de29