diff --git a/tortoise_tts/api.py b/api.py similarity index 96% rename from tortoise_tts/api.py rename to api.py index aa1f358..6aa94cf 100644 --- a/tortoise_tts/api.py +++ b/api.py @@ -8,18 +8,18 @@ import torch.nn.functional as F import progressbar import torchaudio -from tortoise_tts.models.classifier import AudioMiniEncoderWithClassifierHead -from tortoise_tts.models.cvvp import CVVP -from tortoise_tts.models.diffusion_decoder import DiffusionTts -from tortoise_tts.models.autoregressive import UnifiedVoice +from models.classifier import AudioMiniEncoderWithClassifierHead +from models.cvvp import CVVP +from models.diffusion_decoder import DiffusionTts +from models.autoregressive import UnifiedVoice from tqdm import tqdm -from tortoise_tts.models.arch_util import TorchMelSpectrogram -from tortoise_tts.models.clvp import CLVP -from tortoise_tts.models.vocoder import UnivNetGenerator -from tortoise_tts.utils.audio import load_audio, wav_to_univnet_mel, denormalize_tacotron_mel -from tortoise_tts.utils.diffusion import SpacedDiffusion, space_timesteps, get_named_beta_schedule -from tortoise_tts.utils.tokenizer import VoiceBpeTokenizer, lev_distance +from models.arch_util import TorchMelSpectrogram +from models.clvp import CLVP +from models.vocoder import UnivNetGenerator +from utils.audio import load_audio, wav_to_univnet_mel, denormalize_tacotron_mel +from utils.diffusion import SpacedDiffusion, space_timesteps, get_named_beta_schedule +from utils.tokenizer import VoiceBpeTokenizer, lev_distance pbar = None diff --git a/tortoise_tts/data/riding_hood.txt b/data/riding_hood.txt similarity index 100% rename from tortoise_tts/data/riding_hood.txt rename to data/riding_hood.txt diff --git a/tortoise_tts/data/tokenizer.json b/data/tokenizer.json similarity index 100% rename from tortoise_tts/data/tokenizer.json rename to data/tokenizer.json diff --git a/tortoise_tts/do_tts.py b/do_tts.py similarity index 100% rename from tortoise_tts/do_tts.py rename to do_tts.py diff --git a/tortoise_tts/eval_multiple.py b/eval_multiple.py similarity index 100% rename from tortoise_tts/eval_multiple.py rename to eval_multiple.py diff --git a/tortoise_tts/is_this_from_tortoise.py b/is_this_from_tortoise.py similarity index 100% rename from tortoise_tts/is_this_from_tortoise.py rename to is_this_from_tortoise.py diff --git a/tortoise_tts/models/arch_util.py b/models/arch_util.py similarity index 99% rename from tortoise_tts/models/arch_util.py rename to models/arch_util.py index 3390153..832315c 100644 --- a/tortoise_tts/models/arch_util.py +++ b/models/arch_util.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn import torch.nn.functional as F import torchaudio -from tortoise_tts.models.xtransformers import ContinuousTransformerWrapper, RelativePositionBias +from models.xtransformers import ContinuousTransformerWrapper, RelativePositionBias def zero_module(module): diff --git a/tortoise_tts/models/autoregressive.py b/models/autoregressive.py similarity index 99% rename from tortoise_tts/models/autoregressive.py rename to models/autoregressive.py index aa2393a..6a91748 100644 --- a/tortoise_tts/models/autoregressive.py +++ b/models/autoregressive.py @@ -6,8 +6,8 @@ import torch.nn.functional as F from transformers import GPT2Config, GPT2PreTrainedModel, LogitsProcessorList from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions from transformers.utils.model_parallel_utils import get_device_map, assert_device_map -from tortoise_tts.models.arch_util import AttentionBlock -from tortoise_tts.utils.typical_sampling import TypicalLogitsWarper +from models.arch_util import AttentionBlock +from utils.typical_sampling import TypicalLogitsWarper def null_position_embeddings(range, dim): diff --git a/tortoise_tts/models/classifier.py b/models/classifier.py similarity index 97% rename from tortoise_tts/models/classifier.py rename to models/classifier.py index abd0ec8..c899773 100644 --- a/tortoise_tts/models/classifier.py +++ b/models/classifier.py @@ -3,7 +3,7 @@ import torch.nn as nn import torch.nn.functional as F from torch.utils.checkpoint import checkpoint -from tortoise_tts.models.arch_util import Upsample, Downsample, normalization, zero_module, AttentionBlock +from models.arch_util import Upsample, Downsample, normalization, zero_module, AttentionBlock class ResBlock(nn.Module): diff --git a/tortoise_tts/models/clvp.py b/models/clvp.py similarity index 96% rename from tortoise_tts/models/clvp.py rename to models/clvp.py index c054cc6..1eec06a 100644 --- a/tortoise_tts/models/clvp.py +++ b/models/clvp.py @@ -3,9 +3,9 @@ import torch.nn as nn import torch.nn.functional as F from torch import einsum -from tortoise_tts.models.arch_util import CheckpointedXTransformerEncoder -from tortoise_tts.models.transformer import Transformer -from tortoise_tts.models.xtransformers import Encoder +from models.arch_util import CheckpointedXTransformerEncoder +from models.transformer import Transformer +from models.xtransformers import Encoder def exists(val): diff --git a/tortoise_tts/models/cvvp.py b/models/cvvp.py similarity index 97% rename from tortoise_tts/models/cvvp.py rename to models/cvvp.py index fe441f9..0c9fd35 100644 --- a/tortoise_tts/models/cvvp.py +++ b/models/cvvp.py @@ -4,8 +4,8 @@ import torch.nn.functional as F from torch import einsum from torch.utils.checkpoint import checkpoint -from tortoise_tts.models.arch_util import AttentionBlock -from tortoise_tts.models.xtransformers import ContinuousTransformerWrapper, Encoder +from models.arch_util import AttentionBlock +from models.xtransformers import ContinuousTransformerWrapper, Encoder def exists(val): diff --git a/tortoise_tts/models/diffusion_decoder.py b/models/diffusion_decoder.py similarity index 99% rename from tortoise_tts/models/diffusion_decoder.py rename to models/diffusion_decoder.py index d72315f..5fdf7ad 100644 --- a/tortoise_tts/models/diffusion_decoder.py +++ b/models/diffusion_decoder.py @@ -7,7 +7,7 @@ import torch.nn as nn import torch.nn.functional as F from torch import autocast -from tortoise_tts.models.arch_util import normalization, AttentionBlock +from models.arch_util import normalization, AttentionBlock def is_latent(t): diff --git a/tortoise_tts/models/transformer.py b/models/transformer.py similarity index 100% rename from tortoise_tts/models/transformer.py rename to models/transformer.py diff --git a/tortoise_tts/models/vocoder.py b/models/vocoder.py similarity index 100% rename from tortoise_tts/models/vocoder.py rename to models/vocoder.py diff --git a/tortoise_tts/models/xtransformers.py b/models/xtransformers.py similarity index 100% rename from tortoise_tts/models/xtransformers.py rename to models/xtransformers.py diff --git a/tortoise_tts/read.py b/read.py similarity index 100% rename from tortoise_tts/read.py rename to read.py diff --git a/requirements.txt b/requirements.txt index 91503ea..b971e61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,4 @@ inflect progressbar einops unidecode -entmax -scipy -librosa \ No newline at end of file +entmax \ No newline at end of file diff --git a/tortoise_tts/results/various/desktop.ini b/results/various/desktop.ini similarity index 100% rename from tortoise_tts/results/various/desktop.ini rename to results/various/desktop.ini diff --git a/tortoise_tts/samples_generator.py b/samples_generator.py similarity index 100% rename from tortoise_tts/samples_generator.py rename to samples_generator.py diff --git a/tortoise_tts/sweep.py b/sweep.py similarity index 100% rename from tortoise_tts/sweep.py rename to sweep.py diff --git a/tortoise_tts/results/.gitattributes b/tortoise_tts/results/.gitattributes deleted file mode 100644 index e935e39..0000000 --- a/tortoise_tts/results/.gitattributes +++ /dev/null @@ -1,5 +0,0 @@ -favorite_riding_hood.mp3 filter=lfs diff=lfs merge=lfs -text -favorites filter=lfs diff=lfs merge=lfs -text -riding_hood filter=lfs diff=lfs merge=lfs -text -tacotron_comparison filter=lfs diff=lfs merge=lfs -text -various filter=lfs diff=lfs merge=lfs -text diff --git a/tortoise_tts/data/__init__.py b/utils/__init__.py similarity index 100% rename from tortoise_tts/data/__init__.py rename to utils/__init__.py diff --git a/tortoise_tts/utils/audio.py b/utils/audio.py similarity index 99% rename from tortoise_tts/utils/audio.py rename to utils/audio.py index e560c45..aad3a0f 100644 --- a/tortoise_tts/utils/audio.py +++ b/utils/audio.py @@ -6,7 +6,7 @@ import torchaudio import numpy as np from scipy.io.wavfile import read -from tortoise_tts.utils.stft import STFT +from utils.stft import STFT def load_wav_to_torch(full_path): diff --git a/tortoise_tts/utils/diffusion.py b/utils/diffusion.py similarity index 100% rename from tortoise_tts/utils/diffusion.py rename to utils/diffusion.py diff --git a/tortoise_tts/utils/stft.py b/utils/stft.py similarity index 100% rename from tortoise_tts/utils/stft.py rename to utils/stft.py diff --git a/tortoise_tts/utils/tokenizer.py b/utils/tokenizer.py similarity index 97% rename from tortoise_tts/utils/tokenizer.py rename to utils/tokenizer.py index f018abd..ed7e4cd 100644 --- a/tortoise_tts/utils/tokenizer.py +++ b/utils/tokenizer.py @@ -3,7 +3,6 @@ import re import inflect import torch from tokenizers import Tokenizer -from pathlib import Path # Regular expression matching whitespace: @@ -166,8 +165,6 @@ def lev_distance(s1, s2): class VoiceBpeTokenizer: def __init__(self, vocab_file='data/tokenizer.json'): - vocab_file = str(Path(__file__).parent.parent / Path(vocab_file)) - print(vocab_file) if vocab_file is not None: self.tokenizer = Tokenizer.from_file(vocab_file) diff --git a/tortoise_tts/utils/typical_sampling.py b/utils/typical_sampling.py similarity index 100% rename from tortoise_tts/utils/typical_sampling.py rename to utils/typical_sampling.py