diff --git a/README.md b/README.md index 7353a7b..450754e 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ For the those in the ML space: this is created by projecting a random vector ont This repo comes with several pre-packaged voices. Voices prepended with "train_" came from the training set and perform far better than the others. If your goal is high quality speech, I recommend you pick one of them. If you want to see -what Tortoise can do for zero-shot mimicing, take a look at the others. +what Tortoise can do for zero-shot mimicking, take a look at the others. ### Adding a new voice diff --git a/scripts/tortoise_tts.py b/scripts/tortoise_tts.py index f3849d0..932a780 100755 --- a/scripts/tortoise_tts.py +++ b/scripts/tortoise_tts.py @@ -110,7 +110,7 @@ tuning_group.add_argument( tuning_group.add_argument( '--cvvp-amount', type=float, default=None, help='How much the CVVP model should influence the output.' - 'Increasing this can in some cases reduce the likelyhood of multiple speakers.') + 'Increasing this can in some cases reduce the likelihood of multiple speakers.') tuning_group.add_argument( '--diffusion-iterations', type=int, default=None, help='Number of diffusion steps to perform. More steps means the network has more chances to iteratively' diff --git a/tortoise/do_tts.py b/tortoise/do_tts.py index 47f78ec..522afa0 100644 --- a/tortoise/do_tts.py +++ b/tortoise/do_tts.py @@ -20,7 +20,7 @@ if __name__ == '__main__': parser.add_argument('--seed', type=int, help='Random seed which can be used to reproduce results.', default=None) parser.add_argument('--produce_debug_state', type=bool, help='Whether or not to produce debug_state.pth, which can aid in reproducing problems. Defaults to true.', default=True) parser.add_argument('--cvvp_amount', type=float, help='How much the CVVP model should influence the output.' - 'Increasing this can in some cases reduce the likelyhood of multiple speakers. Defaults to 0 (disabled)', default=.0) + 'Increasing this can in some cases reduce the likelihood of multiple speakers. Defaults to 0 (disabled)', default=.0) args = parser.parse_args() os.makedirs(args.output_path, exist_ok=True) diff --git a/tortoise/models/arch_util.py b/tortoise/models/arch_util.py index ffce5cf..661ee1f 100644 --- a/tortoise/models/arch_util.py +++ b/tortoise/models/arch_util.py @@ -43,7 +43,7 @@ def normalization(channels): class QKVAttentionLegacy(nn.Module): """ - A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping + A module which performs QKV attention. Matches legacy QKVAttention + input/output heads shaping """ def __init__(self, n_heads): diff --git a/tortoise/models/transformer.py b/tortoise/models/transformer.py index aa59b46..707e9eb 100644 --- a/tortoise/models/transformer.py +++ b/tortoise/models/transformer.py @@ -216,4 +216,4 @@ class Transformer(nn.Module): self.layers = execute_type(layers, args_route = attn_route_map) def forward(self, x, **kwargs): - return self.layers(x, **kwargs) \ No newline at end of file + return self.layers(x, **kwargs)