diff --git a/README.md b/README.md
index 7353a7b..450754e 100644
--- a/README.md
+++ b/README.md
@@ -121,7 +121,7 @@ For the those in the ML space: this is created by projecting a random vector ont
 
 This repo comes with several pre-packaged voices. Voices prepended with "train_" came from the training set and perform
 far better than the others. If your goal is high quality speech, I recommend you pick one of them. If you want to see
-what Tortoise can do for zero-shot mimicing, take a look at the others.
+what Tortoise can do for zero-shot mimicking, take a look at the others.
 
 ### Adding a new voice
 
diff --git a/scripts/tortoise_tts.py b/scripts/tortoise_tts.py
index f3849d0..932a780 100755
--- a/scripts/tortoise_tts.py
+++ b/scripts/tortoise_tts.py
@@ -110,7 +110,7 @@ tuning_group.add_argument(
 tuning_group.add_argument(
     '--cvvp-amount', type=float, default=None,
     help='How much the CVVP model should influence the output.'
-    'Increasing this can in some cases reduce the likelyhood of multiple speakers.')
+    ' Increasing this can in some cases reduce the likelihood of multiple speakers.')
 tuning_group.add_argument(
     '--diffusion-iterations', type=int, default=None,
     help='Number of diffusion steps to perform.  More steps means the network has more chances to iteratively'
diff --git a/tortoise/do_tts.py b/tortoise/do_tts.py
index 47f78ec..522afa0 100644
--- a/tortoise/do_tts.py
+++ b/tortoise/do_tts.py
@@ -20,7 +20,7 @@ if __name__ == '__main__':
     parser.add_argument('--seed', type=int, help='Random seed which can be used to reproduce results.', default=None)
     parser.add_argument('--produce_debug_state', type=bool, help='Whether or not to produce debug_state.pth, which can aid in reproducing problems. Defaults to true.', default=True)
     parser.add_argument('--cvvp_amount', type=float, help='How much the CVVP model should influence the output.'
-                                                          'Increasing this can in some cases reduce the likelyhood of multiple speakers. Defaults to 0 (disabled)', default=.0)
+                                                          ' Increasing this can in some cases reduce the likelihood of multiple speakers. Defaults to 0 (disabled)', default=.0)
     args = parser.parse_args()
     os.makedirs(args.output_path, exist_ok=True)
 
diff --git a/tortoise/models/arch_util.py b/tortoise/models/arch_util.py
index ffce5cf..661ee1f 100644
--- a/tortoise/models/arch_util.py
+++ b/tortoise/models/arch_util.py
@@ -43,7 +43,7 @@ def normalization(channels):
 
 class QKVAttentionLegacy(nn.Module):
     """
-    A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping
+    A module which performs QKV attention. Matches legacy QKVAttention + input/output heads shaping
     """
 
     def __init__(self, n_heads):
diff --git a/tortoise/models/transformer.py b/tortoise/models/transformer.py
index aa59b46..707e9eb 100644
--- a/tortoise/models/transformer.py
+++ b/tortoise/models/transformer.py
@@ -216,4 +216,4 @@ class Transformer(nn.Module):
         self.layers = execute_type(layers, args_route = attn_route_map)
 
     def forward(self, x, **kwargs):
-        return self.layers(x, **kwargs)
\ No newline at end of file
+        return self.layers(x, **kwargs)