From 50690e44657c34445a127b44868c34fde3623965 Mon Sep 17 00:00:00 2001
From: James Betker
Date: Fri, 13 May 2022 10:30:02 -0600
Subject: [PATCH] Automatically pick batch size based on available GPU memory

---
 tortoise/api.py             | 20 ++++++++++++++++++--
 tortoise/utils/tokenizer.py |  2 ++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/tortoise/api.py b/tortoise/api.py
index ca8d825..fa915b4 100644
--- a/tortoise/api.py
+++ b/tortoise/api.py
@@ -160,12 +160,28 @@ def classify_audio_clip(clip):
     return results[0][0]
 
 
+def pick_best_batch_size_for_gpu():
+    """
+    Tries to pick a batch size that will fit in your GPU. These sizes aren't guaranteed to work, but they should give
+    you a good shot.
+    """
+    free, available = torch.cuda.mem_get_info()
+    availableGb = available / (1024 ** 3)
+    if availableGb > 14:
+        return 16
+    elif availableGb > 10:
+        return 8
+    elif availableGb > 7:
+        return 4
+    return 1
+
+
 class TextToSpeech:
     """
     Main entry point into Tortoise.
     """
-    def __init__(self, autoregressive_batch_size=16, models_dir='.models', enable_redaction=True):
+    def __init__(self, autoregressive_batch_size=None, models_dir='.models', enable_redaction=True):
         """
         Constructor
         :param autoregressive_batch_size: Specifies how many samples to generate per batch. Lower this if you are seeing
@@ -176,7 +192,7 @@ class TextToSpeech:
                               (but are still rendered by the model). This can be used for prompt engineering.
                               Default is true.
         """
-        self.autoregressive_batch_size = autoregressive_batch_size
+        self.autoregressive_batch_size = pick_best_batch_size_for_gpu() if autoregressive_batch_size is None else autoregressive_batch_size
         self.enable_redaction = enable_redaction
         if self.enable_redaction:
             self.aligner = Wav2VecAlignment()
diff --git a/tortoise/utils/tokenizer.py b/tortoise/utils/tokenizer.py
index 2f36a06..a8959d8 100644
--- a/tortoise/utils/tokenizer.py
+++ b/tortoise/utils/tokenizer.py
@@ -148,6 +148,7 @@ def english_cleaners(text):
     text = text.replace('"', '')
     return text
 
+
 def lev_distance(s1, s2):
     if len(s1) > len(s2):
         s1, s2 = s2, s1
@@ -163,6 +164,7 @@ def lev_distance(s1, s2):
         distances = distances_
     return distances[-1]
 
+
 class VoiceBpeTokenizer:
     def __init__(self, vocab_file='tortoise/data/tokenizer.json'):
         if vocab_file is not None:
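
For context (not part of the patch): with this change, constructing TextToSpeech without an explicit autoregressive_batch_size defers to pick_best_batch_size_for_gpu(), which bins the second value returned by torch.cuda.mem_get_info() -- a (free_bytes, total_bytes) tuple -- into batch sizes of 16, 8, 4, or 1, while an explicit value still overrides it. Below is a minimal usage sketch, assuming tortoise-tts is installed with its model weights reachable (the constructor loads models from models_dir) and a CUDA device is visible.

# Minimal usage sketch; assumes tortoise-tts is installed and a CUDA GPU is available.
import torch
from tortoise.api import TextToSpeech, pick_best_batch_size_for_gpu

# The heuristic keys its thresholds off GPU memory expressed in GiB.
print(torch.cuda.mem_get_info())        # (free_bytes, total_bytes)
print(pick_best_batch_size_for_gpu())   # e.g. 16 on a card reporting >14 GiB

# Omitting the argument now triggers the automatic selection;
# passing a value keeps the old, fully manual behavior.
tts_auto = TextToSpeech()                               # batch size chosen from GPU memory
tts_fixed = TextToSpeech(autoregressive_batch_size=4)   # explicit override

Defaulting the parameter to None rather than a number keeps the change backward compatible: callers that already pass a batch size see exactly the same behavior as before.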