Automatically pick batch size based on available GPU memory

2022-05-13 10:30:02 -06:00 · 2022-05-13 10:30:02 -06:00 · 50690e4465
commit 50690e4465
parent cb7adf16af
2 changed files with 20 additions and 2 deletions
--- a/tortoise/api.py
+++ b/tortoise/api.py
@ -160,12 +160,28 @@ def classify_audio_clip(clip):
    return results[0][0]


+def pick_best_batch_size_for_gpu():
+    """
+    Tries to pick a batch size that will fit in your GPU. These sizes aren't guaranteed to work, but they should give
+    you a good shot.
+    """
+    free, available = torch.cuda.mem_get_info()
+    availableGb = available / (1024 ** 3)
+    if availableGb > 14:
+        return 16
+    elif availableGb > 10:
+        return 8
+    elif availableGb > 7:
+        return 4
+    return 1
+
+
 class TextToSpeech:
    """
    Main entry point into Tortoise.
    """

-    def __init__(self, autoregressive_batch_size=16, models_dir='.models', enable_redaction=True):
+    def __init__(self, autoregressive_batch_size=None, models_dir='.models', enable_redaction=True):
        """
        Constructor
        :param autoregressive_batch_size: Specifies how many samples to generate per batch. Lower this if you are seeing
@ -176,7 +192,7 @@ class TextToSpeech:
                                 (but are still rendered by the model). This can be used for prompt engineering.
                                 Default is true.
        """
-        self.autoregressive_batch_size = autoregressive_batch_size
+        self.autoregressive_batch_size = pick_best_batch_size_for_gpu() if autoregressive_batch_size is None else autoregressive_batch_size
        self.enable_redaction = enable_redaction
        if self.enable_redaction:
            self.aligner = Wav2VecAlignment()
--- a/tortoise/utils/tokenizer.py
+++ b/tortoise/utils/tokenizer.py
@ -148,6 +148,7 @@ def english_cleaners(text):
  text = text.replace('"', '')
  return text

+
 def lev_distance(s1, s2):
  if len(s1) > len(s2):
    s1, s2 = s2, s1
@ -163,6 +164,7 @@ def lev_distance(s1, s2):
    distances = distances_
  return distances[-1]

+
 class VoiceBpeTokenizer:
    def __init__(self, vocab_file='tortoise/data/tokenizer.json'):
        if vocab_file is not None: