a warning (or similar notice) should be emitted when this is first executed, to make users aware that a cast happens and that the operation's quantization is performed in fp16.

This commit is contained in:
justheuristic 2022-09-17 20:46:04 +03:00
parent 3634fc738b
commit cc4858c2fd

View File

@@ -1,4 +1,6 @@
import operator
import warnings
import torch
import bitsandbytes.functional as F
@@ -229,6 +231,8 @@ class MatMul8bitLt(torch.autograd.Function):
# Cast A to fp16
A_dtype = A.dtype
if A_dtype != torch.float16:
warnings.warn(f"MatMul8bitLt: temporarily casting input matrix from {A_dtype} to float16")
A = A.to(torch.float16)
# 1. Quantize A