a warning (or similar notice) should be emitted when this is first executed, to make users aware that a cast happens and that the operation's quantization is performed in fp16.

This commit is contained in:
justheuristic 2022-09-17 20:46:04 +03:00
parent 3634fc738b
commit cc4858c2fd

View File

@@ -1,4 +1,6 @@
import operator
import warnings
import torch
import bitsandbytes.functional as F
@@ -229,6 +231,8 @@ class MatMul8bitLt(torch.autograd.Function):
# Cast A to fp16
A_dtype = A.dtype
if A_dtype != torch.float16:
warnings.warn(f"MatMul8bitLt: temporarily casting input matrix from {A_dtype} to float16")
A = A.to(torch.float16)
# 1. Quantize A