forked from mrq/bitsandbytes-rocm
cast properly
parent 577275bd8c
commit e35e2c665a
@@ -231,7 +231,7 @@ class MatMul8bitLt(torch.autograd.Function):

         # Cast A to fp16
         if A.dtype != torch.float16:
-            warnings.warn(f"MatMul8bitLt: input matrix will be cast from {A.dtype} to float16")
+            warnings.warn(f"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization")

         # 1. Quantize A
         if len(A.shape) == 3:
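For context, the warning reworded above fires just before MatMul8bitLt quantizes its input. A minimal sketch of that cast step follows, assuming a non-fp16 input tensor A; the helper name cast_input_for_int8_matmul is illustrative, not the library's actual function:

import warnings
import torch

def cast_input_for_int8_matmul(A: torch.Tensor) -> torch.Tensor:
    # MatMul8bitLt quantizes its input to int8 starting from an fp16 tensor,
    # so any other dtype is cast first and the user is warned about it.
    if A.dtype != torch.float16:
        warnings.warn(
            f"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization"
        )
        A = A.to(torch.float16)
    return A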
@@ -372,8 +372,10 @@ def test_matmullt(
     n = out_bnb.numel()
     err = torch.abs(out_bnb - out_torch).mean().item()
     # print(f'abs error {err:.4f}')
+    out_error_rate = 0.0175 if dtype == torch.float16 else 0.02
+
     idx = torch.isclose(out_bnb, out_torch, atol=0.01, rtol=0.1)
-    assert (idx == 0).sum().item() <= n * 0.0175
+    assert (idx == 0).sum().item() <= n * out_error_rate
     idx = torch.isclose(out_bnb, out_torch, atol=0.035, rtol=0.2)
     assert (idx == 0).sum().item() <= n * 0.001
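The test change above makes the allowed mismatch rate depend on the input dtype: fp16 inputs keep the original 1.75% budget, while other dtypes get 2%. A standalone sketch of the same check, assuming out_bnb and out_torch are the bitsandbytes and reference outputs (the function name is a placeholder):

import torch

def assert_close_enough(out_bnb: torch.Tensor, out_torch: torch.Tensor, dtype: torch.dtype) -> None:
    n = out_bnb.numel()
    # fp16 inputs are expected to track the reference more closely than
    # other dtypes, hence the dtype-dependent mismatch budget.
    out_error_rate = 0.0175 if dtype == torch.float16 else 0.02
    # count elements falling outside the tight tolerance band
    idx = torch.isclose(out_bnb, out_torch, atol=0.01, rtol=0.1)
    assert (idx == 0).sum().item() <= n * out_error_rate
    # a much wider band may only be violated by ~0.1% of elements
    idx = torch.isclose(out_bnb, out_torch, atol=0.035, rtol=0.2)
    assert (idx == 0).sum().item() <= n * 0.001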