forked from mrq/bitsandbytes-rocm
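cast properly: convert the outlier sub-matrix with .to(B.dtype) instead of .half(), and drop the subA/state.subB dtype assertion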
This commit is contained in:
parent
702cc72018
commit
45dc1983e9
|
@@ -294,7 +294,7 @@ class MatMul8bitLt(torch.autograd.Function):
|
||||||
(outliers * state.SCB.view(-1, 1) / 127.0)
|
(outliers * state.SCB.view(-1, 1) / 127.0)
|
||||||
.t()
|
.t()
|
||||||
.contiguous()
|
.contiguous()
|
||||||
.half()
|
.to(B.dtype)
|
||||||
)
|
)
|
||||||
CA[:, state.idx.long()] = 0
|
CA[:, state.idx.long()] = 0
|
||||||
CAt[:, state.idx.long()] = 0
|
CAt[:, state.idx.long()] = 0
|
||||||
|
@@ -321,7 +321,6 @@ class MatMul8bitLt(torch.autograd.Function):
|
||||||
|
|
||||||
# 4. Mixed-precision decomposition matmul
|
# 4. Mixed-precision decomposition matmul
|
||||||
if coo_tensorA is not None and subA is not None:
|
if coo_tensorA is not None and subA is not None:
|
||||||
assert subA.dtype == state.subB.dtype, (subA.dtype, state.subB.dtype)
|
|
||||||
output += torch.matmul(subA, state.subB)
|
output += torch.matmul(subA, state.subB)
|
||||||
|
|
||||||
# 5. Save state
|
# 5. Save state
|
||||||
|
|
Loading…
Reference in New Issue
Block a user