cast before allclose
commit 28a9313ddc (parent 95dafc6475)
@@ -541,7 +541,7 @@ def test_linear8bitlt_no_fp16_weights(threshold, memory_efficient_backward):
     mlp = MLP8bit(
         32, 64, threshold=threshold, has_fp16_weights=False, memory_efficient_backward=memory_efficient_backward
     )
-    w1, w2 = mlp.fc1.weight.clone(), mlp.fc2.weight.clone()  # note: we grad original weights before quantization,
+    w1, w2 = mlp.fc1.weight.clone().cuda(), mlp.fc2.weight.clone().cuda()  # grab weights before quantization,
     mlp = mlp.cuda().half()  # and this line triggers quantization
 
     for i in range(100):
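Context for the hunk above (not part of the commit): the unquantized weight clones are taken before mlp.cuda().half() triggers quantization, and they are now moved to CUDA up front because b1.grad in the assertion further down lives on the GPU, and torch.allclose raises a RuntimeError when its arguments sit on different devices. A minimal sketch of that failure mode, assuming a CUDA device is available:

import torch

# Minimal sketch (not from the commit): torch.allclose cannot compare tensors
# that live on different devices, so a CPU-side reference has to be moved to
# the GPU (or the GPU result moved to the CPU) before the assertion, i.e.
# cast before allclose.
if torch.cuda.is_available():
    result = torch.randn(4, 4, device="cuda")
    reference = result.cpu().clone()                  # reference kept on the CPU

    # torch.allclose(result, reference)               # RuntimeError: tensors on different devices
    assert torch.allclose(result, reference.cuda())   # same device, comparison succeeds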
@@ -567,7 +567,7 @@ def test_linear8bitlt_no_fp16_weights(threshold, memory_efficient_backward):
 
         mlp.zero_grad()
         (o1 * grad_proj).sum().backward()
-        grad_ref = grad_proj.flatten(2) @ w2.to(grad_proj.device) @ w1.to(grad_proj.device)
+        grad_ref = grad_proj.flatten(2) @ w2.to() @ w1.to(grad_proj.device)
         assert torch.allclose(b1.grad, grad_ref)
 
 
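For reference (not part of the commit): grad_ref is the closed-form input gradient for a composition of two linear maps. With o = x @ W1.T @ W2.T (bias terms drop out of dL/dx), the chain rule gives dL/dx = (dL/do) @ W2 @ W1, which is what grad_proj.flatten(2) @ w2 @ w1 computes and compares against the autograd result b1.grad. A minimal CPU-only sketch of that identity, using the 32/64 feature sizes from the test:

import torch

# Minimal sketch (independent of the commit): verify the closed-form input
# gradient of two stacked bias-free linear layers against autograd.
torch.manual_seed(0)
w1 = torch.randn(64, 32)                         # fc1-style weight, shape (out_features, in_features)
w2 = torch.randn(32, 64)                         # fc2-style weight
x = torch.randn(16, 8, 32, requires_grad=True)   # 3-d input batch with 32 features

o = x @ w1.t() @ w2.t()                          # forward pass through both layers
grad_proj = torch.randn_like(o)                  # arbitrary upstream gradient
(o * grad_proj).sum().backward()                 # autograd gradient lands in x.grad

grad_ref = grad_proj @ w2 @ w1                   # manual chain-rule gradient
assert torch.allclose(x.grad, grad_ref, atol=1e-4)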