forked from mrq/bitsandbytes-rocm
Added AnalysisAdam, an Adam optimizer variant that performs 8-bit vs 32-bit error analysis.
This commit is contained in:
parent
eaf35ab949
commit
1ec0d54529
25
BUCK
25
BUCK
|
@ -1,25 +0,0 @@
|
||||||
prebuilt_python_library(
|
|
||||||
name = 'bnb-cuda102',
|
|
||||||
binary_src = ':bnb-cuda102-wheel',
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
remote_file(
|
|
||||||
name = 'bnb-cuda102-wheel',
|
|
||||||
url = 'https://test-files.pythonhosted.org/packages/4e/69/025b08bf1b7e777ca3800dc79ebe9dfd7309931f0a5f3de132d1433076ff/bitsandbytes_cuda102-0.0.22-py3-none-any.whl',
|
|
||||||
sha1 = '8c89e640afab18cdc6b7c5924c70e25036811686',
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
prebuilt_python_library(
|
|
||||||
name = 'bnb-cuda111',
|
|
||||||
binary_src = ':bnb-cuda111-wheel',
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
remote_file(
|
|
||||||
name = 'bnb-cuda111-wheel',
|
|
||||||
url = 'https://test-files.pythonhosted.org/packages/f9/38/2179701c80ae2aa9606bce7d498f397bd94e7bb2ff7e7c30ed032a3a39c2/bitsandbytes_cuda111-0.0.22-py3-none-any.whl',
|
|
||||||
sha1 = '433f534b225bc29391782c8a9d82635bc0eb9d33',
|
|
||||||
)
|
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
# This source code is licensed under the MIT license found in the
|
# This source code is licensed under the MIT license found in the
|
||||||
# LICENSE file in the root directory of this source tree.
|
# LICENSE file in the root directory of this source tree.
|
||||||
from bitsandbytes.optim.optimizer import Optimizer2State
|
from bitsandbytes.optim.optimizer import Optimizer2State
|
||||||
|
import bitsandbytes.functional as F
|
||||||
|
|
||||||
class Adam(Optimizer2State):
|
class Adam(Optimizer2State):
|
||||||
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
|
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
|
||||||
|
@ -28,7 +29,7 @@ class Adam32bit(Optimizer2State):
|
||||||
|
|
||||||
|
|
||||||
class AnalysisAdam(torch.optim.Optimizer):
|
class AnalysisAdam(torch.optim.Optimizer):
|
||||||
"""Implements 8-bit Adam and performs error analysis.
|
"""Adam that performs 8-bit vs 32-bit error analysis.
|
||||||
|
|
||||||
This implementation is modified from torch.optim.Adam based on:
|
This implementation is modified from torch.optim.Adam based on:
|
||||||
`Fixed Weight Decay Regularization in Adam`
|
`Fixed Weight Decay Regularization in Adam`
|
||||||
|
@ -190,6 +191,11 @@ class AnalysisAdam(torch.optim.Optimizer):
|
||||||
state1 = F.dequantize_no_absmax(C1, code1)
|
state1 = F.dequantize_no_absmax(C1, code1)
|
||||||
C2 = F.quantize_no_absmax(exp_avg_sq, code=code2)
|
C2 = F.quantize_no_absmax(exp_avg_sq, code=code2)
|
||||||
state2 = F.dequantize_no_absmax(C2, code2)
|
state2 = F.dequantize_no_absmax(C2, code2)
|
||||||
|
elif self.analysis == 'my-quantization-routine':
|
||||||
|
# 1. get code
|
||||||
|
# 2. quantize
|
||||||
|
# 3. dequantize
|
||||||
|
# Error will be calculated automatically!
|
||||||
else:
|
else:
|
||||||
raise ValueError(f'Invalid analysis value: {self.analysis}!')
|
raise ValueError(f'Invalid analysis value: {self.analysis}!')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user