Merge branch 'main' of github.com:facebookresearch/bitsandbytes into 0.26.0
commit 3cff6795fb
@@ -15,8 +15,8 @@ from bitsandbytes.optim import GlobalOptimManager
 class StableEmbedding(torch.nn.Embedding):
     def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None,
                  max_norm: Optional[float] = None, norm_type: float = 2., scale_grad_by_freq: bool = False,
-                 sparse: bool = True, _weight: Optional[Tensor] = None) -> None:
-        super(StableEmbedding, self).__init__(num_embeddings, embedding_dim, padding_idx, max_norm, norm_type, scale_grad_by_freq, False, _weight)
+                 sparse: bool = False, _weight: Optional[Tensor] = None) -> None:
+        super(StableEmbedding, self).__init__(num_embeddings, embedding_dim, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse, _weight)
         self.norm = torch.nn.LayerNorm(embedding_dim)
         GlobalOptimManager.get_instance().register_parameters(self.weight)
         GlobalOptimManager.get_instance().override_config(self.weight, 'optim_bits', 32)
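Aside (not part of the diff): before this hunk, the `sparse` argument was accepted but a hard-coded `False` was forwarded to `torch.nn.Embedding.__init__`, so callers could not actually control sparse gradients; the fix forwards `sparse` and flips the default to `False`, matching the behavior that was silently in force. The unchanged context lines also show that the constructor registers its weight with `GlobalOptimManager` and overrides `optim_bits` to 32, which keeps full-precision optimizer state for the embedding when an 8-bit optimizer is used. A minimal, hedged usage sketch; the optimizer choice and training step are illustrative assumptions, not part of this commit:

```python
import torch
import bitsandbytes as bnb

# Default is now dense (sparse=False); an explicit sparse=True is forwarded
# to torch.nn.Embedding instead of being overridden with False.
emb = bnb.nn.StableEmbedding(num_embeddings=1024, embedding_dim=64)

ids = torch.randint(0, 1024, (8, 16))
out = emb(ids)  # embedding lookup followed by the LayerNorm created in __init__

# Assumption: any bitsandbytes 8-bit optimizer; requires a CUDA build of bitsandbytes.
# Because __init__ registered emb.weight and set optim_bits=32, this parameter's
# optimizer state stays in 32-bit while other parameters could use 8-bit state.
opt = bnb.optim.Adam8bit(emb.parameters(), lr=1e-3)
out.sum().backward()
opt.step()
```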
@@ -2,7 +2,12 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+import math
+import os
+
 import torch
+import torch.distributed as dist
+
 from bitsandbytes.optim.optimizer import Optimizer2State
 import bitsandbytes.functional as F
 
@@ -219,9 +224,9 @@ class AnalysisAdam(torch.optim.Optimizer):
                 if self.savedir != '' and state['step'] % 100 == 0:
                     if not os.path.exists(self.savedir): os.makedirs(self.savedir)
                     shapestr = '_'.join([str(dim) for dim in p_data_fp32.shape])
-                    pathe = join(self.savedir, f'{p_id}_{shapestr}_abserr.pkl')
-                    pathrele = join(self.savedir, f'{p_id}_{shapestr}_relerr.pkl')
-                    pathcounts = join(self.savedir, f'{p_id}_{shapestr}_counts.pkl')
+                    pathe = os.path.join(self.savedir, f'{p_id}_{shapestr}_abserr.pkl')
+                    pathrele = os.path.join(self.savedir, f'{p_id}_{shapestr}_relerr.pkl')
+                    pathcounts = os.path.join(self.savedir, f'{p_id}_{shapestr}_counts.pkl')
                     torch.save(e, pathe)
                     torch.save(rele, pathrele)
                     torch.save(counts, pathcounts)