Merge branch 'main' of github.com:facebookresearch/bitsandbytes into 0.26.0

commit 3cff6795fb
Author: Tim Dettmers
Date:   2021-11-29 08:24:17 -08:00
2 changed files with 10 additions and 5 deletions

File 1 of 2:

@@ -15,8 +15,8 @@ from bitsandbytes.optim import GlobalOptimManager
 class StableEmbedding(torch.nn.Embedding):
     def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: Optional[int] = None,
                  max_norm: Optional[float] = None, norm_type: float = 2., scale_grad_by_freq: bool = False,
-                 sparse: bool = True, _weight: Optional[Tensor] = None) -> None:
-        super(StableEmbedding, self).__init__(num_embeddings, embedding_dim, padding_idx, max_norm, norm_type, scale_grad_by_freq, False, _weight)
+                 sparse: bool = False, _weight: Optional[Tensor] = None) -> None:
+        super(StableEmbedding, self).__init__(num_embeddings, embedding_dim, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse, _weight)
         self.norm = torch.nn.LayerNorm(embedding_dim)
         GlobalOptimManager.get_instance().register_parameters(self.weight)
         GlobalOptimManager.get_instance().override_config(self.weight, 'optim_bits', 32)
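The change above makes StableEmbedding respect its sparse argument (the parent constructor was previously always called with False) and flips the default to dense. A minimal usage sketch, assuming the bitsandbytes.nn export and with illustrative sizes that are not part of this commit:

    import torch
    from bitsandbytes.nn import StableEmbedding

    # Dense gradients by default now (sparse=False); passing sparse=True is
    # forwarded to torch.nn.Embedding after this change.
    emb = StableEmbedding(num_embeddings=50_000, embedding_dim=512)
    tokens = torch.randint(0, 50_000, (8, 128))   # illustrative batch of token ids
    out = emb(tokens)                             # embedding lookup followed by LayerNorm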

File 2 of 2:

@@ -2,7 +2,12 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+import math
+import os
 import torch
+import torch.distributed as dist
 from bitsandbytes.optim.optimizer import Optimizer2State
 import bitsandbytes.functional as F
@@ -219,9 +224,9 @@ class AnalysisAdam(torch.optim.Optimizer):
             if self.savedir != '' and state['step'] % 100 == 0:
                 if not os.path.exists(self.savedir): os.makedirs(self.savedir)
                 shapestr = '_'.join([str(dim) for dim in p_data_fp32.shape])
-                pathe = join(self.savedir, f'{p_id}_{shapestr}_abserr.pkl')
-                pathrele = join(self.savedir, f'{p_id}_{shapestr}_relerr.pkl')
-                pathcounts = join(self.savedir, f'{p_id}_{shapestr}_counts.pkl')
+                pathe = os.path.join(self.savedir, f'{p_id}_{shapestr}_abserr.pkl')
+                pathrele = os.path.join(self.savedir, f'{p_id}_{shapestr}_relerr.pkl')
+                pathcounts = os.path.join(self.savedir, f'{p_id}_{shapestr}_counts.pkl')
                 torch.save(e, pathe)
                 torch.save(rele, pathrele)
                 torch.save(counts, pathcounts)
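The second file's fix replaces bare join(...) calls, which raise a NameError unless join was imported from os.path, with os.path.join, matching the newly added import os. A standalone sketch of the corrected path construction, with placeholder values standing in for the optimizer state:

    import os

    savedir = './analysis'           # placeholder for self.savedir
    p_id, shapestr = 0, '1024_768'   # placeholders for the parameter id and shape string
    os.makedirs(savedir, exist_ok=True)
    pathe = os.path.join(savedir, f'{p_id}_{shapestr}_abserr.pkl')
    # -> './analysis/0_1024_768_abserr.pkl'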