bitsandbytes-rocm/bitsandbytes/optim/adagrad.py

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from bitsandbytes.optim.optimizer import Optimizer1State


class Adagrad(Optimizer1State):
    """Adagrad optimizer configured through ``Optimizer1State``.

    The constructor only validates its hyperparameters and then hands them
    to the base class under the optimizer name ``"adagrad"``; the update
    rule itself is not implemented in this class.
    """

    def __init__(
        self,
        params,
        lr=1e-2,
        lr_decay=0,
        weight_decay=0,
        initial_accumulator_value=0,
        eps=1e-10,
        optim_bits=32,
        args=None,
        min_8bit_size=4096,
        percentile_clipping=100,
        block_wise=True,
    ):
        """Validate the hyperparameters and initialise the base optimizer.

        Args:
            params: Forwarded unchanged to ``Optimizer1State``.
            lr: Learning rate; must not be negative.
            lr_decay: Only ``0`` is accepted.
            weight_decay: Weight decay; must not be negative.
            initial_accumulator_value: Only ``0`` is accepted.
            eps: Epsilon; must not be negative.
            optim_bits: Forwarded unchanged to ``Optimizer1State``
                (presumably the optimizer-state bit width -- confirm
                against the base class).
            args: Forwarded unchanged to ``Optimizer1State``.
            min_8bit_size: Forwarded unchanged to ``Optimizer1State``.
            percentile_clipping: Forwarded unchanged to ``Optimizer1State``.
            block_wise: Forwarded unchanged to ``Optimizer1State``.

        Raises:
            ValueError: If ``lr``, ``weight_decay`` or ``eps`` fails the
                ``0.0 <= value`` check, or if ``lr_decay`` or
                ``initial_accumulator_value`` differs from zero.
        """
        # Checked in this order so the first offending argument is reported.
        # The predicate is kept as ``not 0.0 <= value`` (rather than
        # ``value < 0.0``) so that NaN is rejected as well.
        non_negative_checks = (
            ("Invalid learning rate: {}", lr),
            ("Invalid weight_decay value: {}", weight_decay),
            ("Invalid epsilon value: {}", eps),
        )
        for template, value in non_negative_checks:
            if not 0.0 <= value:
                raise ValueError(template.format(value))

        # Options that exist in the signature but are rejected unless zero.
        must_be_zero_checks = (
            (
                "Initial accumulator value != 0.0 not supported!",
                initial_accumulator_value,
            ),
            ("Lr Decay != 0.0 not supported!", lr_decay),
        )
        for message, value in must_be_zero_checks:
            if value != 0.0:
                raise ValueError(message)

        # The fourth positional argument is a fixed (0.0, 0.0) pair
        # (presumably the base class's betas slot -- confirm against
        # Optimizer1State).
        super().__init__(
            "adagrad",
            params,
            lr,
            (0.0, 0.0),
            eps,
            weight_decay,
            optim_bits,
            args,
            min_8bit_size,
            percentile_clipping,
            block_wise,
        )


class Adagrad8bit(Optimizer1State):
    """Adagrad variant that always passes 8 as the bit width to the base.

    The constructor validates its hyperparameters and then hands them to
    ``Optimizer1State`` under the optimizer name ``"adagrad"``; the update
    rule itself is not implemented in this class.
    """

    def __init__(
        self,
        params,
        lr=1e-2,
        lr_decay=0,
        weight_decay=0,
        initial_accumulator_value=0,
        eps=1e-10,
        optim_bits=8,
        args=None,
        min_8bit_size=4096,
        percentile_clipping=100,
        block_wise=True,
    ):
        """Validate the hyperparameters and initialise the base optimizer.

        Args:
            params: Forwarded unchanged to ``Optimizer1State``.
            lr: Learning rate; must not be negative.
            lr_decay: Only ``0`` is accepted.
            weight_decay: Weight decay; must not be negative.
            initial_accumulator_value: Only ``0`` is accepted.
            eps: Epsilon; must not be negative.
            optim_bits: Accepted for signature compatibility but ignored;
                the literal ``8`` is always forwarded to the base class.
            args: Forwarded unchanged to ``Optimizer1State``.
            min_8bit_size: Forwarded unchanged to ``Optimizer1State``.
            percentile_clipping: Forwarded unchanged to ``Optimizer1State``.
            block_wise: Must be truthy; forwarded to ``Optimizer1State``.

        Raises:
            ValueError: If ``lr``, ``weight_decay`` or ``eps`` fails the
                ``0.0 <= value`` check, if ``lr_decay`` or
                ``initial_accumulator_value`` differs from zero, or if
                ``block_wise`` is falsy.
        """
        # ``not 0.0 <= value`` (rather than ``value < 0.0``) also rejects NaN.
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= weight_decay:
            raise ValueError(
                "Invalid weight_decay value: {}".format(weight_decay)
            )
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if initial_accumulator_value != 0.0:
            raise ValueError("Initial accumulator value != 0.0 not supported!")
        if lr_decay != 0.0:
            raise ValueError("Lr Decay != 0.0 not supported!")
        # Raise explicitly instead of using ``assert``: assertions are
        # removed when Python runs with -O, which would let an unsupported
        # configuration through silently.
        if not block_wise:
            raise ValueError("block_wise=False not supported by Adagrad8bit!")

        # The fourth positional argument is a fixed (0.0, 0.0) pair
        # (presumably the base class's betas slot -- confirm against
        # Optimizer1State).
        super(Adagrad8bit, self).__init__(
            "adagrad",
            params,
            lr,
            (0.0, 0.0),
            eps,
            weight_decay,
            8,
            args,
            min_8bit_size,
            percentile_clipping,
            block_wise,
        )


class Adagrad32bit(Optimizer1State):
    """Adagrad variant that always passes 32 as the bit width to the base.

    The constructor only validates its hyperparameters and then hands them
    to ``Optimizer1State`` under the optimizer name ``"adagrad"``; the
    update rule itself is not implemented in this class.
    """

    def __init__(
        self,
        params,
        lr=1e-2,
        lr_decay=0,
        weight_decay=0,
        initial_accumulator_value=0,
        eps=1e-10,
        optim_bits=32,
        args=None,
        min_8bit_size=4096,
        percentile_clipping=100,
        block_wise=True,
    ):
        """Validate the hyperparameters and initialise the base optimizer.

        Args:
            params: Forwarded unchanged to ``Optimizer1State``.
            lr: Learning rate; must not be negative.
            lr_decay: Only ``0`` is accepted.
            weight_decay: Weight decay; must not be negative.
            initial_accumulator_value: Only ``0`` is accepted.
            eps: Epsilon; must not be negative.
            optim_bits: Accepted for signature compatibility but ignored;
                the literal ``32`` is always forwarded to the base class.
            args: Forwarded unchanged to ``Optimizer1State``.
            min_8bit_size: Forwarded unchanged to ``Optimizer1State``.
            percentile_clipping: Forwarded unchanged to ``Optimizer1State``.
            block_wise: Forwarded unchanged to ``Optimizer1State``.

        Raises:
            ValueError: If ``lr``, ``weight_decay`` or ``eps`` fails the
                ``0.0 <= value`` check, or if ``lr_decay`` or
                ``initial_accumulator_value`` differs from zero.
        """
        # Checked in this order so the first offending argument is reported.
        # The predicate is kept as ``not 0.0 <= value`` (rather than
        # ``value < 0.0``) so that NaN is rejected as well.
        non_negative_checks = (
            ("Invalid learning rate: {}", lr),
            ("Invalid weight_decay value: {}", weight_decay),
            ("Invalid epsilon value: {}", eps),
        )
        for template, value in non_negative_checks:
            if not 0.0 <= value:
                raise ValueError(template.format(value))

        # Options that exist in the signature but are rejected unless zero.
        must_be_zero_checks = (
            (
                "Initial accumulator value != 0.0 not supported!",
                initial_accumulator_value,
            ),
            ("Lr Decay != 0.0 not supported!", lr_decay),
        )
        for message, value in must_be_zero_checks:
            if value != 0.0:
                raise ValueError(message)

        # The fourth positional argument is a fixed (0.0, 0.0) pair
        # (presumably the base class's betas slot -- confirm against
        # Optimizer1State).
        super().__init__(
            "adagrad",
            params,
            lr,
            (0.0, 0.0),
            eps,
            weight_decay,
            32,
            args,
            min_8bit_size,
            percentile_clipping,
            block_wise,
        )
ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00			`# Copyright (c) Facebook, Inc. and its affiliates.`
			`#`
			`# This source code is licensed under the MIT license found in the`
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`# LICENSE file in the root directory of this source tree.`
			`from bitsandbytes.optim.optimizer import Optimizer1State`

ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`class Adagrad(Optimizer1State):`
ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00			`def __init__(`
			`self,`
			`params,`
			`lr=1e-2,`
			`lr_decay=0,`
			`weight_decay=0,`
			`initial_accumulator_value=0,`
			`eps=1e-10,`
			`optim_bits=32,`
			`args=None,`
			`min_8bit_size=4096,`
			`percentile_clipping=100,`
			`block_wise=True,`
			`):`
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`if not 0.0 <= lr:`
			`raise ValueError("Invalid learning rate: {}".format(lr))`
			`if not 0.0 <= weight_decay:`
reran black with linelength 80 for greater readability 2022-08-01 16:32:47 +00:00			`raise ValueError(`
			`"Invalid weight_decay value: {}".format(weight_decay)`
			`)`
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`if not 0.0 <= eps:`
			`raise ValueError("Invalid epsilon value: {}".format(eps))`
			`if initial_accumulator_value != 0.0:`
ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00			`raise ValueError("Initial accumulator value != 0.0 not supported!")`
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`if lr_decay != 0.0:`
ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00			`raise ValueError("Lr Decay != 0.0 not supported!")`
			`super(Adagrad, self).__init__(`
			`"adagrad",`
			`params,`
			`lr,`
			`(0.0, 0.0),`
			`eps,`
			`weight_decay,`
			`optim_bits,`
			`args,`
			`min_8bit_size,`
			`percentile_clipping,`
			`block_wise,`
			`)`

Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00
			`class Adagrad8bit(Optimizer1State):`
ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00			`def __init__(`
			`self,`
			`params,`
			`lr=1e-2,`
			`lr_decay=0,`
			`weight_decay=0,`
			`initial_accumulator_value=0,`
			`eps=1e-10,`
			`optim_bits=8,`
			`args=None,`
			`min_8bit_size=4096,`
			`percentile_clipping=100,`
			`block_wise=True,`
			`):`
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`if not 0.0 <= lr:`
			`raise ValueError("Invalid learning rate: {}".format(lr))`
			`if not 0.0 <= weight_decay:`
reran black with linelength 80 for greater readability 2022-08-01 16:32:47 +00:00			`raise ValueError(`
			`"Invalid weight_decay value: {}".format(weight_decay)`
			`)`
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`if not 0.0 <= eps:`
			`raise ValueError("Invalid epsilon value: {}".format(eps))`
			`if initial_accumulator_value != 0.0:`
ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00			`raise ValueError("Initial accumulator value != 0.0 not supported!")`
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`if lr_decay != 0.0:`
ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00			`raise ValueError("Lr Decay != 0.0 not supported!")`
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`assert block_wise`
ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00			`super(Adagrad8bit, self).__init__(`
			`"adagrad",`
			`params,`
			`lr,`
			`(0.0, 0.0),`
			`eps,`
			`weight_decay,`
			`8,`
			`args,`
			`min_8bit_size,`
			`percentile_clipping,`
			`block_wise,`
			`)`

Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00
			`class Adagrad32bit(Optimizer1State):`
ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00			`def __init__(`
			`self,`
			`params,`
			`lr=1e-2,`
			`lr_decay=0,`
			`weight_decay=0,`
			`initial_accumulator_value=0,`
			`eps=1e-10,`
			`optim_bits=32,`
			`args=None,`
			`min_8bit_size=4096,`
			`percentile_clipping=100,`
			`block_wise=True,`
			`):`
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`if not 0.0 <= lr:`
			`raise ValueError("Invalid learning rate: {}".format(lr))`
			`if not 0.0 <= weight_decay:`
reran black with linelength 80 for greater readability 2022-08-01 16:32:47 +00:00			`raise ValueError(`
			`"Invalid weight_decay value: {}".format(weight_decay)`
			`)`
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`if not 0.0 <= eps:`
			`raise ValueError("Invalid epsilon value: {}".format(eps))`
			`if initial_accumulator_value != 0.0:`
ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00			`raise ValueError("Initial accumulator value != 0.0 not supported!")`
Added adagrad with tests (no clipping). 2021-11-10 23:10:02 +00:00			`if lr_decay != 0.0:`
ran black and isort for coherent code formatting 2022-08-01 10:31:48 +00:00			`raise ValueError("Lr Decay != 0.0 not supported!")`
			`super(Adagrad32bit, self).__init__(`
			`"adagrad",`
			`params,`
			`lr,`
			`(0.0, 0.0),`
			`eps,`
			`weight_decay,`
			`32,`
			`args,`
			`min_8bit_size,`
			`percentile_clipping,`
			`block_wise,`
			`)`