PyTorch torchtune.modules.peft.lora

torchtune
https://docs.pytorch.org/torchtune/main/_modules/index.html
https://docs.pytorch.org/torchtune/0.6/_modules/index.html

1. Source code for torchtune.modules.peft.lora

https://docs.pytorch.org/torchtune/main/_modules/torchtune/modules/peft/lora.html
https://docs.pytorch.org/torchtune/0.6/_modules/torchtune/modules/peft/lora.html

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import math
from enum import Enum
from typing import Optional, Union

import torch
import torch.nn.functional as F

from torch import nn

from torchao.dtypes.nf4tensor import linear_nf4, to_nf4
from torchtune.modules.low_precision import _register_nf4_dispatch_ops  # noqa: F401
from torchtune.modules.peft import AdapterModule


class TrainableParams(Enum):
    FULL = "full"
    LORA = "lora"
    FROZEN = "frozen"


class LoRALinear(nn.Module, AdapterModule):
    """LoRA linear layer as introduced in `LoRA: Low-Rank Adaptation of Large Language Models <https://arxiv.org/abs/2106.09685>`_.

    LoRA perturbs a given layer via a low-rank approximation where only
    the rank decomposition matrices are trainable. In a linear layer instead of
    :math:`x \\mapsto W_0x` a LoRALinear layer is defined as
    :math:`x \\mapsto W_0x + (\\alpha / r)BAx`, where :math:`r` is the rank of
    the matrices :math:`A` and :math:`B` and :math:`\\alpha` is a scaling factor.
    As in the original implementation, we support dropout before multiplication
    by the low-rank matrices.

    Args:
        in_dim (int): input dimension
        out_dim (int): output dimension
        rank (int): rank of the low-rank approximation
        alpha (float): scaling factor for the low-rank approximation
        dropout (float): dropout probability. Default: 0.0
        use_bias (bool): whether to include bias in the original linear layer.
            Default: False
        quantize_base (bool): Whether to quantize base linear weight or not.
            Default: False
        **quantization_kwargs: Keyword arguments to pass to `to_nf4` when quantizing the base linear weight.
            Examples of valid arguments are `block_size` and `scaler_block_size`, which control the granularity of
            weight quantization and scaler quantization respectively. This is only used if `quantize_base` is True.
            Default None

    Raises:
        ValueError: If ``quantize_base`` is False, but quantization kwargs are provided.
    """

    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        rank: int,
        alpha: float,
        dropout: float = 0.0,
        use_bias: bool = False,
        quantize_base: bool = False,
        **quantization_kwargs,
    ):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.rank = rank
        self.alpha = alpha
        self.use_bias = use_bias
        self._quantize_base = quantize_base

        if not self._quantize_base and any([v for v in quantization_kwargs.values()]):
            raise ValueError(
                f"``quantize_base`` is False, but received the following quantization arguments: {quantization_kwargs}"
            )

        # Setup weight and bias
        linear = nn.Linear(in_features=in_dim, out_features=out_dim, bias=self.use_bias)
        # y = x @ W^T + B
        # Input: (N, in_dim) = (N, in_features)
        # Weight: (out_dim, in_dim) = (out_features, in_features)
        # Bias: (out_dim) = (out_features)
        # Output: (N, out_dim) = (N, out_features)

        weight = (
            linear.weight
            if not self._quantize_base
            else to_nf4(linear.weight, **quantization_kwargs)
        )
        # Weight: (out_dim, in_dim) = (out_features, in_features)

        bias = linear.bias if self.use_bias else None
        # Bias: (out_dim) = (out_features)

        # 'self.disabled' is a flag showing whether to turn off LoRA adapters,
        # this can be used in DPO for treating the lora adapters as the policy model
        # and disabling it to treat the base model as the reference model
        self.disabled = False
        self.register_parameter("weight", nn.Parameter(weight))
        self.register_parameter(
            "bias", nn.Parameter(bias) if bias is not None else None
        )
        self.dropout = nn.Dropout(p=dropout) if dropout > 0.0 else nn.Identity()
        self.lora_a = nn.Linear(in_features=in_dim, out_features=rank, bias=False)
        # y = x @ W^T + B
        # Input: (N, in_dim) = (N, in_features)
        # Weight: (rank, in_dim) = (out_features, in_features)
        # Bias: (rank) = (out_features)
        # Output: (N, rank) = (N, out_features)

        self.lora_b = nn.Linear(in_features=rank, out_features=out_dim, bias=False)
        # y = x @ W^T + B
        # Input: (N, rank) = (N, in_features)
        # Weight: (out_dim, rank) = (out_features, in_features)
        # Bias: (out_dim) = (out_features)
        # Output: (N, out_dim) = (N, out_features)

        self.merged = False
        self.initialize_parameters()

    def to_empty(
        self, *, device: Optional[Union[str, torch.device, int]], recurse: bool = True
    ):
        self.lora_a.to_empty(device=device, recurse=recurse)
        self.lora_b.to_empty(device=device, recurse=recurse)

    def initialize_parameters(self):
        # Initialize as in
        # https://github.com/microsoft/LoRA/blob/4c0333854cb905966f8cc4e9a74068c1e507c7b7/loralib/layers.py#L119
        _lora_a_init_params(self.lora_a)
        _lora_b_init_params(self.lora_b)

    def adapter_params(self) -> list[str]:
        """
        Return a list of strings corresponding to the names of the ``nn.Parameter`` s in
        the model coming from the adapter.

        For LoRA this means lora_a.weight and lora_b.weight.
        """
        # NOTE: this function has to be updated if the names of "lora_a" and "lora_b"
        # in this module change.
        adapter_params = ["lora_a.weight", "lora_b.weight"]
        return adapter_params

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x (torch.Tensor): input tensor with shape ``(..., in_dim)``

        Returns:
            torch.Tensor: output tensor with shape ``(..., out_dim)``

        """
        if self._quantize_base:
            out = linear_nf4(input=x, weight=self.weight)
            if self.use_bias:
                out = out + self.bias
        else:
            out = F.linear(x, self.weight, self.bias)
            # y = x @ W^T + B
            # Input: (N, in_dim) = (N, in_features)
            # Weight: (out_dim, in_dim) = (out_features, in_features)
            # Bias: (out_dim) = (out_features)
            # Output: (N, out_dim) = (N, out_features)

        if self.disabled:
            return out
        lora_out = self.lora_a(self.dropout(x))
        # y = x @ W^T + B
        # Input: (N, in_dim) = (N, in_features)
        # Weight: (rank, in_dim) = (out_features, in_features)
        # Bias: (rank) = (out_features)
        # Output: (N, rank) = (N, out_features)
        
        lora_out = (self.alpha / self.rank) * self.lora_b(lora_out)
        # y = x @ W^T + B
        # Input: (N, rank) = (N, in_features)
        # Weight: (out_dim, rank) = (out_features, in_features)
        # Bias: (out_dim) = (out_features)
        # Output: (N, out_dim) = (N, out_features)
        
        return out + lora_out


class QATLoRALinear(LoRALinear):
    """
    LoRA linear layer with quantization-aware training (QAT) applied to the
    activations and/or weights before the low rank adapters.

    QAT leverages fake quantization to simulate the quantization numerics during
    training without actually casting the data to lower precision. This class
    combines LoRA with QAT to improve the final quantized accuracy during inference
    while reducing the memory required during training.

    Args:
        in_dim (int): input dimension
        out_dim (int): output dimension
        rank (int): rank of the low-rank approximation
        alpha (float): scaling factor for the low-rank approximation
        dropout (float): dropout probability. Default: 0.0
        activation_qat_config (Optional[FakeQuantizeConfig]): config for specifying
            how input activations will be fake quantized, defaults to None
        weight_qat_config (Optional[FakeQuantizeConfig]): config for specifying
            how weights will be fake quantized, defaults to None

    Raises:
        ValueError: If `in_dim` is not divisible by weight `group_size`

    Example usage::

        activation_qat_config = FakeQuantizeConfig(
            dtype=torch.int8,
            granularity="per_token",
            is_symmetric=False,
        )
        weight_qat_config = FakeQuantizeConfig(
            dtype=torch.int4,
            group_size=8,
            is_symmetric=True,
        )
        qat_lora_linear = QATLoRALinear(
            in_dim=512,
            out_dim=1024,
            rank=8,
            alpha=16,
            dropout=0.0,
            activation_qat_config=activation_qat_config,
            weight_qat_config=weight_qat_config,
        )
        qat_lora_linear(torch.randn(512))
    """

    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        rank: int,
        alpha: float,
        dropout: float = 0.0,
        # fake quantize configs
        # TODO: make the types Optional[FakeQuantizeConfig] once we
        # support torchao 0.7+ by default
        activation_qat_config: Optional["FakeQuantizeConfig"] = None,
        weight_qat_config: Optional["FakeQuantizeConfig"] = None,
    ):
        super().__init__(
            in_dim,
            out_dim,
            rank,
            alpha,
            dropout,
            use_bias=False,
            quantize_base=False,
        )

        try:
            from torchao.quantization.qat.api import FakeQuantizeConfig
            from torchao.quantization.qat.fake_quantizer import FakeQuantizer
        except ImportError as err:
            raise ValueError(
                "QATLoRALinear is only compatible with torchao 0.7+"
            ) from err

        # initialize activation fake quantizer
        if activation_qat_config is not None:
            assert isinstance(activation_qat_config, FakeQuantizeConfig)
            self.activation_fake_quantizer = FakeQuantizer(activation_qat_config)
        else:
            self.activation_fake_quantizer = nn.Identity()

        # initialize weight fake quantizer
        if weight_qat_config is not None:
            assert isinstance(weight_qat_config, FakeQuantizeConfig)
            group_size = weight_qat_config.group_size
            if group_size is not None and in_dim % group_size != 0:
                raise ValueError(
                    "in_dim (%s) must be divisible by group_size (%s)"
                    % (in_dim, group_size)
                )
            self.weight_fake_quantizer = FakeQuantizer(weight_qat_config)
        else:
            self.weight_fake_quantizer = nn.Identity()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x (torch.Tensor): input tensor with shape ``(..., in_dim)``

        Returns:
            torch.Tensor: output tensor with shape ``(..., out_dim)``

        """
        _x = self.activation_fake_quantizer(x)
        w = self.weight_fake_quantizer(self.weight)
        out = F.linear(_x, w)
        if self.disabled:
            return out
        lora_out = self.lora_a(self.dropout(x))
        lora_out = (self.alpha / self.rank) * self.lora_b(lora_out)
        return out + lora_out

    @classmethod
    def from_lora_linear(
        cls,
        lora_linear: LoRALinear,
        # TODO: make the types Optional[FakeQuantizeConfig] once we
        # support torchao 0.7+ by default
        activation_qat_config: Optional["FakeQuantizeConfig"] = None,
        weight_qat_config: Optional["FakeQuantizeConfig"] = None,
    ) -> "QATLoRALinear":
        """
        Create a `QATLoRALinear` from an existing `LoRALinear`,
        preserving the weights and adapters.
        """
        if lora_linear.bias is not None:
            raise ValueError("Bias is not supported in QAT + LoRA yet")
        if lora_linear._quantize_base:
            raise ValueError("quantize_base is not compatible with QAT + LoRA")
        if isinstance(lora_linear.dropout, nn.Dropout):
            dropout = lora_linear.dropout.p
        else:
            dropout = 0.0
        new_linear = cls(
            lora_linear.in_dim,
            lora_linear.out_dim,
            lora_linear.rank,
            lora_linear.alpha,
            dropout,
            activation_qat_config,
            weight_qat_config,
        )
        # In distributed training, the model may be instantiated
        # on the meta device, in which case there is no need to
        # copy the weights, and doing so will result in an error
        if lora_linear.weight.device != torch.device("meta"):
            new_linear.weight = lora_linear.weight
        if lora_linear.lora_a.weight.device != torch.device("meta"):
            new_linear.lora_a.weight = lora_linear.lora_a.weight
        if lora_linear.lora_b.weight.device != torch.device("meta"):
            new_linear.lora_b.weight = lora_linear.lora_b.weight
        return new_linear


def _lora_a_init_params(x: nn.Linear) -> None:
    """
    Initialize LoRA A weight to Kaiming uniform.
    """
    nn.init.kaiming_uniform_(x.weight, a=math.sqrt(5))


def _lora_b_init_params(x: nn.Linear) -> None:
    """
    Initialize LoRA B weight to zeros.
    """
    nn.init.zeros_(x.weight)
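
Besides constructing QATLoRALinear directly (as in its docstring example), the source above provides QATLoRALinear.from_lora_linear to wrap an existing LoRA layer. Below is a minimal sketch of that conversion; it assumes torchao 0.7+ is installed and imports QATLoRALinear from torchtune.modules.peft.lora (the module shown above). The FakeQuantizeConfig settings mirror the ones in the class docstring.

import torch
from torchao.quantization.qat.api import FakeQuantizeConfig
from torchtune.modules.peft import LoRALinear
from torchtune.modules.peft.lora import QATLoRALinear

# a regular LoRA layer: no bias and no base quantization, as required by QAT + LoRA
base = LoRALinear(in_dim=512, out_dim=1024, rank=8, alpha=16.0, dropout=0.0)

# wrap it with fake quantization: int8 per-token activations, int4 grouped weights
qat = QATLoRALinear.from_lora_linear(
    base,
    activation_qat_config=FakeQuantizeConfig(
        dtype=torch.int8, granularity="per_token", is_symmetric=False
    ),
    weight_qat_config=FakeQuantizeConfig(
        dtype=torch.int4, group_size=8, is_symmetric=True
    ),
)

y = qat(torch.randn(2, 512))  # (2, 1024): same interface as LoRALinear.forward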

2. LoRALinear

https://docs.pytorch.org/torchtune/main/generated/torchtune.modules.peft.LoRALinear.html

class torchtune.modules.peft.LoRALinear(in_dim: int, out_dim: int, rank: int, alpha: float, dropout: float = 0.0, use_bias: bool = False, quantize_base: bool = False, **quantization_kwargs)

LoRA linear layer as introduced in LoRA: Low-Rank Adaptation of Large Language Models https://arxiv.org/abs/2106.09685.

LoRA perturbs a given layer via a low-rank approximation where only the rank decomposition matrices are trainable. In a linear layer, instead of x ↦ W₀x, a LoRALinear layer computes x ↦ W₀x + (α / r)BAx, where r is the rank of the matrices A and B and α is a scaling factor.

X = Input: (N, in_dim) = (N, in_features)
W = Weight: (out_dim, in_dim) = (out_features, in_features)
X @ (W)^T: (N, in_dim) @ (in_dim, out_dim) = (N, out_dim)

B Weight: (out_dim, rank) = (out_features, in_features)
A Weight: (rank, in_dim) = (out_features, in_features)
BA: (out_dim, rank) @ (rank, in_dim) = (out_dim, in_dim)
X @ (BA)^T -> Y: (N, in_dim) @ (in_dim, out_dim) = (N, out_dim)

X @ (A)^T -> V: (N, in_dim) @ (in_dim, rank) = (N, rank)
V @ (B)^T -> Y: (N, rank) @ (rank, out_dim) = (N, out_dim)

X @ (A)^T @ (B)^T = X @ (BA)^T -> Y: (N, out_dim)

As in the original implementation, we support dropout before multiplication by the low-rank matrices.
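
The shape bookkeeping above can be checked directly with plain tensors. The following sketch (tensor names X, W0, A, B and the sizes are illustrative, not part of the torchtune API) confirms that applying A and then B to the input matches applying the composed update BA:

import torch

N, in_dim, out_dim, rank = 4, 512, 1024, 8
alpha = 16.0

X = torch.randn(N, in_dim)           # input
W0 = torch.randn(out_dim, in_dim)    # frozen base weight
A = torch.randn(rank, in_dim)        # lora_a.weight
B = torch.randn(out_dim, rank)       # lora_b.weight

# path used in forward(): X @ A^T -> (N, rank), then @ B^T -> (N, out_dim)
lora_out = X @ A.T @ B.T

# equivalent composed form: X @ (BA)^T
composed = X @ (B @ A).T

Y = X @ W0.T + (alpha / rank) * lora_out               # (N, out_dim)
print(torch.allclose(lora_out, composed, rtol=1e-4, atol=1e-4))  # True, up to float error
print(Y.shape)                                         # torch.Size([4, 1024])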

Parameters:

  • in_dim (int): input dimension
  • out_dim (int): output dimension
  • rank (int): rank of the low-rank approximation
  • alpha (float): scaling factor for the low-rank approximation
  • dropout (float): dropout probability. Default: 0.0
  • use_bias (bool): whether to include bias in the original linear layer. Default: False
  • quantize_base (bool): Whether to quantize base linear weight or not. Default: False
  • **quantization_kwargs: Keyword arguments to pass to to_nf4 when quantizing the base linear weight. Examples of valid arguments are block_size and scaler_block_size, which control the granularity of weight quantization and scaler quantization respectively. This is only used if quantize_base is True. Default None

Raises:

ValueError: If quantize_base is False, but quantization kwargs are provided.
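
A short usage sketch for the quantization arguments (the block_size and scaler_block_size values are illustrative; they are simply forwarded to torchao's to_nf4):

from torchtune.modules.peft import LoRALinear

# NF4-quantized base weight (QLoRA-style); kwargs are passed through to to_nf4
qlora = LoRALinear(
    in_dim=512, out_dim=512, rank=8, alpha=16.0,
    quantize_base=True, block_size=64, scaler_block_size=256,
)

# the same kwargs without quantize_base=True raise the documented ValueError
try:
    LoRALinear(in_dim=512, out_dim=512, rank=8, alpha=16.0, block_size=64)
except ValueError as err:
    print(err)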

2.1. def adapter_params(self) -> list[str]

Return a list of strings corresponding to the names of the nn.Parameter objects in the model coming from the adapter.

For LoRA this means lora_a.weight and lora_b.weight.
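
A common pattern is to use adapter_params() to decide which parameters stay trainable. The loop below is a minimal sketch for a single layer (for whole models, torchtune also provides helpers such as get_adapter_params and set_trainable_params, not used here):

from torchtune.modules.peft import LoRALinear

lora = LoRALinear(in_dim=512, out_dim=512, rank=8, alpha=16.0)
adapter_names = set(lora.adapter_params())   # {'lora_a.weight', 'lora_b.weight'}

# freeze the base weight, keep only the adapter matrices trainable
for name, param in lora.named_parameters():
    param.requires_grad_(name in adapter_names)

print([n for n, p in lora.named_parameters() if p.requires_grad])
# ['lora_a.weight', 'lora_b.weight']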

2.2. def forward(self, x: torch.Tensor) -> torch.Tensor

Parameters:
x (torch.Tensor) - input tensor with shape (..., in_dim)

Returns:
output tensor with shape (..., out_dim)

Return type:
torch.Tensor
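
Two properties of forward() are worth noting: lora_b is zero-initialized, so right after construction the adapter branch contributes nothing, and setting the disabled flag skips the branch entirely (used e.g. to treat the base model as the reference model in DPO). A minimal sketch:

import torch
import torch.nn.functional as F
from torchtune.modules.peft import LoRALinear

lora = LoRALinear(in_dim=16, out_dim=32, rank=4, alpha=8.0)
x = torch.randn(2, 10, 16)                 # (..., in_dim)

y = lora(x)                                # (..., out_dim) -> (2, 10, 32)
base_out = F.linear(x, lora.weight)        # base projection only (use_bias=False)

print(torch.allclose(y, base_out))         # True: lora_b starts at zero
lora.disabled = True
print(torch.allclose(lora(x), base_out))   # True: adapter branch skipped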

2.3. def to_empty(self, *, device: Optional[Union[str, torch.device, int]], recurse: bool = True)

Move the parameters and buffers to the specified device without copying storage.

Parameters:
device (torch.device) - The desired device of the parameters and buffers in this module.

recurse (bool) - Whether parameters and buffers of submodules should be recursively moved to the specified device.

Returns:
self

Return type:
Module
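
Note that LoRALinear overrides to_empty to re-materialize only lora_a and lora_b; the base weight is left untouched because it is expected to be loaded from a checkpoint. A minimal sketch of the meta-device workflow this supports (device and sizes are illustrative):

import torch
from torchtune.modules.peft import LoRALinear

# build the layer on the meta device: no real storage is allocated
with torch.device("meta"):
    lora = LoRALinear(in_dim=512, out_dim=512, rank=8, alpha=16.0)

# allocate real (uninitialized) storage for the adapter matrices only
lora.to_empty(device="cpu")
lora.initialize_parameters()          # re-run kaiming/zeros init on the adapters

print(lora.lora_a.weight.device)      # cpu
print(lora.weight.device)             # meta (to be replaced by checkpoint weights)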

