Delete diffq directory
This commit is contained in:
121
diffq/uniform.py
121
diffq/uniform.py
@@ -1,121 +0,0 @@
|
||||
# Copyright (c) Facebook, Inc. and its affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
"""
|
||||
Classic uniform quantization over n bits.
|
||||
"""
|
||||
from typing import Tuple
|
||||
import torch
|
||||
|
||||
from .base import BaseQuantizer
|
||||
from .utils import simple_repr
|
||||
|
||||
|
||||
def uniform_quantize(p: torch.Tensor, bits: torch.Tensor = torch.tensor(8.)):
    """
    Quantize the given weights over `bits` bits.

    The values of `p` are shifted and scaled into [0, 1] using the global
    minimum and maximum of the tensor, then rounded to the nearest of
    `2 ** bits` evenly spaced levels.

    Args:
        p (torch.Tensor): weights to quantize.
        bits (torch.Tensor): number of bits, every entry must be in [1, 15].

    Returns:
        - quantized levels, stored as uint8 when all `bits <= 8`,
          as int16 otherwise.
        - (min, max) range, as Python floats.

    A constant tensor (min == max) is mapped to all-zero levels, so that
    unquantizing with the returned range gives back that constant.
    """
    assert (bits >= 1).all() and (bits <= 15).all()
    num_levels = (2 ** bits.float()).long()
    mn = p.min().item()
    mx = p.max().item()
    span = mx - mn
    p = p - mn
    if span != 0:
        p = p / span  # put p in [0, 1]
    # else: every entry equals `mn`, so `p` is already all zeros; dividing
    # by the zero span would turn it into NaN before the integer cast.
    unit = 1 / (num_levels - 1)  # quantization unit
    levels = (p / unit).round()
    if (bits <= 8).all():
        levels = levels.byte()
    else:
        levels = levels.short()
    return levels, (mn, mx)
|
||||
|
||||
|
||||
def uniform_unquantize(levels: torch.Tensor, scales: Tuple[float, float],
                       bits: torch.Tensor = torch.tensor(8.)):
    """
    Map quantized levels back to weights. Return a float32 tensor.

    Args:
        levels (torch.Tensor): integer levels, each in [0, 2 ** bits - 1].
        scales (Tuple[float, float]): (min, max) range the levels span.
        bits (torch.Tensor): number of bits the levels were quantized over.
    """
    lo, hi = scales
    # Spacing between two consecutive levels once rescaled to [0, 1].
    step = 1 / (2 ** bits.float() - 1)
    normalized = levels.float() * step  # in [0, 1]
    return normalized * (hi - lo) + lo
|
||||
|
||||
|
||||
class UniformQuantizer(BaseQuantizer):
    """
    Quantizer that applies `uniform_quantize` / `uniform_unquantize` with a
    fixed number of bits to the parameters tracked in `self._qparams`.
    """

    def __init__(self, model: torch.nn.Module, bits: float = 8., min_size: float = 0.01,
                 float16: bool = False, qat: bool = False, exclude=[], detect_bound=True):
        """
        Args:
            model (torch.nn.Module): model to quantize
            bits (float): number of bits to quantize over.
            min_size (float): minimum size in MB of a parameter to be quantized.
            float16 (bool): if a layer is smaller than min_size, should we still do float16?
            qat (bool): perform quantized aware training.
            exclude (list[str]): list of patterns used to match parameters to exclude.
                For instance `['bias']` to exclude all bias terms.
            detect_bound (bool): if True, will detect bound parameters and reuse
                the same quantized tensor for both.
        """
        # Both attributes are assigned before the base constructor runs,
        # in case it relies on them -- NOTE(review): confirm in BaseQuantizer.
        self.bits = float(bits)
        self.qat = qat

        super().__init__(model, min_size, float16, exclude, detect_bound)

    def __repr__(self):
        return simple_repr(self)

    def _pre_forward_train(self):
        """
        When `qat` is enabled, swap every tracked parameter of the model for
        its quantized-then-unquantized value. Return True if a swap was done.
        """
        if not self.qat:
            return False
        for qparam in self._qparams:
            bound = qparam.other
            if bound is None:
                packed = self._quantize_param(qparam)
                dequantized = self._unquantize_param(qparam, packed)
                # Value equals the dequantized weights, while the gradient
                # still flows to the original parameter (the difference is
                # detached from the graph).
                replacement = qparam.param + (dequantized - qparam.param).detach()
            else:
                # Bound parameter: reuse whatever tensor is currently set
                # on the module it is bound to.
                replacement = bound.module._parameters[bound.name]
            qparam.module._parameters[qparam.name] = replacement
        return True

    def _post_forward_train(self):
        """
        When `qat` is enabled, put the original parameters back on their
        modules. Return True if a restore was done.
        """
        if not self.qat:
            return False
        for qparam in self._qparams:
            qparam.module._parameters[qparam.name] = qparam.param
        return True

    def _quantize_param(self, qparam):
        """Return `(levels, scales)` for the raw data of `qparam.param`."""
        n_bits = torch.tensor(self.bits)
        return uniform_quantize(qparam.param.data, n_bits)

    def _unquantize_param(self, qparam, quantized):
        """Rebuild a float tensor from a `(levels, scales)` pair."""
        levels, scales = quantized
        n_bits = torch.tensor(self.bits)
        return uniform_unquantize(levels, scales, n_bits)

    def model_size(self):
        """
        Non differentiable model size in MB.
        """
        base = super().model_size()
        # `bits` per entry plus 64 bits (2 floats) for the overall scales.
        # Bound parameters are skipped so that only one copy is counted.
        quantized_bits = sum(
            self.bits * qparam.param.numel() + 64
            for qparam in self._qparams
            if qparam.other is None
        )
        return base + quantized_bits / (2**20 * 8)  # bits to MegaBytes

    def true_model_size(self):
        """
        Return the true quantized model size, in MB, without extra
        compression.
        """
        size = self.model_size()
        return size.item()
|
||||
Reference in New Issue
Block a user