Delete diffq directory

2022-06-13 02:10:10 -05:00
parent b16a422b39
commit 0e6f64ef20
5 changed files with 0 additions and 724 deletions
--- a/diffq/uniform.py
+++ b/diffq/uniform.py
@@ -1,121 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-
-"""
-Classic uniform quantization over n bits.
-"""
-from typing import Tuple
-import torch
-
-from .base import BaseQuantizer
-from .utils import simple_repr
-
-
-def uniform_quantize(p: torch.Tensor, bits: torch.Tensor = torch.tensor(8.)):
-    """
-    Quantize the given weights over `bits` bits.
-
-    Returns:
-        - quantized levels
-        - (min, max) range.
-
-    """
-    assert (bits >= 1).all() and (bits <= 15).all()
-    num_levels = (2 ** bits.float()).long()
-    mn = p.min().item()
-    mx = p.max().item()
-    p = (p - mn) / (mx - mn)  # put p in [0, 1]
-    unit = 1 / (num_levels - 1)  # quantization unit
-    levels = (p / unit).round()
-    if (bits <= 8).all():
-        levels = levels.byte()
-    else:
-        levels = levels.short()
-    return levels, (mn, mx)
-
-
-def uniform_unquantize(levels: torch.Tensor, scales: Tuple[float, float],
-                       bits: torch.Tensor = torch.tensor(8.)):
-    """
-    Unquantize the weights from the levels and scale. Return a float32 tensor.
-    """
-    mn, mx = scales
-    num_levels = 2 ** bits.float()
-    unit = 1 / (num_levels - 1)
-    levels = levels.float()
-    p = levels * unit  # in [0, 1]
-    return p * (mx - mn) + mn
-
-
-class UniformQuantizer(BaseQuantizer):
-    def __init__(self, model: torch.nn.Module, bits: float = 8., min_size: float = 0.01,
-                 float16: bool = False, qat: bool = False, exclude=[], detect_bound=True):
-        """
-        Args:
-            model (torch.nn.Module): model to quantize
-            bits (float): number of bits to quantize over.
-            min_size (float): minimum size in MB of a parameter to be quantized.
-            float16 (bool): if a layer is smaller than min_size, should we still do float16?
-            qat (bool): perform quantized aware training.
-            exclude (list[str]): list of patterns used to match parameters to exclude.
-                For instance `['bias']` to exclude all bias terms.
-            detect_bound (bool): if True, will detect bound parameters and reuse
-                the same quantized tensor for both.
-        """
-        self.bits = float(bits)
-        self.qat = qat
-
-        super().__init__(model, min_size, float16, exclude, detect_bound)
-
-    def __repr__(self):
-        return simple_repr(self, )
-
-    def _pre_forward_train(self):
-        if self.qat:
-            for qparam in self._qparams:
-                if qparam.other is not None:
-                    new_param = qparam.other.module._parameters[qparam.other.name]
-                else:
-                    quantized = self._quantize_param(qparam)
-                    qvalue = self._unquantize_param(qparam, quantized)
-                    new_param = qparam.param + (qvalue - qparam.param).detach()
-                qparam.module._parameters[qparam.name] = new_param
-            return True
-        return False
-
-    def _post_forward_train(self):
-        if self.qat:
-            for qparam in self._qparams:
-                qparam.module._parameters[qparam.name] = qparam.param
-            return True
-        return False
-
-    def _quantize_param(self, qparam):
-        levels, scales = uniform_quantize(qparam.param.data, torch.tensor(self.bits))
-        return (levels, scales)
-
-    def _unquantize_param(self, qparam, quantized):
-        levels, scales = quantized
-        return uniform_unquantize(levels, scales, torch.tensor(self.bits))
-
-    def model_size(self):
-        """
-        Non differentiable model size in MB.
-        """
-        total = super().model_size()
-        subtotal = 0
-        for qparam in self._qparams:
-            if qparam.other is None:  # if parameter is bound, count only one copy.
-                subtotal += self.bits * qparam.param.numel() + 64  # 2 float for the overall scales
-        subtotal /= 2**20 * 8  # bits to MegaBytes
-        return total + subtotal
-
-    def true_model_size(self):
-        """
-        Return the true quantized model size, in MB, without extra
-        compression.
-        """
-        return self.model_size().item()