From 7e8be000101cc6fe3846745b559f2d785430e253 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Thu, 30 Jul 2015 15:51:58 +0300
Subject: [PATCH] i965/fs: Import image format conversion primitives.

Define bitfield packing, unpacking and type conversion operations in
terms of which the image format conversion code will be implemented.
These don't directly know about image formats: The packing and
unpacking functions take a 4-tuple of bit shifts and a 4-tuple of bit
widths as arguments, determining the bitfield position of each
component.  Most of the remaining functions perform integer, fixed
point normalized, and floating point type conversions, mapping between
a target type with per-component bit widths given by a parameter and a
matching native representation of the same type.

v2: Drop VEC4 suport.
v3: Rebase.
v4: Fix clamping of negative floats in the unsigned case of
    emit_convert_to_scaled().

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 .../drivers/dri/i965/brw_fs_surface_builder.cpp    | 265 +++++++++++++++++++++
 1 file changed, 265 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
index 9682ca3..441b2a0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -584,4 +584,269 @@ namespace {
          return dst;
       }
    }
+
+   namespace image_format_conversion {
+      using image_format_info::color_u;
+
+      namespace {
+         /**
+          * Maximum representable value in an unsigned integer with the given
+          * number of bits.
+          */
+         inline unsigned
+         scale(unsigned n)
+         {
+            return (1 << n) - 1;
+         }
+      }
+
+      /**
+       * Pack the vector \p src in a bitfield given the per-component bit
+       * shifts and widths.  Note that bitfield components are not allowed to
+       * cross 32-bit boundaries.
+       */
+      fs_reg
+      emit_pack(const fs_builder &bld, const fs_reg &src,
+                const color_u &shifts, const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+         bool seen[4] = {};
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+               /* Shift each component left to the correct bitfield position. */
+               bld.SHL(tmp, offset(src, bld, c), fs_reg(shifts[c] % 32));
+
+               /* Add everything up. */
+               if (seen[shifts[c] / 32]) {
+                  bld.OR(offset(dst, bld, shifts[c] / 32),
+                         offset(dst, bld, shifts[c] / 32), tmp);
+               } else {
+                  bld.MOV(offset(dst, bld, shifts[c] / 32), tmp);
+                  seen[shifts[c] / 32] = true;
+               }
+            }
+         }
+
+         return dst;
+      }
+
+      /**
+       * Unpack a vector from the bitfield \p src given the per-component bit
+       * shifts and widths.  Note that bitfield components are not allowed to
+       * cross 32-bit boundaries.
+       */
+      fs_reg
+      emit_unpack(const fs_builder &bld, const fs_reg &src,
+                  const color_u &shifts, const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(src.type, 4);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               /* Shift left to discard the most significant bits. */
+               bld.SHL(offset(dst, bld, c),
+                       offset(src, bld, shifts[c] / 32),
+                       fs_reg(32 - shifts[c] % 32 - widths[c]));
+
+               /* Shift back to the least significant bits using an arithmetic
+                * shift to get sign extension on signed types.
+                */
+               bld.ASR(offset(dst, bld, c),
+                       offset(dst, bld, c), fs_reg(32 - widths[c]));
+            }
+         }
+
+         return dst;
+      }
+
+      /**
+       * Convert an integer vector into another integer vector of the
+       * specified bit widths, properly handling overflow.
+       */
+      fs_reg
+      emit_convert_to_integer(const fs_builder &bld, const fs_reg &src,
+                              const color_u &widths, bool is_signed)
+      {
+         const unsigned s = (is_signed ? 1 : 0);
+         const fs_reg dst = bld.vgrf(
+            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
+         assert(src.type == dst.type);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               /* Clamp to the maximum value. */
+               bld.emit_minmax(offset(dst, bld, c), offset(src, bld, c),
+                               fs_reg((int)scale(widths[c] - s)),
+                               BRW_CONDITIONAL_L);
+
+               /* Clamp to the minimum value. */
+               if (is_signed)
+                  bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
+                                  fs_reg(-(int)scale(widths[c] - s) - 1),
+                                  BRW_CONDITIONAL_G);
+            }
+         }
+
+         return dst;
+      }
+
+      /**
+       * Convert a normalized fixed-point vector of the specified signedness
+       * and bit widths into a floating point vector.
+       */
+      fs_reg
+      emit_convert_from_scaled(const fs_builder &bld, const fs_reg &src,
+                               const color_u &widths, bool is_signed)
+      {
+         const unsigned s = (is_signed ? 1 : 0);
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               /* Convert to float. */
+               bld.MOV(offset(dst, bld, c), offset(src, bld, c));
+
+               /* Divide by the normalization constants. */
+               bld.MUL(offset(dst, bld, c), offset(dst, bld, c),
+                       fs_reg(1.0f / scale(widths[c] - s)));
+
+               /* Clamp to the minimum value. */
+               if (is_signed)
+                  bld.emit_minmax(offset(dst, bld, c),
+                                  offset(dst, bld, c), fs_reg(-1.0f),
+                                  BRW_CONDITIONAL_G);
+            }
+         }
+         return dst;
+      }
+
+      /**
+       * Convert a floating-point vector into a normalized fixed-point vector
+       * of the specified signedness and bit widths.
+       */
+      fs_reg
+      emit_convert_to_scaled(const fs_builder &bld, const fs_reg &src,
+                             const color_u &widths, bool is_signed)
+      {
+         const unsigned s = (is_signed ? 1 : 0);
+         const fs_reg dst = bld.vgrf(
+            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
+         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               /* Clamp the normalized floating-point argument. */
+               if (is_signed) {
+                  bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c),
+                                  fs_reg(-1.0f), BRW_CONDITIONAL_G);
+
+                  bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
+                                  fs_reg(1.0f), BRW_CONDITIONAL_L);
+               } else {
+                  set_saturate(true, bld.MOV(offset(fdst, bld, c),
+                                             offset(src, bld, c)));
+               }
+
+               /* Multiply by the normalization constants. */
+               bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c),
+                       fs_reg((float)scale(widths[c] - s)));
+
+               /* Convert to integer. */
+               bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
+               bld.MOV(offset(dst, bld, c), offset(fdst, bld, c));
+            }
+         }
+
+         return dst;
+      }
+
+      /**
+       * Convert a floating point vector of the specified bit widths into a
+       * 32-bit floating point vector.
+       */
+      fs_reg
+      emit_convert_from_float(const fs_builder &bld, const fs_reg &src,
+                              const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               bld.MOV(offset(dst, bld, c), offset(src, bld, c));
+
+               /* Extend 10-bit and 11-bit floating point numbers to 15 bits.
+                * This works because they have a 5-bit exponent just like the
+                * 16-bit floating point format, and they have no sign bit.
+                */
+               if (widths[c] < 16)
+                  bld.SHL(offset(dst, bld, c),
+                          offset(dst, bld, c), fs_reg(15 - widths[c]));
+
+               /* Convert to 32-bit floating point. */
+               bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c));
+            }
+         }
+
+         return fdst;
+      }
+
+      /**
+       * Convert a vector into a floating point vector of the specified bit
+       * widths.
+       */
+      fs_reg
+      emit_convert_to_float(const fs_builder &bld, const fs_reg &src,
+                            const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         for (unsigned c = 0; c < 4; ++c) {
+            if (widths[c]) {
+               bld.MOV(offset(fdst, bld, c), offset(src, bld, c));
+
+               /* Clamp to the minimum value. */
+               if (widths[c] < 16)
+                  bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
+                                  fs_reg(0.0f), BRW_CONDITIONAL_G);
+
+               /* Convert to 16-bit floating-point. */
+               bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));
+
+               /* Discard the least significant bits to get floating point
+                * numbers of the requested width.  This works because the
+                * 10-bit and 11-bit floating point formats have a 5-bit
+                * exponent just like the 16-bit format, and they have no sign
+                * bit.
+                */
+               if (widths[c] < 16)
+                  bld.SHR(offset(dst, bld, c), offset(dst, bld, c),
+                          fs_reg(15 - widths[c]));
+            }
+         }
+
+         return dst;
+      }
+
+      /**
+       * Fill missing components of a vector with 0, 0, 0, 1.
+       */
+      fs_reg
+      emit_pad(const fs_builder &bld, const fs_reg &src,
+               const color_u &widths)
+      {
+         const fs_reg dst = bld.vgrf(src.type, 4);
+         const unsigned pad[] = { 0, 0, 0, 1 };
+
+         for (unsigned c = 0; c < 4; ++c)
+            bld.MOV(offset(dst, bld, c),
+                    widths[c] ? offset(src, bld, c) : fs_reg(pad[c]));
+
+         return dst;
+      }
+   }
 }
-- 
2.7.4