From 8d93f360c582297b9ced11c234ab4bd53103a8a6 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Thu, 1 Jul 2010 13:57:48 +0100
Subject: [PATCH] gallivm: Support 4 x unorm8 in lp_build_fetch_rgba_aos().

Uses code and ideas from Brian Paul.
---
 src/gallium/auxiliary/draw/draw_llvm_translate.c  |   4 +-
 src/gallium/auxiliary/gallivm/lp_bld_format.h     |   9 +-
 src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 218 ++++++++++++++------
 src/gallium/auxiliary/gallivm/lp_bld_format_soa.c |   4 +-
 src/gallium/auxiliary/gallivm/lp_bld_type.h       |  48 +++++
 src/gallium/drivers/llvmpipe/lp_test_format.c     | 230 +++++++++++++++-------
 6 files changed, 372 insertions(+), 141 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c
index d7da7ed..ec7d0a4 100644
--- a/src/gallium/auxiliary/draw/draw_llvm_translate.c
+++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c
@@ -7,6 +7,7 @@
 #include "gallivm/lp_bld_struct.h"
 #include "gallivm/lp_bld_format.h"
 #include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_type.h"
 
 #include "util/u_memory.h"
 #include "util/u_format.h"
@@ -466,6 +467,7 @@ draw_llvm_translate_from(LLVMBuilderRef builder,
    const struct util_format_description *format_desc;
    LLVMValueRef zero;
    int i;
+   struct lp_type type = lp_float32_vec4_type();
 
    /*
     * The above can only cope with straight arrays: no bitfields,
@@ -493,5 +495,5 @@ draw_llvm_translate_from(LLVMBuilderRef builder,
 
    format_desc = util_format_description(from_format);
    zero = LLVMConstNull(LLVMInt32Type());
-   return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero);
+   return lp_build_fetch_rgba_aos(builder, format_desc, type, vbuffer, zero, zero);
 }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index 5f5036e..c335ca4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -48,9 +48,9 @@ struct lp_build_context;
  */
 
 LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
-                         const struct util_format_description *desc,
-                         LLVMValueRef packed);
+lp_build_format_swizzle_aos(const struct util_format_description *desc,
+                            struct lp_build_context *bld,
+                            LLVMValueRef unswizzled);
 
 LLVMValueRef
 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
@@ -60,6 +60,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 LLVMValueRef
 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
                         const struct util_format_description *format_desc,
+                        struct lp_type type,
                         LLVMValueRef ptr,
                         LLVMValueRef i,
                         LLVMValueRef j);
@@ -72,7 +73,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
 void
 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
                             struct lp_build_context *bld,
-                            const LLVMValueRef *unswizzled,
+                            const LLVMValueRef unswizzled[4],
                             LLVMValueRef swizzled_out[4]);
 
 void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 87e3e72..bec2a80 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -38,33 +38,122 @@
 #include "util/u_math.h"
 #include "util/u_string.h"
 
+#include "lp_bld_arit.h"
 #include "lp_bld_init.h"
 #include "lp_bld_type.h"
 #include "lp_bld_flow.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_swizzle.h"
 #include "lp_bld_format.h"
 
 
 /**
+ * Basic swizzling.  Rearrange the order of the unswizzled array elements
+ * according to the format description.  PIPE_SWIZZLE_ZERO/ONE are supported
+ * too.
+ * Ex: if unswizzled = {B, G, R, x}, then the returned vector is {R, G, B, 1}.
+ */
+LLVMValueRef
+lp_build_format_swizzle_aos(const struct util_format_description *desc,
+                            struct lp_build_context *bld,
+                            LLVMValueRef unswizzled)
+{
+   unsigned char swizzles[4];
+   unsigned chan;
+
+   assert(bld->type.length % 4 == 0);
+
+   for (chan = 0; chan < 4; ++chan) {
+      enum util_format_swizzle swizzle;
+
+      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+         /*
+          * For ZS formats do RGBA = ZZZ1
+          */
+         if (chan == 3) {
+            swizzle = UTIL_FORMAT_SWIZZLE_1;
+         } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
+            swizzle = UTIL_FORMAT_SWIZZLE_0;
+         } else {
+            swizzle = desc->swizzle[0];
+         }
+      } else {
+         swizzle = desc->swizzle[chan];
+      }
+      swizzles[chan] = swizzle;
+   }
+
+   return lp_build_swizzle_aos(bld, unswizzled, swizzles);
+}
+
+
+/**
+ * Whether the format matches the vector type, apart from swizzles.
+ */
+static INLINE boolean
+format_matches_type(const struct util_format_description *desc,
+                    struct lp_type type)
+{
+   enum util_format_type chan_type;
+   unsigned chan;
+
+   assert(type.length % 4 == 0);
+
+   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
+       desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB) {
+      return FALSE;
+   }
+
+   if (type.floating) {
+      chan_type = UTIL_FORMAT_TYPE_FLOAT;
+   } else if (type.fixed) {
+      chan_type = UTIL_FORMAT_TYPE_FIXED;
+   } else if (type.sign) {
+      chan_type = UTIL_FORMAT_TYPE_SIGNED;
+   } else {
+      chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
+   }
+
+   for (chan = 0; chan < desc->nr_channels; ++chan) {
+      if (desc->channel[chan].size != type.width) {
+         return FALSE;
+      }
+
+      if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
+         if (desc->channel[chan].type != chan_type ||
+             desc->channel[chan].normalized != type.norm) {
+            return FALSE;
+         }
+      }
+   }
+
+   return TRUE;
+}
+
+
+/**
  * Unpack a single pixel into its RGBA components.
  *
  * @param desc  the pixel format for the packed pixel value
+ * @param bld   context whose type is the desired return type (float[4] vs. ubyte[4])
  * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
  *
- * @return RGBA in a 4 floats vector.
+ * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
  */
-LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
-                         const struct util_format_description *desc,
+static INLINE LLVMValueRef
+lp_build_unpack_rgba_aos(const struct util_format_description *desc,
+                         struct lp_build_context *bld,
                          LLVMValueRef packed)
 {
+   LLVMBuilderRef builder = bld->builder;
+   struct lp_type type = bld->type;
    LLVMValueRef shifted, casted, scaled, masked;
    LLVMValueRef shifts[4];
    LLVMValueRef masks[4];
    LLVMValueRef scales[4];
-   LLVMValueRef swizzles[4];
-   LLVMValueRef aux[4];
+
    boolean normalized;
-   int empty_channel;
    boolean needs_uitofp;
    unsigned shift;
    unsigned i;
@@ -98,7 +187,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
    /* Initialize vector constants */
    normalized = FALSE;
    needs_uitofp = FALSE;
-   empty_channel = -1;
    shift = 0;
 
    /* Loop over 4 color components */
@@ -109,7 +197,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
          shifts[i] = LLVMGetUndef(LLVMInt32Type());
          masks[i] = LLVMConstNull(LLVMInt32Type());
          scales[i] =  LLVMConstNull(LLVMFloatType());
-         empty_channel = i;
       }
       else {
          unsigned long long mask = (1ULL << bits) - 1;
@@ -158,52 +245,21 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
    else
       scaled = casted;
 
-   for (i = 0; i < 4; ++i)
-      aux[i] = LLVMGetUndef(LLVMFloatType());
+   /*
+    * Type conversion.
+    *
+    * TODO: We could avoid floating conversion for integer to
+    * integer conversions.
+    */
 
-   /* Build swizzles vector to put components into R,G,B,A order */
-   for (i = 0; i < 4; ++i) {
-      enum util_format_swizzle swizzle;
+   lp_build_conv(builder,
+                 lp_float32_vec4_type(),
+                 type,
+                 &scaled, 1, &scaled, 1);
 
-      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
-         /*
-          * For ZS formats do RGBA = ZZZ1
-          */
-         if (i == 3) {
-            swizzle = UTIL_FORMAT_SWIZZLE_1;
-         } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
-            swizzle = UTIL_FORMAT_SWIZZLE_0;
-         } else {
-            swizzle = desc->swizzle[0];
-         }
-      } else {
-         swizzle = desc->swizzle[i];
-      }
-
-      switch (swizzle) {
-      case UTIL_FORMAT_SWIZZLE_X:
-      case UTIL_FORMAT_SWIZZLE_Y:
-      case UTIL_FORMAT_SWIZZLE_Z:
-      case UTIL_FORMAT_SWIZZLE_W:
-         swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
-         break;
-      case UTIL_FORMAT_SWIZZLE_0:
-         assert(empty_channel >= 0);
-         swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
-         break;
-      case UTIL_FORMAT_SWIZZLE_1:
-         swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
-         aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
-         break;
-      case UTIL_FORMAT_SWIZZLE_NONE:
-         swizzles[i] = LLVMGetUndef(LLVMFloatType());
-         assert(0);
-         break;
-      }
-   }
+   scaled = lp_build_format_swizzle_aos(desc, bld, scaled);
 
-   return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4),
-                                 LLVMConstVector(swizzles, 4), "");
+   return scaled;
 }
 
 
@@ -316,16 +372,23 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
  * \param format_desc  describes format of the image we're fetching from
  * \param ptr  address of the pixel block (or the texel if uncompressed)
  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
- *              these will always be (0,).
- * \return  valueRef with the float[4] RGBA pixel
+ *              these will always be (0, 0).
+ * \return  a 4 element vector with the pixel's RGBA values.
  */
 LLVMValueRef
 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
                         const struct util_format_description *format_desc,
+                        struct lp_type type,
                         LLVMValueRef ptr,
                         LLVMValueRef i,
                         LLVMValueRef j)
 {
+   struct lp_build_context bld;
+
+   /* XXX: For now we only support one pixel at a time */
+   assert(type.length == 4);
+
+   lp_build_context_init(&bld, builder, type);
 
    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
@@ -347,7 +410,24 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
 
       packed = LLVMBuildLoad(builder, ptr, "packed");
 
-      return lp_build_unpack_rgba_aos(builder, format_desc, packed);
+      if (format_matches_type(format_desc, type)) {
+         /*
+          * The format matches the type (apart from a swizzle) so no need for
+          * scaling or converting.
+          */
+
+         assert(format_desc->block.bits <= type.width * type.length);
+         if (format_desc->block.bits < type.width * type.length) {
+            packed = LLVMBuildZExt(builder, packed,
+                                   LLVMIntType(type.width * type.length), "");
+         }
+
+         packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(type), "");
+
+         return lp_build_format_swizzle_aos(format_desc, &bld, packed);
+      } else {
+         return lp_build_unpack_rgba_aos(format_desc, &bld, packed);
+      }
    }
    else if (format_desc->fetch_rgba_float) {
       /*
@@ -361,8 +441,12 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
 
       LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
       char name[256];
+      LLVMTypeRef f32t = LLVMFloatType();
+      LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
+      LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
       LLVMValueRef function;
-      LLVMValueRef tmp;
+      LLVMValueRef tmp_ptr;
+      LLVMValueRef tmp_val;
       LLVMValueRef args[4];
 
       util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
@@ -379,7 +463,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
          LLVMTypeRef function_type;
 
          ret_type = LLVMVoidType();
-         arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
+         arg_types[0] = pf32t;
          arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
          arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
          function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
@@ -394,25 +478,35 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
                               func_to_pointer((func_pointer)format_desc->fetch_rgba_float));
       }
 
-      tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
+      tmp_ptr = lp_build_alloca(builder, f32x4t, "");
 
       /*
        * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
        * in the SoA vectors.
        */
 
-      args[0] = LLVMBuildBitCast(builder, tmp,
-                                 LLVMPointerType(LLVMFloatType(), 0), "");
+      args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
       args[1] = ptr;
       args[2] = i;
       args[3] = j;
 
       LLVMBuildCall(builder, function, args, Elements(args), "");
 
-      return LLVMBuildLoad(builder, tmp, "");
+      tmp_val = LLVMBuildLoad(builder, tmp_ptr, "");
+
+      if (type.floating) {
+         /* No further conversion necessary */
+      } else {
+         lp_build_conv(builder,
+                       lp_float32_vec4_type(),
+                       type,
+                       &tmp_val, 1, &tmp_val, 1);
+      }
+
+      return tmp_val;
    }
    else {
       assert(0);
-      return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
+      return lp_build_undef(type);
    }
 }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index e1b94ad..a4a36a0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -324,8 +324,6 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
 
       unsigned k, chan;
 
-      assert(type.floating);
-
       for (chan = 0; chan < 4; ++chan) {
          rgba_out[chan] = lp_build_undef(type);
       }
@@ -345,7 +343,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 
          /* Get a single float[4]={R,G,B,A} pixel */
-         tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr,
+         tmp = lp_build_fetch_rgba_aos(builder, format_desc, type, ptr,
                                        i_elem, j_elem);
 
          /*
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index df77ef2..3ffe916 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -316,6 +316,54 @@ LLVMTypeRef
 lp_build_int32_vec4_type(void);
 
 
+static INLINE struct lp_type
+lp_float32_vec4_type(void)
+{
+   struct lp_type type;
+
+   memset(&type, 0, sizeof(type));
+   type.floating = TRUE;
+   type.sign = TRUE;
+   type.norm = FALSE;
+   type.width = 32;
+   type.length = 4;
+
+   return type;
+}
+
+
+static INLINE struct lp_type
+lp_int32_vec4_type(void)
+{
+   struct lp_type type;
+
+   memset(&type, 0, sizeof(type));
+   type.floating = FALSE;
+   type.sign = TRUE;
+   type.norm = FALSE;
+   type.width = 32;
+   type.length = 4;
+
+   return type;
+}
+
+
+static INLINE struct lp_type
+lp_unorm8_vec4_type(void)
+{
+   struct lp_type type;
+
+   memset(&type, 0, sizeof(type));
+   type.floating = FALSE;
+   type.sign = FALSE;
+   type.norm = TRUE;
+   type.width = 8;
+   type.length = 4;
+
+   return type;
+}
+
+
 struct lp_type
 lp_uint_type(struct lp_type type);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 8b6dc1c..80d2c68 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -31,6 +31,7 @@
 #include <float.h>
 
 #include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_debug.h"
 #include "gallivm/lp_bld_init.h"
 #include <llvm-c/Analysis.h>
 #include <llvm-c/Target.h>
@@ -38,6 +39,7 @@
 
 #include "util/u_memory.h"
 #include "util/u_pointer.h"
+#include "util/u_string.h"
 #include "util/u_format.h"
 #include "util/u_format_tests.h"
 #include "util/u_format_s3tc.h"
@@ -71,14 +73,16 @@ write_tsv_row(FILE *fp,
 
 
 typedef void
-(*fetch_ptr_t)(float *, const void *packed,
+(*fetch_ptr_t)(void *unpacked, const void *packed,
                unsigned i, unsigned j);
 
 
 static LLVMValueRef
-add_fetch_rgba_test(LLVMModuleRef lp_build_module,
-                    const struct util_format_description *desc)
+add_fetch_rgba_test(unsigned verbose,
+                    const struct util_format_description *desc,
+                    struct lp_type type)
 {
+   char name[256];
    LLVMTypeRef args[4];
    LLVMValueRef func;
    LLVMValueRef packed_ptr;
@@ -89,11 +93,15 @@ add_fetch_rgba_test(LLVMModuleRef lp_build_module,
    LLVMBuilderRef builder;
    LLVMValueRef rgba;
 
-   args[0] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
+   util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name,
+                 type.floating ? "float" : "unorm8");
+
+   args[0] = LLVMPointerType(lp_build_vec_type(type), 0);
    args[1] = LLVMPointerType(LLVMInt8Type(), 0);
    args[3] = args[2] = LLVMInt32Type();
 
-   func = LLVMAddFunction(lp_build_module, "fetch", LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0));
+   func = LLVMAddFunction(lp_build_module, name,
+                          LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0));
    LLVMSetFunctionCallConv(func, LLVMCCallConv);
    rgba_ptr = LLVMGetParam(func, 0);
    packed_ptr = LLVMGetParam(func, 1);
@@ -104,91 +112,101 @@ add_fetch_rgba_test(LLVMModuleRef lp_build_module,
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
-   rgba = lp_build_fetch_rgba_aos(builder, desc, packed_ptr, i, j);
+   rgba = lp_build_fetch_rgba_aos(builder, desc, type, packed_ptr, i, j);
 
    LLVMBuildStore(builder, rgba, rgba_ptr);
 
    LLVMBuildRetVoid(builder);
 
    LLVMDisposeBuilder(builder);
+
+   if (LLVMVerifyFunction(func, LLVMPrintMessageAction)) {
+      LLVMDumpValue(func);
+      abort();
+   }
+
+   LLVMRunFunctionPassManager(lp_build_pass, func);
+
+   if (verbose >= 1) {
+      LLVMDumpValue(func);
+   }
+
    return func;
 }
 
 
 PIPE_ALIGN_STACK
 static boolean
-test_format(unsigned verbose, FILE *fp,
-            const struct util_format_description *desc,
-            const struct util_format_test_case *test)
+test_format_float(unsigned verbose, FILE *fp,
+                  const struct util_format_description *desc)
 {
    LLVMValueRef fetch = NULL;
-   LLVMPassManagerRef pass = NULL;
    fetch_ptr_t fetch_ptr;
    PIPE_ALIGN_VAR(16) float unpacked[4];
-   boolean success;
-   unsigned i, j, k;
+   boolean first = TRUE;
+   boolean success = TRUE;
+   unsigned i, j, k, l;
 
-   fetch = add_fetch_rgba_test(lp_build_module, desc);
+   fetch = add_fetch_rgba_test(verbose, desc, lp_float32_vec4_type());
 
-   if (LLVMVerifyFunction(fetch, LLVMPrintMessageAction)) {
-      LLVMDumpValue(fetch);
-      abort();
+   fetch_ptr = (fetch_ptr_t)pointer_to_func(LLVMGetPointerToGlobal(lp_build_engine, fetch));
+
+   if (verbose >= 2) {
+      lp_disassemble(fetch_ptr);
    }
 
-#if 0
-   pass = LLVMCreatePassManager();
-   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(lp_build_engine), pass);
-   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
-    * but there are more on SVN. */
-   LLVMAddConstantPropagationPass(pass);
-   LLVMAddInstructionCombiningPass(pass);
-   LLVMAddPromoteMemoryToRegisterPass(pass);
-   LLVMAddGVNPass(pass);
-   LLVMAddCFGSimplificationPass(pass);
-   LLVMRunPassManager(pass, lp_build_module);
-#else
-   (void)pass;
-#endif
+   for (l = 0; l < util_format_nr_test_cases; ++l) {
+      const struct util_format_test_case *test = &util_format_test_cases[l];
 
-   fetch_ptr = (fetch_ptr_t)pointer_to_func(LLVMGetPointerToGlobal(lp_build_engine, fetch));
+      if (test->format == desc->format) {
 
-   for (i = 0; i < desc->block.height; ++i) {
-      for (j = 0; j < desc->block.width; ++j) {
-
-         memset(unpacked, 0, sizeof unpacked);
-
-         fetch_ptr(unpacked, test->packed, j, i);
-
-         success = TRUE;
-         for(k = 0; k < 4; ++k)
-            if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON)
-               success = FALSE;
-
-         if (!success) {
-            printf("FAILED\n");
-            printf("  Packed: %02x %02x %02x %02x\n",
-                   test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
-            printf("  Unpacked (%u,%u): %f %f %f %f obtained\n",
-                   j, i,
-                   unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
-            printf("                  %f %f %f %f expected\n",
-                   test->unpacked[i][j][0],
-                   test->unpacked[i][j][1],
-                   test->unpacked[i][j][2],
-                   test->unpacked[i][j][3]);
+         if (first) {
+            printf("Testing %s (float) ...\n",
+                   desc->name);
+            first = FALSE;
+         }
+
+         for (i = 0; i < desc->block.height; ++i) {
+            for (j = 0; j < desc->block.width; ++j) {
+               boolean match;
+
+               memset(unpacked, 0, sizeof unpacked);
+
+               fetch_ptr(unpacked, test->packed, j, i);
+
+               match = TRUE;
+               for(k = 0; k < 4; ++k)
+                  if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON)
+                     match = FALSE;
+
+               if (!match) {
+                  printf("FAILED\n");
+                  printf("  Packed: %02x %02x %02x %02x\n",
+                         test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
+                  printf("  Unpacked (%u,%u): %f %f %f %f obtained\n",
+                         j, i,
+                         unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
+                  printf("                  %f %f %f %f expected\n",
+                         test->unpacked[i][j][0],
+                         test->unpacked[i][j][1],
+                         test->unpacked[i][j][2],
+                         test->unpacked[i][j][3]);
+                  success = FALSE;
+               }
+            }
          }
       }
    }
 
-   if (!success)
-      LLVMDumpValue(fetch);
+   if (!success) {
+      if (verbose < 1) {
+         LLVMDumpValue(fetch);
+      }
+   }
 
    LLVMFreeMachineCodeForFunction(lp_build_engine, fetch);
    LLVMDeleteFunction(fetch);
 
-   if(pass)
-      LLVMDisposePassManager(pass);
-
    if(fp)
       write_tsv_row(fp, desc, success);
 
@@ -196,32 +214,102 @@ test_format(unsigned verbose, FILE *fp,
 }
 
 
-
+PIPE_ALIGN_STACK
 static boolean
-test_one(unsigned verbose, FILE *fp,
-         const struct util_format_description *format_desc)
+test_format_unorm8(unsigned verbose, FILE *fp,
+                   const struct util_format_description *desc)
 {
-   unsigned i;
+   LLVMValueRef fetch = NULL;
+   fetch_ptr_t fetch_ptr;
+   uint8_t unpacked[4];
    boolean first = TRUE;
    boolean success = TRUE;
+   unsigned i, j, k, l;
 
-   for (i = 0; i < util_format_nr_test_cases; ++i) {
-      const struct util_format_test_case *test = &util_format_test_cases[i];
+   fetch = add_fetch_rgba_test(verbose, desc, lp_unorm8_vec4_type());
 
-      if (test->format == format_desc->format) {
+   fetch_ptr = (fetch_ptr_t)pointer_to_func(LLVMGetPointerToGlobal(lp_build_engine, fetch));
+
+   if (verbose >= 2) {
+      lp_disassemble(fetch_ptr);
+   }
+
+   for (l = 0; l < util_format_nr_test_cases; ++l) {
+      const struct util_format_test_case *test = &util_format_test_cases[l];
+
+      if (test->format == desc->format) {
 
          if (first) {
-            printf("Testing %s ...\n",
-                   format_desc->name);
+            printf("Testing %s (unorm8) ...\n",
+                   desc->name);
             first = FALSE;
          }
 
-         if (!test_format(verbose, fp, format_desc, test)) {
-           success = FALSE;
+         for (i = 0; i < desc->block.height; ++i) {
+            for (j = 0; j < desc->block.width; ++j) {
+               boolean match;
+
+               memset(unpacked, 0, sizeof unpacked);
+
+               fetch_ptr(unpacked, test->packed, j, i);
+
+               match = TRUE;
+               for(k = 0; k < 4; ++k) {
+                  int error = float_to_ubyte(test->unpacked[i][j][k]) - unpacked[k];
+                  if (error < 0)
+                     error = -error;
+                  if (error > 1)
+                     match = FALSE;
+               }
+
+               if (!match) {
+                  printf("FAILED\n");
+                  printf("  Packed: %02x %02x %02x %02x\n",
+                         test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
+                  printf("  Unpacked (%u,%u): %02x %02x %02x %02x obtained\n",
+                         j, i,
+                         unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
+                  printf("                  %02x %02x %02x %02x expected\n",
+                         float_to_ubyte(test->unpacked[i][j][0]),
+                         float_to_ubyte(test->unpacked[i][j][1]),
+                         float_to_ubyte(test->unpacked[i][j][2]),
+                         float_to_ubyte(test->unpacked[i][j][3]));
+                  success = FALSE;
+               }
+            }
          }
       }
    }
 
+   if (!success)
+      LLVMDumpValue(fetch);
+
+   LLVMFreeMachineCodeForFunction(lp_build_engine, fetch);
+   LLVMDeleteFunction(fetch);
+
+   if(fp)
+      write_tsv_row(fp, desc, success);
+
+   return success;
+}
+
+
+
+
+static boolean
+test_one(unsigned verbose, FILE *fp,
+         const struct util_format_description *format_desc)
+{
+   boolean success = TRUE;
+
+   if (!test_format_float(verbose, fp, format_desc)) {
+     success = FALSE;
+   }
+
+   if (!test_format_unorm8(verbose, fp, format_desc)) {
+     success = FALSE;
+   }
+
    return success;
 }
 
-- 
2.7.4