llvmpipe: Immediate multiplication.
authorJosé Fonseca <jfonseca@vmware.com>
Sun, 25 Oct 2009 11:48:17 +0000 (11:48 +0000)
committerJosé Fonseca <jfonseca@vmware.com>
Sun, 25 Oct 2009 11:48:17 +0000 (11:48 +0000)
src/gallium/drivers/llvmpipe/lp_bld_arit.c
src/gallium/drivers/llvmpipe/lp_bld_arit.h
src/gallium/drivers/llvmpipe/lp_bld_interp.c
src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c

index 93e797c..9c59677 100644 (file)
@@ -47,6 +47,7 @@
 
 #include "util/u_memory.h"
 #include "util/u_debug.h"
+#include "util/u_math.h"
 #include "util/u_string.h"
 #include "util/u_cpu_detect.h"
 
@@ -425,6 +426,59 @@ lp_build_mul(struct lp_build_context *bld,
 
 
 /**
+ * Small vector x scale multiplication optimization.
+ */
+LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 int b)
+{
+   LLVMValueRef factor;
+
+   if(b == 0)
+      return bld->zero;
+
+   if(b == 1)
+      return a;
+
+   if(b == -1)
+      return LLVMBuildNeg(bld->builder, a, "");
+
+   if(b == 2 && bld->type.floating)
+      return lp_build_add(bld, a, a);
+
+   if(util_is_pot(b)) {
+      unsigned shift = ffs(b) - 1;
+
+      if(bld->type.floating) {
+#if 0
+         /*
+          * Power of two multiplication by directly manipulating the mantissa.
+          *
+          * XXX: This might not be always faster, it will introduce a small error
+          * for multiplication by zero, and it will produce wrong results
+          * for Inf and NaN.
+          */
+         unsigned mantissa = lp_mantissa(bld->type);
+         factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa);
+         a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), "");
+         a = LLVMBuildAdd(bld->builder, a, factor, "");
+         a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), "");
+         return a;
+#endif
+      }
+      else {
+         factor = lp_build_const_scalar(bld->type, shift);
+         return LLVMBuildShl(bld->builder, a, factor, "");
+      }
+   }
+
+   factor = lp_build_const_scalar(bld->type, (double)b);
+   return lp_build_mul(bld, a, factor);
+}
+
+
+/**
  * Generate a / b
  */
 LLVMValueRef
index 4e568c0..62be4b9 100644 (file)
@@ -67,6 +67,11 @@ lp_build_mul(struct lp_build_context *bld,
              LLVMValueRef b);
 
 LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 int b);
+
+LLVMValueRef
 lp_build_div(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b);
index 338dbca..818c0e9 100644 (file)
@@ -109,32 +109,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
 
 
 /**
- * Small vector x scale multiplication optimization.
- *
- * TODO: Should be elsewhere.
- */
-static LLVMValueRef
-coeff_multiply(struct lp_build_interp_soa_context *bld,
-               LLVMValueRef coeff,
-               int step)
-{
-   LLVMValueRef factor;
-
-   switch(step) {
-   case 0:
-      return bld->base.zero;
-   case 1:
-      return coeff;
-   case 2:
-      return lp_build_add(&bld->base, coeff, coeff);
-   default:
-      factor = lp_build_const_scalar(bld->base.type, (double)step);
-      return lp_build_mul(&bld->base, coeff, factor);
-   }
-}
-
-
-/**
  * Multiply the dadx and dady with the xstep and ystep respectively.
  */
 static void
@@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld)
       if (mode != TGSI_INTERPOLATE_CONSTANT) {
          for(chan = 0; chan < NUM_CHANNELS; ++chan) {
             if(mask & (1 << chan)) {
-               bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
-               bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
+               bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep);
+               bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep);
             }
          }
       }
index f7a030f..42e4ee6 100644 (file)
@@ -309,7 +309,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    LLVMBuilderRef builder = bld->builder;
    struct lp_build_context i32, h16, u8n;
    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
-   LLVMValueRef f32_c256, i32_c8, i32_c128, i32_c255;
+   LLVMValueRef i32_c8, i32_c128, i32_c255;
    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
    LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
    LLVMValueRef x0, x1;
@@ -328,9 +328,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    h16_vec_type = lp_build_vec_type(h16.type);
    u8n_vec_type = lp_build_vec_type(u8n.type);
 
-   f32_c256 = lp_build_const_scalar(bld->coord_type, 256.0);
-   s = lp_build_mul(&bld->coord_bld, s, f32_c256);
-   t = lp_build_mul(&bld->coord_bld, t, f32_c256);
+   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+   t = lp_build_mul_imm(&bld->coord_bld, t, 256);
 
    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
    t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");