Implement kilp and make it work
author: Zack Rusin <zack@tungstengraphics.com>
Tue, 6 Nov 2007 11:06:04 +0000 (06:06 -0500)
committer: Zack Rusin <zack@tungstengraphics.com>
Tue, 11 Dec 2007 14:49:33 +0000 (09:49 -0500)
src/mesa/pipe/llvm/gallivm.cpp
src/mesa/pipe/llvm/gallivm_builtins.cpp
src/mesa/pipe/llvm/instructions.cpp
src/mesa/pipe/llvm/instructions.h
src/mesa/pipe/llvm/llvm_base_shader.cpp
src/mesa/pipe/llvm/llvm_builtins.c
src/mesa/pipe/llvm/llvm_entry.c
src/mesa/pipe/llvm/storage.cpp
src/mesa/pipe/llvm/storage.h
src/mesa/pipe/softpipe/sp_quad_fs.c

index 65c9507..a1101a7 100644 (file)
@@ -432,7 +432,11 @@ translate_instruction(llvm::Module *module,
       break;
    case TGSI_OPCODE_DDY:
       break;
-   case TGSI_OPCODE_KILP:
+   case TGSI_OPCODE_KILP: {
+      out = instr->kilp(inputs[0]);
+      storage->setKilElement(out);
+      return;
+   }
       break;
    case TGSI_OPCODE_PK2H:
       break;
@@ -929,11 +933,9 @@ int gallivm_fragment_shader_exec(struct gallivm_prog *prog,
    fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function);
    assert(runner);
 
-   runner(fx, fy, dests, inputs, prog->num_interp,
-          consts, prog->num_consts,
-          samplers);
-
-   return 0;
+   return runner(fx, fy, dests, inputs, prog->num_interp,
+                 consts, prog->num_consts,
+                 samplers);
 }
 
 void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix)
index da1e6ae..48693ca 100644 (file)
@@ -69,6 +69,15 @@ FunctionType* FuncTy_12 = FunctionType::get(
 
 PointerType* PointerTy_11 = PointerType::get(FuncTy_12);
 
+std::vector<const Type*>FuncTy_13_args;
+FuncTy_13_args.push_back(VectorTy_4);
+ParamAttrsList *FuncTy_13_PAL = 0;
+FunctionType* FuncTy_13 = FunctionType::get(
+  /*Result=*/IntegerType::get(32),
+  /*Params=*/FuncTy_13_args,
+  /*isVarArg=*/false,
+  /*ParamAttrs=*/FuncTy_13_PAL);
+
 
 // Function Declarations
 
@@ -132,6 +141,12 @@ Function* func_vsin = new Function(
   /*Name=*/"vsin", mod); 
 func_vsin->setCallingConv(CallingConv::C);
 
+Function* func_kilp = new Function(
+  /*Type=*/FuncTy_13,
+  /*Linkage=*/GlobalValue::ExternalLinkage,
+  /*Name=*/"kilp", mod); 
+func_kilp->setCallingConv(CallingConv::C);
+
 // Global Variable Declarations
 
 
@@ -152,64 +167,64 @@ GlobalVariable* gvar_array__str1 = new GlobalVariable(
 mod);
 
 // Constant Definitions
-Constant* const_array_13 = ConstantArray::get("VEC IN   is %f %f %f %f\x0A", true);
-Constant* const_array_14 = ConstantArray::get("VEC OUT  is %f %f %f %f\x0A", true);
-ConstantFP* const_float_15 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f));
-ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f));
-Constant* const_float_17 = Constant::getNullValue(Type::FloatTy);
-Constant* const_int32_18 = Constant::getNullValue(IntegerType::get(32));
-std::vector<Constant*> const_packed_19_elems;
-ConstantFP* const_float_20 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
-const_packed_19_elems.push_back(const_float_20);
-UndefValue* const_float_21 = UndefValue::get(Type::FloatTy);
-const_packed_19_elems.push_back(const_float_21);
-const_packed_19_elems.push_back(const_float_21);
-const_packed_19_elems.push_back(const_float_20);
-Constant* const_packed_19 = ConstantVector::get(VectorTy_4, const_packed_19_elems);
-ConstantInt* const_int32_22 = ConstantInt::get(APInt(32,  "1", 10));
-ConstantInt* const_int32_23 = ConstantInt::get(APInt(32,  "3", 10));
-ConstantInt* const_int32_24 = ConstantInt::get(APInt(32,  "2", 10));
-std::vector<Constant*> const_packed_25_elems;
-const_packed_25_elems.push_back(const_float_20);
-const_packed_25_elems.push_back(const_float_17);
-const_packed_25_elems.push_back(const_float_17);
-const_packed_25_elems.push_back(const_float_20);
-Constant* const_packed_25 = ConstantVector::get(VectorTy_4, const_packed_25_elems);
-Constant* const_double_26 = Constant::getNullValue(Type::DoubleTy);
-std::vector<Constant*> const_packed_27_elems;
-const_packed_27_elems.push_back(const_int32_18);
-ConstantInt* const_int32_28 = ConstantInt::get(APInt(32,  "5", 10));
-const_packed_27_elems.push_back(const_int32_28);
-const_packed_27_elems.push_back(const_int32_24);
-const_packed_27_elems.push_back(const_int32_23);
-Constant* const_packed_27 = ConstantVector::get(VectorTy_7, const_packed_27_elems);
-std::vector<Constant*> const_packed_29_elems;
-const_packed_29_elems.push_back(const_int32_18);
-const_packed_29_elems.push_back(const_int32_22);
-ConstantInt* const_int32_30 = ConstantInt::get(APInt(32,  "6", 10));
-const_packed_29_elems.push_back(const_int32_30);
-const_packed_29_elems.push_back(const_int32_23);
-Constant* const_packed_29 = ConstantVector::get(VectorTy_7, const_packed_29_elems);
-std::vector<Constant*> const_packed_31_elems;
-const_packed_31_elems.push_back(const_int32_18);
-const_packed_31_elems.push_back(const_int32_22);
-const_packed_31_elems.push_back(const_int32_24);
-ConstantInt* const_int32_32 = ConstantInt::get(APInt(32,  "7", 10));
-const_packed_31_elems.push_back(const_int32_32);
-Constant* const_packed_31 = ConstantVector::get(VectorTy_7, const_packed_31_elems);
-std::vector<Constant*> const_ptr_33_indices;
-const_ptr_33_indices.push_back(const_int32_18);
-const_ptr_33_indices.push_back(const_int32_18);
-Constant* const_ptr_33 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_33_indices[0], const_ptr_33_indices.size() );
-UndefValue* const_packed_34 = UndefValue::get(VectorTy_4);
-std::vector<Constant*> const_ptr_35_indices;
-const_ptr_35_indices.push_back(const_int32_18);
-const_ptr_35_indices.push_back(const_int32_18);
-Constant* const_ptr_35 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_35_indices[0], const_ptr_35_indices.size() );
+Constant* const_array_14 = ConstantArray::get("VEC IN   is %f %f %f %f\x0A", true);
+Constant* const_array_15 = ConstantArray::get("VEC OUT  is %f %f %f %f\x0A", true);
+ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f));
+ConstantFP* const_float_17 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f));
+Constant* const_float_18 = Constant::getNullValue(Type::FloatTy);
+Constant* const_int32_19 = Constant::getNullValue(IntegerType::get(32));
+std::vector<Constant*> const_packed_20_elems;
+ConstantFP* const_float_21 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
+const_packed_20_elems.push_back(const_float_21);
+UndefValue* const_float_22 = UndefValue::get(Type::FloatTy);
+const_packed_20_elems.push_back(const_float_22);
+const_packed_20_elems.push_back(const_float_22);
+const_packed_20_elems.push_back(const_float_21);
+Constant* const_packed_20 = ConstantVector::get(VectorTy_4, const_packed_20_elems);
+ConstantInt* const_int32_23 = ConstantInt::get(APInt(32,  "1", 10));
+ConstantInt* const_int32_24 = ConstantInt::get(APInt(32,  "3", 10));
+ConstantInt* const_int32_25 = ConstantInt::get(APInt(32,  "2", 10));
+std::vector<Constant*> const_packed_26_elems;
+const_packed_26_elems.push_back(const_float_21);
+const_packed_26_elems.push_back(const_float_18);
+const_packed_26_elems.push_back(const_float_18);
+const_packed_26_elems.push_back(const_float_21);
+Constant* const_packed_26 = ConstantVector::get(VectorTy_4, const_packed_26_elems);
+Constant* const_double_27 = Constant::getNullValue(Type::DoubleTy);
+std::vector<Constant*> const_packed_28_elems;
+const_packed_28_elems.push_back(const_int32_19);
+ConstantInt* const_int32_29 = ConstantInt::get(APInt(32,  "5", 10));
+const_packed_28_elems.push_back(const_int32_29);
+const_packed_28_elems.push_back(const_int32_25);
+const_packed_28_elems.push_back(const_int32_24);
+Constant* const_packed_28 = ConstantVector::get(VectorTy_7, const_packed_28_elems);
+std::vector<Constant*> const_packed_30_elems;
+const_packed_30_elems.push_back(const_int32_19);
+const_packed_30_elems.push_back(const_int32_23);
+ConstantInt* const_int32_31 = ConstantInt::get(APInt(32,  "6", 10));
+const_packed_30_elems.push_back(const_int32_31);
+const_packed_30_elems.push_back(const_int32_24);
+Constant* const_packed_30 = ConstantVector::get(VectorTy_7, const_packed_30_elems);
+std::vector<Constant*> const_packed_32_elems;
+const_packed_32_elems.push_back(const_int32_19);
+const_packed_32_elems.push_back(const_int32_23);
+const_packed_32_elems.push_back(const_int32_25);
+ConstantInt* const_int32_33 = ConstantInt::get(APInt(32,  "7", 10));
+const_packed_32_elems.push_back(const_int32_33);
+Constant* const_packed_32 = ConstantVector::get(VectorTy_7, const_packed_32_elems);
+std::vector<Constant*> const_ptr_34_indices;
+const_ptr_34_indices.push_back(const_int32_19);
+const_ptr_34_indices.push_back(const_int32_19);
+Constant* const_ptr_34 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_34_indices[0], const_ptr_34_indices.size() );
+UndefValue* const_packed_35 = UndefValue::get(VectorTy_4);
+std::vector<Constant*> const_ptr_36_indices;
+const_ptr_36_indices.push_back(const_int32_19);
+const_ptr_36_indices.push_back(const_int32_19);
+Constant* const_ptr_36 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_36_indices[0], const_ptr_36_indices.size() );
 
 // Global Variable Definitions
-gvar_array__str->setInitializer(const_array_13);
-gvar_array__str1->setInitializer(const_array_14);
+gvar_array__str->setInitializer(const_array_14);
+gvar_array__str1->setInitializer(const_array_15);
 
 // Function Definitions
 
@@ -224,12 +239,12 @@ gvar_array__str1->setInitializer(const_array_14);
   BasicBlock* label_entry = new BasicBlock("entry",func_approx,0);
   
   // Block entry (label_entry)
-  FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_15, "cmp", label_entry);
-  SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_15, float_b, "b.addr.0", label_entry);
-  FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_16, "cmp3", label_entry);
-  SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_16, float_b_addr_0, "b.addr.1", label_entry);
-  FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_17, "cmp7", label_entry);
-  SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_17, float_a, "a.addr.0", label_entry);
+  FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_16, "cmp", label_entry);
+  SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_16, float_b, "b.addr.0", label_entry);
+  FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_17, "cmp3", label_entry);
+  SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_17, float_b_addr_0, "b.addr.1", label_entry);
+  FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_18, "cmp7", label_entry);
+  SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_18, float_a, "a.addr.0", label_entry);
   std::vector<Value*> float_call_params;
   float_call_params.push_back(float_a_addr_0);
   float_call_params.push_back(float_b_addr_1);
@@ -246,36 +261,36 @@ gvar_array__str1->setInitializer(const_array_14);
   Value* packed_tmp = args++;
   packed_tmp->setName("tmp");
   
-  BasicBlock* label_entry_37 = new BasicBlock("entry",func_lit,0);
+  BasicBlock* label_entry_38 = new BasicBlock("entry",func_lit,0);
   BasicBlock* label_ifthen = new BasicBlock("ifthen",func_lit,0);
   BasicBlock* label_UnifiedReturnBlock = new BasicBlock("UnifiedReturnBlock",func_lit,0);
   
-  // Block entry (label_entry_37)
-  ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_tmp, const_int32_18, "tmp7", label_entry_37);
-  FCmpInst* int1_cmp_38 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp7, const_float_17, "cmp", label_entry_37);
-  new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_38, label_entry_37);
+  // Block entry (label_entry_38)
+  ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_tmp, const_int32_19, "tmp7", label_entry_38);
+  FCmpInst* int1_cmp_39 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp7, const_float_18, "cmp", label_entry_38);
+  new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_39, label_entry_38);
   
   // Block ifthen (label_ifthen)
-  InsertElementInst* packed_tmp12 = new InsertElementInst(const_packed_19, float_tmp7, const_int32_22, "tmp12", label_ifthen);
-  ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_22, "tmp14", label_ifthen);
-  ExtractElementInst* float_tmp16 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp16", label_ifthen);
-  FCmpInst* int1_cmp_i = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp16, const_float_15, "cmp.i", label_ifthen);
-  SelectInst* float_b_addr_0_i = new SelectInst(int1_cmp_i, const_float_15, float_tmp16, "b.addr.0.i", label_ifthen);
-  FCmpInst* int1_cmp3_i = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0_i, const_float_16, "cmp3.i", label_ifthen);
-  SelectInst* float_b_addr_1_i = new SelectInst(int1_cmp3_i, const_float_16, float_b_addr_0_i, "b.addr.1.i", label_ifthen);
-  FCmpInst* int1_cmp7_i = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp14, const_float_17, "cmp7.i", label_ifthen);
-  SelectInst* float_a_addr_0_i = new SelectInst(int1_cmp7_i, const_float_17, float_tmp14, "a.addr.0.i", label_ifthen);
+  InsertElementInst* packed_tmp12 = new InsertElementInst(const_packed_20, float_tmp7, const_int32_23, "tmp12", label_ifthen);
+  ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp14", label_ifthen);
+  ExtractElementInst* float_tmp16 = new ExtractElementInst(packed_tmp, const_int32_24, "tmp16", label_ifthen);
+  FCmpInst* int1_cmp_i = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp16, const_float_16, "cmp.i", label_ifthen);
+  SelectInst* float_b_addr_0_i = new SelectInst(int1_cmp_i, const_float_16, float_tmp16, "b.addr.0.i", label_ifthen);
+  FCmpInst* int1_cmp3_i = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0_i, const_float_17, "cmp3.i", label_ifthen);
+  SelectInst* float_b_addr_1_i = new SelectInst(int1_cmp3_i, const_float_17, float_b_addr_0_i, "b.addr.1.i", label_ifthen);
+  FCmpInst* int1_cmp7_i = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp14, const_float_18, "cmp7.i", label_ifthen);
+  SelectInst* float_a_addr_0_i = new SelectInst(int1_cmp7_i, const_float_18, float_tmp14, "a.addr.0.i", label_ifthen);
   std::vector<Value*> float_call_i_params;
   float_call_i_params.push_back(float_a_addr_0_i);
   float_call_i_params.push_back(float_b_addr_1_i);
   CallInst* float_call_i = new CallInst(func_powf, float_call_i_params.begin(), float_call_i_params.end(), "call.i", label_ifthen);
   float_call_i->setCallingConv(CallingConv::C);
   float_call_i->setTailCall(true);
-  InsertElementInst* packed_tmp18 = new InsertElementInst(packed_tmp12, float_call_i, const_int32_24, "tmp18", label_ifthen);
+  InsertElementInst* packed_tmp18 = new InsertElementInst(packed_tmp12, float_call_i, const_int32_25, "tmp18", label_ifthen);
   new ReturnInst(packed_tmp18, label_ifthen);
   
   // Block UnifiedReturnBlock (label_UnifiedReturnBlock)
-  new ReturnInst(const_packed_25, label_UnifiedReturnBlock);
+  new ReturnInst(const_packed_26, label_UnifiedReturnBlock);
   
 }
 
@@ -289,7 +304,7 @@ gvar_array__str1->setInitializer(const_array_14);
   Value* packed_tmp2 = args++;
   packed_tmp2->setName("tmp2");
   
-  BasicBlock* label_entry_42 = new BasicBlock("entry",func_cmp,0);
+  BasicBlock* label_entry_43 = new BasicBlock("entry",func_cmp,0);
   BasicBlock* label_cond__14 = new BasicBlock("cond.?14",func_cmp,0);
   BasicBlock* label_cond_cont20 = new BasicBlock("cond.cont20",func_cmp,0);
   BasicBlock* label_cond__28 = new BasicBlock("cond.?28",func_cmp,0);
@@ -297,28 +312,28 @@ gvar_array__str1->setInitializer(const_array_14);
   BasicBlock* label_cond__42 = new BasicBlock("cond.?42",func_cmp,0);
   BasicBlock* label_cond_cont48 = new BasicBlock("cond.cont48",func_cmp,0);
   
-  // Block entry (label_entry_42)
-  ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_18, "tmp3", label_entry_42);
-  CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_42);
-  FCmpInst* int1_cmp_43 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_26, "cmp", label_entry_42);
-  ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_22, "tmp11", label_entry_42);
-  CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_42);
-  FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_26, "cmp13", label_entry_42);
-  SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_43, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_42);
-  new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_42);
+  // Block entry (label_entry_43)
+  ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_19, "tmp3", label_entry_43);
+  CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_43);
+  FCmpInst* int1_cmp_44 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_27, "cmp", label_entry_43);
+  ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp11", label_entry_43);
+  CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_43);
+  FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_27, "cmp13", label_entry_43);
+  SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_44, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_43);
+  new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_43);
   
   // Block cond.?14 (label_cond__14)
-  ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_27, "tmp233", label_cond__14);
-  ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp254", label_cond__14);
+  ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_28, "tmp233", label_cond__14);
+  ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp254", label_cond__14);
   CastInst* double_conv265 = new FPExtInst(float_tmp254, Type::DoubleTy, "conv265", label_cond__14);
-  FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_26, "cmp276", label_cond__14);
+  FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_27, "cmp276", label_cond__14);
   new BranchInst(label_cond__28, label_cond_cont34, int1_cmp276, label_cond__14);
   
   // Block cond.cont20 (label_cond_cont20)
-  ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_27, "tmp23", label_cond_cont20);
-  ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp25", label_cond_cont20);
+  ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_28, "tmp23", label_cond_cont20);
+  ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp25", label_cond_cont20);
   CastInst* double_conv26 = new FPExtInst(float_tmp25, Type::DoubleTy, "conv26", label_cond_cont20);
-  FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_26, "cmp27", label_cond_cont20);
+  FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_27, "cmp27", label_cond_cont20);
   new BranchInst(label_cond__28, label_cond_cont34, int1_cmp27, label_cond_cont20);
   
   // Block cond.?28 (label_cond__28)
@@ -327,10 +342,10 @@ gvar_array__str1->setInitializer(const_array_14);
   packed_tmp23_reg2mem_0->addIncoming(packed_tmp233, label_cond__14);
   packed_tmp23_reg2mem_0->addIncoming(packed_tmp23, label_cond_cont20);
   
-  ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_29, "tmp378", label_cond__28);
-  ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp399", label_cond__28);
+  ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_30, "tmp378", label_cond__28);
+  ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp399", label_cond__28);
   CastInst* double_conv4010 = new FPExtInst(float_tmp399, Type::DoubleTy, "conv4010", label_cond__28);
-  FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_26, "cmp4111", label_cond__28);
+  FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_27, "cmp4111", label_cond__28);
   new BranchInst(label_cond__42, label_cond_cont48, int1_cmp4111, label_cond__28);
   
   // Block cond.cont34 (label_cond_cont34)
@@ -339,10 +354,10 @@ gvar_array__str1->setInitializer(const_array_14);
   packed_tmp23_reg2mem_1->addIncoming(packed_tmp233, label_cond__14);
   packed_tmp23_reg2mem_1->addIncoming(packed_tmp23, label_cond_cont20);
   
-  ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_29, "tmp37", label_cond_cont34);
-  ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp39", label_cond_cont34);
+  ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_30, "tmp37", label_cond_cont34);
+  ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp39", label_cond_cont34);
   CastInst* double_conv40 = new FPExtInst(float_tmp39, Type::DoubleTy, "conv40", label_cond_cont34);
-  FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_26, "cmp41", label_cond_cont34);
+  FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_27, "cmp41", label_cond_cont34);
   new BranchInst(label_cond__42, label_cond_cont48, int1_cmp41, label_cond_cont34);
   
   // Block cond.?42 (label_cond__42)
@@ -351,7 +366,7 @@ gvar_array__str1->setInitializer(const_array_14);
   packed_tmp37_reg2mem_0->addIncoming(packed_tmp378, label_cond__28);
   packed_tmp37_reg2mem_0->addIncoming(packed_tmp37, label_cond_cont34);
   
-  ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_31, "tmp5113", label_cond__42);
+  ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_32, "tmp5113", label_cond__42);
   new ReturnInst(packed_tmp5113, label_cond__42);
   
   // Block cond.cont48 (label_cond_cont48)
@@ -360,7 +375,7 @@ gvar_array__str1->setInitializer(const_array_14);
   packed_tmp37_reg2mem_1->addIncoming(packed_tmp378, label_cond__28);
   packed_tmp37_reg2mem_1->addIncoming(packed_tmp37, label_cond_cont34);
   
-  ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_31, "tmp51", label_cond_cont48);
+  ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_32, "tmp51", label_cond_cont48);
   new ReturnInst(packed_tmp51, label_cond_cont48);
   
 }
@@ -371,87 +386,125 @@ gvar_array__str1->setInitializer(const_array_14);
   Value* packed_val = args++;
   packed_val->setName("val");
   
-  BasicBlock* label_entry_51 = new BasicBlock("entry",func_vcos,0);
-  
-  // Block entry (label_entry_51)
-  ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_18, "tmp1", label_entry_51);
-  CastInst* double_conv_52 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_51);
-  ExtractElementInst* float_tmp3_53 = new ExtractElementInst(packed_val, const_int32_22, "tmp3", label_entry_51);
-  CastInst* double_conv4 = new FPExtInst(float_tmp3_53, Type::DoubleTy, "conv4", label_entry_51);
-  ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_val, const_int32_24, "tmp6", label_entry_51);
-  CastInst* double_conv7 = new FPExtInst(float_tmp6, Type::DoubleTy, "conv7", label_entry_51);
-  ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_23, "tmp9", label_entry_51);
-  CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_51);
+  BasicBlock* label_entry_52 = new BasicBlock("entry",func_vcos,0);
+  
+  // Block entry (label_entry_52)
+  ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_19, "tmp1", label_entry_52);
+  CastInst* double_conv_53 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_52);
+  ExtractElementInst* float_tmp3_54 = new ExtractElementInst(packed_val, const_int32_23, "tmp3", label_entry_52);
+  CastInst* double_conv4 = new FPExtInst(float_tmp3_54, Type::DoubleTy, "conv4", label_entry_52);
+  ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_val, const_int32_25, "tmp6", label_entry_52);
+  CastInst* double_conv7 = new FPExtInst(float_tmp6, Type::DoubleTy, "conv7", label_entry_52);
+  ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_24, "tmp9", label_entry_52);
+  CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_52);
   std::vector<Value*> int32_call_params;
-  int32_call_params.push_back(const_ptr_33);
-  int32_call_params.push_back(double_conv_52);
+  int32_call_params.push_back(const_ptr_34);
+  int32_call_params.push_back(double_conv_53);
   int32_call_params.push_back(double_conv4);
   int32_call_params.push_back(double_conv7);
   int32_call_params.push_back(double_conv10);
-  CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_51);
+  CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_52);
   int32_call->setCallingConv(CallingConv::C);
   int32_call->setTailCall(true);
-  CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_51);
+  CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_52);
   float_call13->setCallingConv(CallingConv::C);
   float_call13->setTailCall(true);
-  InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_34, float_call13, const_int32_18, "tmp15", label_entry_51);
-  InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call13, const_int32_22, "tmp20", label_entry_51);
-  InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call13, const_int32_24, "tmp25", label_entry_51);
-  InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call13, const_int32_23, "tmp30", label_entry_51);
-  CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_51);
+  InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_35, float_call13, const_int32_19, "tmp15", label_entry_52);
+  InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call13, const_int32_23, "tmp20", label_entry_52);
+  InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call13, const_int32_25, "tmp25", label_entry_52);
+  InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call13, const_int32_24, "tmp30", label_entry_52);
+  CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_52);
   std::vector<Value*> int32_call43_params;
-  int32_call43_params.push_back(const_ptr_35);
+  int32_call43_params.push_back(const_ptr_36);
   int32_call43_params.push_back(double_conv33);
   int32_call43_params.push_back(double_conv33);
   int32_call43_params.push_back(double_conv33);
   int32_call43_params.push_back(double_conv33);
-  CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_51);
+  CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_52);
   int32_call43->setCallingConv(CallingConv::C);
   int32_call43->setTailCall(true);
-  new ReturnInst(packed_tmp30, label_entry_51);
+  new ReturnInst(packed_tmp30, label_entry_52);
   
 }
 
 // Function: scs (func_scs)
 {
   Function::arg_iterator args = func_scs->arg_begin();
-  Value* packed_val_55 = args++;
-  packed_val_55->setName("val");
-  
-  BasicBlock* label_entry_56 = new BasicBlock("entry",func_scs,0);
-  
-  // Block entry (label_entry_56)
-  ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_55, const_int32_18, "tmp2", label_entry_56);
-  CallInst* float_call_57 = new CallInst(func_cosf, float_tmp2, "call", label_entry_56);
-  float_call_57->setCallingConv(CallingConv::C);
-  float_call_57->setTailCall(true);
-  InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_34, float_call_57, const_int32_18, "tmp5", label_entry_56);
-  CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_56);
+  Value* packed_val_56 = args++;
+  packed_val_56->setName("val");
+  
+  BasicBlock* label_entry_57 = new BasicBlock("entry",func_scs,0);
+  
+  // Block entry (label_entry_57)
+  ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_56, const_int32_19, "tmp2", label_entry_57);
+  CallInst* float_call_58 = new CallInst(func_cosf, float_tmp2, "call", label_entry_57);
+  float_call_58->setCallingConv(CallingConv::C);
+  float_call_58->setTailCall(true);
+  InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_35, float_call_58, const_int32_19, "tmp5", label_entry_57);
+  CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_57);
   float_call7->setCallingConv(CallingConv::C);
   float_call7->setTailCall(true);
-  InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_22, "tmp9", label_entry_56);
-  new ReturnInst(packed_tmp9, label_entry_56);
+  InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_23, "tmp9", label_entry_57);
+  new ReturnInst(packed_tmp9, label_entry_57);
   
 }
 
 // Function: vsin (func_vsin)
 {
   Function::arg_iterator args = func_vsin->arg_begin();
-  Value* packed_val_59 = args++;
-  packed_val_59->setName("val");
-  
-  BasicBlock* label_entry_60 = new BasicBlock("entry",func_vsin,0);
-  
-  // Block entry (label_entry_60)
-  ExtractElementInst* float_tmp2_61 = new ExtractElementInst(packed_val_59, const_int32_18, "tmp2", label_entry_60);
-  CallInst* float_call_62 = new CallInst(func_sinf, float_tmp2_61, "call", label_entry_60);
-  float_call_62->setCallingConv(CallingConv::C);
-  float_call_62->setTailCall(true);
-  InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_34, float_call_62, const_int32_18, "tmp6", label_entry_60);
-  InsertElementInst* packed_tmp9_63 = new InsertElementInst(packed_tmp6, float_call_62, const_int32_22, "tmp9", label_entry_60);
-  InsertElementInst* packed_tmp12_64 = new InsertElementInst(packed_tmp9_63, float_call_62, const_int32_24, "tmp12", label_entry_60);
-  InsertElementInst* packed_tmp15_65 = new InsertElementInst(packed_tmp12_64, float_call_62, const_int32_23, "tmp15", label_entry_60);
-  new ReturnInst(packed_tmp15_65, label_entry_60);
+  Value* packed_val_60 = args++;
+  packed_val_60->setName("val");
+  
+  BasicBlock* label_entry_61 = new BasicBlock("entry",func_vsin,0);
+  
+  // Block entry (label_entry_61)
+  ExtractElementInst* float_tmp2_62 = new ExtractElementInst(packed_val_60, const_int32_19, "tmp2", label_entry_61);
+  CallInst* float_call_63 = new CallInst(func_sinf, float_tmp2_62, "call", label_entry_61);
+  float_call_63->setCallingConv(CallingConv::C);
+  float_call_63->setTailCall(true);
+  InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_35, float_call_63, const_int32_19, "tmp6", label_entry_61);
+  InsertElementInst* packed_tmp9_64 = new InsertElementInst(packed_tmp6, float_call_63, const_int32_23, "tmp9", label_entry_61);
+  InsertElementInst* packed_tmp12_65 = new InsertElementInst(packed_tmp9_64, float_call_63, const_int32_25, "tmp12", label_entry_61);
+  InsertElementInst* packed_tmp15_66 = new InsertElementInst(packed_tmp12_65, float_call_63, const_int32_24, "tmp15", label_entry_61);
+  new ReturnInst(packed_tmp15_66, label_entry_61);
+  
+}
+
+// Function: kilp (func_kilp)
+{
+  Function::arg_iterator args = func_kilp->arg_begin();
+  Value* packed_val_68 = args++;
+  packed_val_68->setName("val");
+  
+  BasicBlock* label_entry_69 = new BasicBlock("entry",func_kilp,0);
+  BasicBlock* label_lor_rhs = new BasicBlock("lor_rhs",func_kilp,0);
+  BasicBlock* label_lor_rhs6 = new BasicBlock("lor_rhs6",func_kilp,0);
+  BasicBlock* label_lor_rhs13 = new BasicBlock("lor_rhs13",func_kilp,0);
+  BasicBlock* label_UnifiedReturnBlock_70 = new BasicBlock("UnifiedReturnBlock",func_kilp,0);
+  
+  // Block entry (label_entry_69)
+  ExtractElementInst* float_tmp1_71 = new ExtractElementInst(packed_val_68, const_int32_19, "tmp1", label_entry_69);
+  FCmpInst* int1_cmp_72 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp1_71, const_float_18, "cmp", label_entry_69);
+  new BranchInst(label_UnifiedReturnBlock_70, label_lor_rhs, int1_cmp_72, label_entry_69);
+  
+  // Block lor_rhs (label_lor_rhs)
+  ExtractElementInst* float_tmp3_74 = new ExtractElementInst(packed_val_68, const_int32_23, "tmp3", label_lor_rhs);
+  FCmpInst* int1_cmp5 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp3_74, const_float_18, "cmp5", label_lor_rhs);
+  new BranchInst(label_UnifiedReturnBlock_70, label_lor_rhs6, int1_cmp5, label_lor_rhs);
+  
+  // Block lor_rhs6 (label_lor_rhs6)
+  ExtractElementInst* float_tmp8 = new ExtractElementInst(packed_val_68, const_int32_25, "tmp8", label_lor_rhs6);
+  FCmpInst* int1_cmp10 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp8, const_float_18, "cmp10", label_lor_rhs6);
+  new BranchInst(label_UnifiedReturnBlock_70, label_lor_rhs13, int1_cmp10, label_lor_rhs6);
+  
+  // Block lor_rhs13 (label_lor_rhs13)
+  ExtractElementInst* float_tmp15 = new ExtractElementInst(packed_val_68, const_int32_24, "tmp15", label_lor_rhs13);
+  FCmpInst* int1_cmp17 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp15, const_float_18, "cmp17", label_lor_rhs13);
+  CastInst* int32_retval = new ZExtInst(int1_cmp17, IntegerType::get(32), "retval", label_lor_rhs13);
+  new ReturnInst(int32_retval, label_lor_rhs13);
+  
+  // Block UnifiedReturnBlock (label_UnifiedReturnBlock_70)
+  new ReturnInst(const_int32_23, label_UnifiedReturnBlock_70);
   
 }
 
index 7a70aec..c8d1992 100644 (file)
@@ -864,6 +864,15 @@ llvm::Value * Instructions::scs(llvm::Value *in)
    return call;
 }
 
+llvm::Value * Instructions::kilp(llvm::Value *in) // emits a call to the module's "kilp" function, passing `in` as its only argument
+{
+   llvm::Function *func = m_mod->getFunction("kilp"); // look the callee up by name in m_mod
+   assert(func); // only an assert guards a failed lookup; there is no other handling here
+
+   CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); // result name comes from name("kilpres") -- presumably a uniquified label; confirm in name()
+   call->setTailCall(false); // explicitly mark the call as not a tail call
+   return call; // the call instruction itself is handed back as the result value
+}
 
 llvm::Value * Instructions::sin(llvm::Value *in)
 {
@@ -876,3 +885,4 @@ llvm::Value * Instructions::sin(llvm::Value *in)
 }
 #endif //MESA_LLVM
 
+
index c31cc4f..9ebc17d 100644 (file)
@@ -79,6 +79,7 @@ public:
    llvm::Value *floor(llvm::Value *in);
    llvm::Value *frc(llvm::Value *in);
    void         ifop(llvm::Value *in);
+   llvm::Value *kilp(llvm::Value *in);
    llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2,
                      llvm::Value *in3);
    llvm::Value *lit(llvm::Value *in);
index 6e7fa32..82ad6cf 100644 (file)
@@ -810,246 +810,257 @@ Module* createBaseShader() {
     AllocaInst* ptr_results_142 = new AllocaInst(ArrayTy_33, "results", label_entry_134);
     AllocaInst* ptr_temps_143 = new AllocaInst(ArrayTy_25, "temps", label_entry_134);
     AllocaInst* ptr_args_144 = new AllocaInst(StructTy_struct_ShaderInput, "args", label_entry_134);
-    ICmpInst* int1_cmp5_i_145 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_inputs_131, const_int32_34, "cmp5.i", label_entry_134);
-    new BranchInst(label_forbody6_i_135, label_from_array_exit_136, int1_cmp5_i_145, label_entry_134);
+    std::vector<Value*> ptr_tmp_indices;
+    ptr_tmp_indices.push_back(const_int32_34);
+    ptr_tmp_indices.push_back(const_int32_39);
+    Instruction* ptr_tmp = new GetElementPtrInst(ptr_args_144, ptr_tmp_indices.begin(), ptr_tmp_indices.end(), "tmp", label_entry_134);
+    StoreInst* void_145 = new StoreInst(const_int32_34, ptr_tmp, false, label_entry_134);
+    ICmpInst* int1_cmp5_i_146 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_inputs_131, const_int32_34, "cmp5.i", label_entry_134);
+    new BranchInst(label_forbody6_i_135, label_from_array_exit_136, int1_cmp5_i_146, label_entry_134);
     
     // Block forbody6.i (label_forbody6_i_135)
-    Argument* fwdref_148 = new Argument(IntegerType::get(32));
-    PHINode* int32_j_0_reg2mem_0_i_147 = new PHINode(IntegerType::get(32), "j.0.reg2mem.0.i", label_forbody6_i_135);
-    int32_j_0_reg2mem_0_i_147->reserveOperandSpace(2);
-    int32_j_0_reg2mem_0_i_147->addIncoming(const_int32_34, label_entry_134);
-    int32_j_0_reg2mem_0_i_147->addIncoming(fwdref_148, label_forbody6_i_135);
-    
-    Argument* fwdref_150 = new Argument(VectorTy_1);
-    PHINode* packed_vec_0_reg2mem_0_i_149 = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i", label_forbody6_i_135);
-    packed_vec_0_reg2mem_0_i_149->reserveOperandSpace(2);
-    packed_vec_0_reg2mem_0_i_149->addIncoming(const_packed_35, label_entry_134);
-    packed_vec_0_reg2mem_0_i_149->addIncoming(fwdref_150, label_forbody6_i_135);
-    
-    std::vector<Value*> ptr_arraydecay11_i_151_indices;
-    ptr_arraydecay11_i_151_indices.push_back(const_int32_34);
-    ptr_arraydecay11_i_151_indices.push_back(int32_j_0_reg2mem_0_i_147);
-    ptr_arraydecay11_i_151_indices.push_back(const_int32_34);
-    Instruction* ptr_arraydecay11_i_151 = new GetElementPtrInst(ptr_ainputs_130, ptr_arraydecay11_i_151_indices.begin(), ptr_arraydecay11_i_151_indices.end(), "arraydecay11.i", label_forbody6_i_135);
-    LoadInst* float_tmp13_i_152 = new LoadInst(ptr_arraydecay11_i_151, "tmp13.i", false, label_forbody6_i_135);
-    InsertElementInst* packed_tmp15_i_153 = new InsertElementInst(packed_vec_0_reg2mem_0_i_149, float_tmp13_i_152, const_int32_34, "tmp15.i", label_forbody6_i_135);
-    std::vector<Value*> ptr_arrayidx23_i_154_indices;
-    ptr_arrayidx23_i_154_indices.push_back(const_int32_34);
-    ptr_arrayidx23_i_154_indices.push_back(int32_j_0_reg2mem_0_i_147);
-    ptr_arrayidx23_i_154_indices.push_back(const_int32_36);
-    Instruction* ptr_arrayidx23_i_154 = new GetElementPtrInst(ptr_ainputs_130, ptr_arrayidx23_i_154_indices.begin(), ptr_arrayidx23_i_154_indices.end(), "arrayidx23.i", label_forbody6_i_135);
-    LoadInst* float_tmp24_i_155 = new LoadInst(ptr_arrayidx23_i_154, "tmp24.i", false, label_forbody6_i_135);
-    InsertElementInst* packed_tmp26_i_156 = new InsertElementInst(packed_tmp15_i_153, float_tmp24_i_155, const_int32_36, "tmp26.i", label_forbody6_i_135);
-    std::vector<Value*> ptr_arrayidx34_i_157_indices;
-    ptr_arrayidx34_i_157_indices.push_back(const_int32_34);
-    ptr_arrayidx34_i_157_indices.push_back(int32_j_0_reg2mem_0_i_147);
-    ptr_arrayidx34_i_157_indices.push_back(const_int32_37);
-    Instruction* ptr_arrayidx34_i_157 = new GetElementPtrInst(ptr_ainputs_130, ptr_arrayidx34_i_157_indices.begin(), ptr_arrayidx34_i_157_indices.end(), "arrayidx34.i", label_forbody6_i_135);
-    LoadInst* float_tmp35_i_158 = new LoadInst(ptr_arrayidx34_i_157, "tmp35.i", false, label_forbody6_i_135);
-    InsertElementInst* packed_tmp37_i_159 = new InsertElementInst(packed_tmp26_i_156, float_tmp35_i_158, const_int32_37, "tmp37.i", label_forbody6_i_135);
-    std::vector<Value*> ptr_arrayidx45_i_160_indices;
-    ptr_arrayidx45_i_160_indices.push_back(const_int32_34);
-    ptr_arrayidx45_i_160_indices.push_back(int32_j_0_reg2mem_0_i_147);
-    ptr_arrayidx45_i_160_indices.push_back(const_int32_38);
-    Instruction* ptr_arrayidx45_i_160 = new GetElementPtrInst(ptr_ainputs_130, ptr_arrayidx45_i_160_indices.begin(), ptr_arrayidx45_i_160_indices.end(), "arrayidx45.i", label_forbody6_i_135);
-    LoadInst* float_tmp46_i_161 = new LoadInst(ptr_arrayidx45_i_160, "tmp46.i", false, label_forbody6_i_135);
-    InsertElementInst* packed_tmp48_i_162 = new InsertElementInst(packed_tmp37_i_159, float_tmp46_i_161, const_int32_38, "tmp48.i", label_forbody6_i_135);
-    std::vector<Value*> ptr_arrayidx54_i_163_indices;
-    ptr_arrayidx54_i_163_indices.push_back(const_int32_34);
-    ptr_arrayidx54_i_163_indices.push_back(const_int32_34);
-    ptr_arrayidx54_i_163_indices.push_back(int32_j_0_reg2mem_0_i_147);
-    Instruction* ptr_arrayidx54_i_163 = new GetElementPtrInst(ptr_inputs_140, ptr_arrayidx54_i_163_indices.begin(), ptr_arrayidx54_i_163_indices.end(), "arrayidx54.i", label_forbody6_i_135);
-    StoreInst* void_164 = new StoreInst(packed_tmp48_i_162, ptr_arrayidx54_i_163, false, label_forbody6_i_135);
-    BinaryOperator* int32_inc_i_165 = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0_i_147, const_int32_36, "inc.i", label_forbody6_i_135);
-    ICmpInst* int1_cmp59_i_166 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_i_165, int32_num_inputs_131, "cmp59.i", label_forbody6_i_135);
-    new BranchInst(label_forbody6_i_135, label_forbody6_i_1, int1_cmp59_i_166, label_forbody6_i_135);
+    Argument* fwdref_149 = new Argument(IntegerType::get(32));
+    PHINode* int32_j_0_reg2mem_0_i_148 = new PHINode(IntegerType::get(32), "j.0.reg2mem.0.i", label_forbody6_i_135);
+    int32_j_0_reg2mem_0_i_148->reserveOperandSpace(2);
+    int32_j_0_reg2mem_0_i_148->addIncoming(const_int32_34, label_entry_134);
+    int32_j_0_reg2mem_0_i_148->addIncoming(fwdref_149, label_forbody6_i_135);
+    
+    Argument* fwdref_151 = new Argument(VectorTy_1);
+    PHINode* packed_vec_0_reg2mem_0_i_150 = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i", label_forbody6_i_135);
+    packed_vec_0_reg2mem_0_i_150->reserveOperandSpace(2);
+    packed_vec_0_reg2mem_0_i_150->addIncoming(const_packed_35, label_entry_134);
+    packed_vec_0_reg2mem_0_i_150->addIncoming(fwdref_151, label_forbody6_i_135);
+    
+    std::vector<Value*> ptr_arraydecay11_i_152_indices;
+    ptr_arraydecay11_i_152_indices.push_back(const_int32_34);
+    ptr_arraydecay11_i_152_indices.push_back(int32_j_0_reg2mem_0_i_148);
+    ptr_arraydecay11_i_152_indices.push_back(const_int32_34);
+    Instruction* ptr_arraydecay11_i_152 = new GetElementPtrInst(ptr_ainputs_130, ptr_arraydecay11_i_152_indices.begin(), ptr_arraydecay11_i_152_indices.end(), "arraydecay11.i", label_forbody6_i_135);
+    LoadInst* float_tmp13_i_153 = new LoadInst(ptr_arraydecay11_i_152, "tmp13.i", false, label_forbody6_i_135);
+    InsertElementInst* packed_tmp15_i_154 = new InsertElementInst(packed_vec_0_reg2mem_0_i_150, float_tmp13_i_153, const_int32_34, "tmp15.i", label_forbody6_i_135);
+    std::vector<Value*> ptr_arrayidx23_i_155_indices;
+    ptr_arrayidx23_i_155_indices.push_back(const_int32_34);
+    ptr_arrayidx23_i_155_indices.push_back(int32_j_0_reg2mem_0_i_148);
+    ptr_arrayidx23_i_155_indices.push_back(const_int32_36);
+    Instruction* ptr_arrayidx23_i_155 = new GetElementPtrInst(ptr_ainputs_130, ptr_arrayidx23_i_155_indices.begin(), ptr_arrayidx23_i_155_indices.end(), "arrayidx23.i", label_forbody6_i_135);
+    LoadInst* float_tmp24_i_156 = new LoadInst(ptr_arrayidx23_i_155, "tmp24.i", false, label_forbody6_i_135);
+    InsertElementInst* packed_tmp26_i_157 = new InsertElementInst(packed_tmp15_i_154, float_tmp24_i_156, const_int32_36, "tmp26.i", label_forbody6_i_135);
+    std::vector<Value*> ptr_arrayidx34_i_158_indices;
+    ptr_arrayidx34_i_158_indices.push_back(const_int32_34);
+    ptr_arrayidx34_i_158_indices.push_back(int32_j_0_reg2mem_0_i_148);
+    ptr_arrayidx34_i_158_indices.push_back(const_int32_37);
+    Instruction* ptr_arrayidx34_i_158 = new GetElementPtrInst(ptr_ainputs_130, ptr_arrayidx34_i_158_indices.begin(), ptr_arrayidx34_i_158_indices.end(), "arrayidx34.i", label_forbody6_i_135);
+    LoadInst* float_tmp35_i_159 = new LoadInst(ptr_arrayidx34_i_158, "tmp35.i", false, label_forbody6_i_135);
+    InsertElementInst* packed_tmp37_i_160 = new InsertElementInst(packed_tmp26_i_157, float_tmp35_i_159, const_int32_37, "tmp37.i", label_forbody6_i_135);
+    std::vector<Value*> ptr_arrayidx45_i_161_indices;
+    ptr_arrayidx45_i_161_indices.push_back(const_int32_34);
+    ptr_arrayidx45_i_161_indices.push_back(int32_j_0_reg2mem_0_i_148);
+    ptr_arrayidx45_i_161_indices.push_back(const_int32_38);
+    Instruction* ptr_arrayidx45_i_161 = new GetElementPtrInst(ptr_ainputs_130, ptr_arrayidx45_i_161_indices.begin(), ptr_arrayidx45_i_161_indices.end(), "arrayidx45.i", label_forbody6_i_135);
+    LoadInst* float_tmp46_i_162 = new LoadInst(ptr_arrayidx45_i_161, "tmp46.i", false, label_forbody6_i_135);
+    InsertElementInst* packed_tmp48_i_163 = new InsertElementInst(packed_tmp37_i_160, float_tmp46_i_162, const_int32_38, "tmp48.i", label_forbody6_i_135);
+    std::vector<Value*> ptr_arrayidx54_i_164_indices;
+    ptr_arrayidx54_i_164_indices.push_back(const_int32_34);
+    ptr_arrayidx54_i_164_indices.push_back(const_int32_34);
+    ptr_arrayidx54_i_164_indices.push_back(int32_j_0_reg2mem_0_i_148);
+    Instruction* ptr_arrayidx54_i_164 = new GetElementPtrInst(ptr_inputs_140, ptr_arrayidx54_i_164_indices.begin(), ptr_arrayidx54_i_164_indices.end(), "arrayidx54.i", label_forbody6_i_135);
+    StoreInst* void_165 = new StoreInst(packed_tmp48_i_163, ptr_arrayidx54_i_164, false, label_forbody6_i_135);
+    BinaryOperator* int32_inc_i_166 = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0_i_148, const_int32_36, "inc.i", label_forbody6_i_135);
+    ICmpInst* int1_cmp59_i_167 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_i_166, int32_num_inputs_131, "cmp59.i", label_forbody6_i_135);
+    new BranchInst(label_forbody6_i_135, label_forbody6_i_1, int1_cmp59_i_167, label_forbody6_i_135);
     
     // Block from_array.exit (label_from_array_exit_136)
-    ICmpInst* int1_cmp_i_168 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts_133, const_int32_34, "cmp.i", label_from_array_exit_136);
-    new BranchInst(label_forbody_i13, label_from_consts_exit_137, int1_cmp_i_168, label_from_array_exit_136);
+    ICmpInst* int1_cmp_i_169 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts_133, const_int32_34, "cmp.i", label_from_array_exit_136);
+    new BranchInst(label_forbody_i13, label_from_consts_exit_137, int1_cmp_i_169, label_from_array_exit_136);
     
     // Block forbody.i13 (label_forbody_i13)
-    Argument* fwdref_170 = new Argument(IntegerType::get(32));
+    Argument* fwdref_171 = new Argument(IntegerType::get(32));
     PHINode* int32_i_0_reg2mem_0_i3 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i3", label_forbody_i13);
     int32_i_0_reg2mem_0_i3->reserveOperandSpace(2);
     int32_i_0_reg2mem_0_i3->addIncoming(const_int32_34, label_from_array_exit_136);
-    int32_i_0_reg2mem_0_i3->addIncoming(fwdref_170, label_forbody_i13);
+    int32_i_0_reg2mem_0_i3->addIncoming(fwdref_171, label_forbody_i13);
     
-    Argument* fwdref_171 = new Argument(VectorTy_1);
+    Argument* fwdref_172 = new Argument(VectorTy_1);
     PHINode* packed_vec_0_reg2mem_0_i4 = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i4", label_forbody_i13);
     packed_vec_0_reg2mem_0_i4->reserveOperandSpace(2);
     packed_vec_0_reg2mem_0_i4->addIncoming(const_packed_35, label_from_array_exit_136);
-    packed_vec_0_reg2mem_0_i4->addIncoming(fwdref_171, label_forbody_i13);
+    packed_vec_0_reg2mem_0_i4->addIncoming(fwdref_172, label_forbody_i13);
     
     std::vector<Value*> ptr_arraydecay_i5_indices;
     ptr_arraydecay_i5_indices.push_back(int32_i_0_reg2mem_0_i3);
     ptr_arraydecay_i5_indices.push_back(const_int32_34);
     Instruction* ptr_arraydecay_i5 = new GetElementPtrInst(ptr_aconsts_132, ptr_arraydecay_i5_indices.begin(), ptr_arraydecay_i5_indices.end(), "arraydecay.i5", label_forbody_i13);
-    LoadInst* float_tmp5_i_172 = new LoadInst(ptr_arraydecay_i5, "tmp5.i", false, label_forbody_i13);
-    InsertElementInst* packed_tmp7_i6 = new InsertElementInst(packed_vec_0_reg2mem_0_i4, float_tmp5_i_172, const_int32_34, "tmp7.i6", label_forbody_i13);
-    std::vector<Value*> ptr_arrayidx12_i_173_indices;
-    ptr_arrayidx12_i_173_indices.push_back(int32_i_0_reg2mem_0_i3);
-    ptr_arrayidx12_i_173_indices.push_back(const_int32_36);
-    Instruction* ptr_arrayidx12_i_173 = new GetElementPtrInst(ptr_aconsts_132, ptr_arrayidx12_i_173_indices.begin(), ptr_arrayidx12_i_173_indices.end(), "arrayidx12.i", label_forbody_i13);
-    LoadInst* float_tmp13_i7 = new LoadInst(ptr_arrayidx12_i_173, "tmp13.i7", false, label_forbody_i13);
+    LoadInst* float_tmp5_i_173 = new LoadInst(ptr_arraydecay_i5, "tmp5.i", false, label_forbody_i13);
+    InsertElementInst* packed_tmp7_i6 = new InsertElementInst(packed_vec_0_reg2mem_0_i4, float_tmp5_i_173, const_int32_34, "tmp7.i6", label_forbody_i13);
+    std::vector<Value*> ptr_arrayidx12_i_174_indices;
+    ptr_arrayidx12_i_174_indices.push_back(int32_i_0_reg2mem_0_i3);
+    ptr_arrayidx12_i_174_indices.push_back(const_int32_36);
+    Instruction* ptr_arrayidx12_i_174 = new GetElementPtrInst(ptr_aconsts_132, ptr_arrayidx12_i_174_indices.begin(), ptr_arrayidx12_i_174_indices.end(), "arrayidx12.i", label_forbody_i13);
+    LoadInst* float_tmp13_i7 = new LoadInst(ptr_arrayidx12_i_174, "tmp13.i7", false, label_forbody_i13);
     InsertElementInst* packed_tmp15_i8 = new InsertElementInst(packed_tmp7_i6, float_tmp13_i7, const_int32_36, "tmp15.i8", label_forbody_i13);
-    std::vector<Value*> ptr_arrayidx20_i_174_indices;
-    ptr_arrayidx20_i_174_indices.push_back(int32_i_0_reg2mem_0_i3);
-    ptr_arrayidx20_i_174_indices.push_back(const_int32_37);
-    Instruction* ptr_arrayidx20_i_174 = new GetElementPtrInst(ptr_aconsts_132, ptr_arrayidx20_i_174_indices.begin(), ptr_arrayidx20_i_174_indices.end(), "arrayidx20.i", label_forbody_i13);
-    LoadInst* float_tmp21_i_175 = new LoadInst(ptr_arrayidx20_i_174, "tmp21.i", false, label_forbody_i13);
-    InsertElementInst* packed_tmp23_i9 = new InsertElementInst(packed_tmp15_i8, float_tmp21_i_175, const_int32_37, "tmp23.i9", label_forbody_i13);
-    std::vector<Value*> ptr_arrayidx28_i_176_indices;
-    ptr_arrayidx28_i_176_indices.push_back(int32_i_0_reg2mem_0_i3);
-    ptr_arrayidx28_i_176_indices.push_back(const_int32_38);
-    Instruction* ptr_arrayidx28_i_176 = new GetElementPtrInst(ptr_aconsts_132, ptr_arrayidx28_i_176_indices.begin(), ptr_arrayidx28_i_176_indices.end(), "arrayidx28.i", label_forbody_i13);
-    LoadInst* float_tmp29_i_177 = new LoadInst(ptr_arrayidx28_i_176, "tmp29.i", false, label_forbody_i13);
-    InsertElementInst* packed_tmp31_i_178 = new InsertElementInst(packed_tmp23_i9, float_tmp29_i_177, const_int32_38, "tmp31.i", label_forbody_i13);
+    std::vector<Value*> ptr_arrayidx20_i_175_indices;
+    ptr_arrayidx20_i_175_indices.push_back(int32_i_0_reg2mem_0_i3);
+    ptr_arrayidx20_i_175_indices.push_back(const_int32_37);
+    Instruction* ptr_arrayidx20_i_175 = new GetElementPtrInst(ptr_aconsts_132, ptr_arrayidx20_i_175_indices.begin(), ptr_arrayidx20_i_175_indices.end(), "arrayidx20.i", label_forbody_i13);
+    LoadInst* float_tmp21_i_176 = new LoadInst(ptr_arrayidx20_i_175, "tmp21.i", false, label_forbody_i13);
+    InsertElementInst* packed_tmp23_i9 = new InsertElementInst(packed_tmp15_i8, float_tmp21_i_176, const_int32_37, "tmp23.i9", label_forbody_i13);
+    std::vector<Value*> ptr_arrayidx28_i_177_indices;
+    ptr_arrayidx28_i_177_indices.push_back(int32_i_0_reg2mem_0_i3);
+    ptr_arrayidx28_i_177_indices.push_back(const_int32_38);
+    Instruction* ptr_arrayidx28_i_177 = new GetElementPtrInst(ptr_aconsts_132, ptr_arrayidx28_i_177_indices.begin(), ptr_arrayidx28_i_177_indices.end(), "arrayidx28.i", label_forbody_i13);
+    LoadInst* float_tmp29_i_178 = new LoadInst(ptr_arrayidx28_i_177, "tmp29.i", false, label_forbody_i13);
+    InsertElementInst* packed_tmp31_i_179 = new InsertElementInst(packed_tmp23_i9, float_tmp29_i_178, const_int32_38, "tmp31.i", label_forbody_i13);
     std::vector<Value*> ptr_arrayidx34_i10_indices;
     ptr_arrayidx34_i10_indices.push_back(const_int32_34);
     ptr_arrayidx34_i10_indices.push_back(int32_i_0_reg2mem_0_i3);
     Instruction* ptr_arrayidx34_i10 = new GetElementPtrInst(ptr_consts_141, ptr_arrayidx34_i10_indices.begin(), ptr_arrayidx34_i10_indices.end(), "arrayidx34.i10", label_forbody_i13);
-    StoreInst* void_179 = new StoreInst(packed_tmp31_i_178, ptr_arrayidx34_i10, false, label_forbody_i13);
-    BinaryOperator* int32_indvar_next22 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i3, const_int32_36, "indvar.next22", label_forbody_i13);
-    ICmpInst* int1_exitcond23 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next22, int32_num_consts_133, "exitcond23", label_forbody_i13);
-    new BranchInst(label_from_consts_exit_137, label_forbody_i13, int1_exitcond23, label_forbody_i13);
+    StoreInst* void_180 = new StoreInst(packed_tmp31_i_179, ptr_arrayidx34_i10, false, label_forbody_i13);
+    BinaryOperator* int32_indvar_next23 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i3, const_int32_36, "indvar.next23", label_forbody_i13);
+    ICmpInst* int1_exitcond24 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next23, int32_num_consts_133, "exitcond24", label_forbody_i13);
+    new BranchInst(label_from_consts_exit_137, label_forbody_i13, int1_exitcond24, label_forbody_i13);
     
     // Block from_consts.exit (label_from_consts_exit_137)
-    std::vector<Value*> ptr_tmp5_indices;
-    ptr_tmp5_indices.push_back(const_int32_34);
-    ptr_tmp5_indices.push_back(const_int32_38);
-    Instruction* ptr_tmp5 = new GetElementPtrInst(ptr_args_144, ptr_tmp5_indices.begin(), ptr_tmp5_indices.end(), "tmp5", label_from_consts_exit_137);
-    std::vector<Value*> ptr_arraydecay6_indices;
-    ptr_arraydecay6_indices.push_back(const_int32_34);
-    ptr_arraydecay6_indices.push_back(const_int32_34);
-    Instruction* ptr_arraydecay6 = new GetElementPtrInst(ptr_consts_141, ptr_arraydecay6_indices.begin(), ptr_arraydecay6_indices.end(), "arraydecay6", label_from_consts_exit_137);
-    StoreInst* void_181 = new StoreInst(ptr_arraydecay6, ptr_tmp5, false, label_from_consts_exit_137);
-    std::vector<Value*> ptr_tmp7_indices;
-    ptr_tmp7_indices.push_back(const_int32_34);
-    ptr_tmp7_indices.push_back(const_int32_37);
-    Instruction* ptr_tmp7 = new GetElementPtrInst(ptr_args_144, ptr_tmp7_indices.begin(), ptr_tmp7_indices.end(), "tmp7", label_from_consts_exit_137);
-    std::vector<Value*> ptr_arraydecay8_indices;
-    ptr_arraydecay8_indices.push_back(const_int32_34);
-    ptr_arraydecay8_indices.push_back(const_int32_34);
-    Instruction* ptr_arraydecay8 = new GetElementPtrInst(ptr_temps_143, ptr_arraydecay8_indices.begin(), ptr_arraydecay8_indices.end(), "arraydecay8", label_from_consts_exit_137);
-    StoreInst* void_182 = new StoreInst(ptr_arraydecay8, ptr_tmp7, false, label_from_consts_exit_137);
-    std::vector<Value*> ptr_tmp10_indices;
-    ptr_tmp10_indices.push_back(const_int32_34);
-    ptr_tmp10_indices.push_back(const_int32_36);
-    Instruction* ptr_tmp10 = new GetElementPtrInst(ptr_args_144, ptr_tmp10_indices.begin(), ptr_tmp10_indices.end(), "tmp10", label_from_consts_exit_137);
-    std::vector<Value*> ptr_tmp14_indices;
-    ptr_tmp14_indices.push_back(const_int32_34);
-    ptr_tmp14_indices.push_back(const_int32_34);
-    Instruction* ptr_tmp14 = new GetElementPtrInst(ptr_args_144, ptr_tmp14_indices.begin(), ptr_tmp14_indices.end(), "tmp14", label_from_consts_exit_137);
+    std::vector<Value*> ptr_tmp6_182_indices;
+    ptr_tmp6_182_indices.push_back(const_int32_34);
+    ptr_tmp6_182_indices.push_back(const_int32_38);
+    Instruction* ptr_tmp6_182 = new GetElementPtrInst(ptr_args_144, ptr_tmp6_182_indices.begin(), ptr_tmp6_182_indices.end(), "tmp6", label_from_consts_exit_137);
+    std::vector<Value*> ptr_arraydecay7_183_indices;
+    ptr_arraydecay7_183_indices.push_back(const_int32_34);
+    ptr_arraydecay7_183_indices.push_back(const_int32_34);
+    Instruction* ptr_arraydecay7_183 = new GetElementPtrInst(ptr_consts_141, ptr_arraydecay7_183_indices.begin(), ptr_arraydecay7_183_indices.end(), "arraydecay7", label_from_consts_exit_137);
+    StoreInst* void_184 = new StoreInst(ptr_arraydecay7_183, ptr_tmp6_182, false, label_from_consts_exit_137);
+    std::vector<Value*> ptr_tmp8_185_indices;
+    ptr_tmp8_185_indices.push_back(const_int32_34);
+    ptr_tmp8_185_indices.push_back(const_int32_37);
+    Instruction* ptr_tmp8_185 = new GetElementPtrInst(ptr_args_144, ptr_tmp8_185_indices.begin(), ptr_tmp8_185_indices.end(), "tmp8", label_from_consts_exit_137);
+    std::vector<Value*> ptr_arraydecay9_186_indices;
+    ptr_arraydecay9_186_indices.push_back(const_int32_34);
+    ptr_arraydecay9_186_indices.push_back(const_int32_34);
+    Instruction* ptr_arraydecay9_186 = new GetElementPtrInst(ptr_temps_143, ptr_arraydecay9_186_indices.begin(), ptr_arraydecay9_186_indices.end(), "arraydecay9", label_from_consts_exit_137);
+    StoreInst* void_187 = new StoreInst(ptr_arraydecay9_186, ptr_tmp8_185, false, label_from_consts_exit_137);
+    std::vector<Value*> ptr_tmp11_indices;
+    ptr_tmp11_indices.push_back(const_int32_34);
+    ptr_tmp11_indices.push_back(const_int32_36);
+    Instruction* ptr_tmp11 = new GetElementPtrInst(ptr_args_144, ptr_tmp11_indices.begin(), ptr_tmp11_indices.end(), "tmp11", label_from_consts_exit_137);
+    std::vector<Value*> ptr_tmp15_indices;
+    ptr_tmp15_indices.push_back(const_int32_34);
+    ptr_tmp15_indices.push_back(const_int32_34);
+    Instruction* ptr_tmp15 = new GetElementPtrInst(ptr_args_144, ptr_tmp15_indices.begin(), ptr_tmp15_indices.end(), "tmp15", label_from_consts_exit_137);
     new BranchInst(label_forbody_138, label_from_consts_exit_137);
     
     // Block forbody (label_forbody_138)
-    Argument* fwdref_185 = new Argument(IntegerType::get(32));
-    PHINode* int32_i_0_reg2mem_0_184 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody_138);
-    int32_i_0_reg2mem_0_184->reserveOperandSpace(2);
-    int32_i_0_reg2mem_0_184->addIncoming(const_int32_34, label_from_consts_exit_137);
-    int32_i_0_reg2mem_0_184->addIncoming(fwdref_185, label_forbody_138);
-    
-    std::vector<Value*> ptr_arraydecay13_indices;
-    ptr_arraydecay13_indices.push_back(const_int32_34);
-    ptr_arraydecay13_indices.push_back(int32_i_0_reg2mem_0_184);
-    ptr_arraydecay13_indices.push_back(const_int32_34);
-    Instruction* ptr_arraydecay13 = new GetElementPtrInst(ptr_inputs_140, ptr_arraydecay13_indices.begin(), ptr_arraydecay13_indices.end(), "arraydecay13", label_forbody_138);
-    StoreInst* void_186 = new StoreInst(ptr_arraydecay13, ptr_tmp10, false, label_forbody_138);
-    std::vector<Value*> ptr_arraydecay18_indices;
-    ptr_arraydecay18_indices.push_back(const_int32_34);
-    ptr_arraydecay18_indices.push_back(int32_i_0_reg2mem_0_184);
-    ptr_arraydecay18_indices.push_back(const_int32_34);
-    Instruction* ptr_arraydecay18 = new GetElementPtrInst(ptr_results_142, ptr_arraydecay18_indices.begin(), ptr_arraydecay18_indices.end(), "arraydecay18", label_forbody_138);
-    StoreInst* void_187 = new StoreInst(ptr_arraydecay18, ptr_tmp14, false, label_forbody_138);
-    CallInst* void_188 = new CallInst(func_execute_shader, ptr_args_144, "", label_forbody_138);
-    void_188->setCallingConv(CallingConv::C);
-    void_188->setTailCall(false);
-    LoadInst* ptr_tmp24 = new LoadInst(ptr_tmp14, "tmp24", false, label_forbody_138);
+    Argument* fwdref_189 = new Argument(IntegerType::get(32));
+    PHINode* int32_tmp21_rle = new PHINode(IntegerType::get(32), "tmp21.rle", label_forbody_138);
+    int32_tmp21_rle->reserveOperandSpace(2);
+    int32_tmp21_rle->addIncoming(const_int32_34, label_from_consts_exit_137);
+    int32_tmp21_rle->addIncoming(fwdref_189, label_forbody_138);
+    
+    Argument* fwdref_191 = new Argument(IntegerType::get(32));
+    PHINode* int32_i_0_reg2mem_0_190 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody_138);
+    int32_i_0_reg2mem_0_190->reserveOperandSpace(2);
+    int32_i_0_reg2mem_0_190->addIncoming(const_int32_34, label_from_consts_exit_137);
+    int32_i_0_reg2mem_0_190->addIncoming(fwdref_191, label_forbody_138);
+    
+    std::vector<Value*> ptr_arraydecay14_indices;
+    ptr_arraydecay14_indices.push_back(const_int32_34);
+    ptr_arraydecay14_indices.push_back(int32_i_0_reg2mem_0_190);
+    ptr_arraydecay14_indices.push_back(const_int32_34);
+    Instruction* ptr_arraydecay14 = new GetElementPtrInst(ptr_inputs_140, ptr_arraydecay14_indices.begin(), ptr_arraydecay14_indices.end(), "arraydecay14", label_forbody_138);
+    StoreInst* void_192 = new StoreInst(ptr_arraydecay14, ptr_tmp11, false, label_forbody_138);
+    std::vector<Value*> ptr_arraydecay19_indices;
+    ptr_arraydecay19_indices.push_back(const_int32_34);
+    ptr_arraydecay19_indices.push_back(int32_i_0_reg2mem_0_190);
+    ptr_arraydecay19_indices.push_back(const_int32_34);
+    Instruction* ptr_arraydecay19 = new GetElementPtrInst(ptr_results_142, ptr_arraydecay19_indices.begin(), ptr_arraydecay19_indices.end(), "arraydecay19", label_forbody_138);
+    StoreInst* void_193 = new StoreInst(ptr_arraydecay19, ptr_tmp15, false, label_forbody_138);
+    StoreInst* void_194 = new StoreInst(const_int32_34, ptr_tmp, false, label_forbody_138);
+    CallInst* void_195 = new CallInst(func_execute_shader, ptr_args_144, "", label_forbody_138);
+    void_195->setCallingConv(CallingConv::C);
+    void_195->setTailCall(false);
+    LoadInst* int32_tmp26 = new LoadInst(ptr_tmp, "tmp26", false, label_forbody_138);
+    BinaryOperator* int32_shl = BinaryOperator::create(Instruction::Shl, int32_tmp26, int32_i_0_reg2mem_0_190, "shl", label_forbody_138);
+    BinaryOperator* int32_or = BinaryOperator::create(Instruction::Or, int32_shl, int32_tmp21_rle, "or", label_forbody_138);
+    StoreInst* void_196 = new StoreInst(int32_or, ptr_tmp, false, label_forbody_138);
+    LoadInst* ptr_tmp33 = new LoadInst(ptr_tmp15, "tmp33", false, label_forbody_138);
     std::vector<Value*> ptr_arraydecay_i_indices;
-    ptr_arraydecay_i_indices.push_back(int32_i_0_reg2mem_0_184);
+    ptr_arraydecay_i_indices.push_back(int32_i_0_reg2mem_0_190);
     ptr_arraydecay_i_indices.push_back(const_int32_34);
     ptr_arraydecay_i_indices.push_back(const_int32_34);
     Instruction* ptr_arraydecay_i = new GetElementPtrInst(ptr_dests_129, ptr_arraydecay_i_indices.begin(), ptr_arraydecay_i_indices.end(), "arraydecay.i", label_forbody_138);
-    LoadInst* packed_tmp7_i = new LoadInst(ptr_tmp24, "tmp7.i", false, label_forbody_138);
+    LoadInst* packed_tmp7_i = new LoadInst(ptr_tmp33, "tmp7.i", false, label_forbody_138);
     ExtractElementInst* float_tmp11_i = new ExtractElementInst(packed_tmp7_i, const_int32_34, "tmp11.i", label_forbody_138);
-    StoreInst* void_189 = new StoreInst(float_tmp11_i, ptr_arraydecay_i, false, label_forbody_138);
+    StoreInst* void_197 = new StoreInst(float_tmp11_i, ptr_arraydecay_i, false, label_forbody_138);
     std::vector<Value*> ptr_arrayidx13_i_indices;
-    ptr_arrayidx13_i_indices.push_back(int32_i_0_reg2mem_0_184);
+    ptr_arrayidx13_i_indices.push_back(int32_i_0_reg2mem_0_190);
     ptr_arrayidx13_i_indices.push_back(const_int32_34);
     ptr_arrayidx13_i_indices.push_back(const_int32_36);
     Instruction* ptr_arrayidx13_i = new GetElementPtrInst(ptr_dests_129, ptr_arrayidx13_i_indices.begin(), ptr_arrayidx13_i_indices.end(), "arrayidx13.i", label_forbody_138);
     ExtractElementInst* float_tmp15_i2 = new ExtractElementInst(packed_tmp7_i, const_int32_36, "tmp15.i2", label_forbody_138);
-    StoreInst* void_190 = new StoreInst(float_tmp15_i2, ptr_arrayidx13_i, false, label_forbody_138);
+    StoreInst* void_198 = new StoreInst(float_tmp15_i2, ptr_arrayidx13_i, false, label_forbody_138);
     std::vector<Value*> ptr_arrayidx17_i_indices;
-    ptr_arrayidx17_i_indices.push_back(int32_i_0_reg2mem_0_184);
+    ptr_arrayidx17_i_indices.push_back(int32_i_0_reg2mem_0_190);
     ptr_arrayidx17_i_indices.push_back(const_int32_34);
     ptr_arrayidx17_i_indices.push_back(const_int32_37);
     Instruction* ptr_arrayidx17_i = new GetElementPtrInst(ptr_dests_129, ptr_arrayidx17_i_indices.begin(), ptr_arrayidx17_i_indices.end(), "arrayidx17.i", label_forbody_138);
     ExtractElementInst* float_tmp19_i = new ExtractElementInst(packed_tmp7_i, const_int32_37, "tmp19.i", label_forbody_138);
-    StoreInst* void_191 = new StoreInst(float_tmp19_i, ptr_arrayidx17_i, false, label_forbody_138);
+    StoreInst* void_199 = new StoreInst(float_tmp19_i, ptr_arrayidx17_i, false, label_forbody_138);
     std::vector<Value*> ptr_arrayidx21_i_indices;
-    ptr_arrayidx21_i_indices.push_back(int32_i_0_reg2mem_0_184);
+    ptr_arrayidx21_i_indices.push_back(int32_i_0_reg2mem_0_190);
     ptr_arrayidx21_i_indices.push_back(const_int32_34);
     ptr_arrayidx21_i_indices.push_back(const_int32_38);
     Instruction* ptr_arrayidx21_i = new GetElementPtrInst(ptr_dests_129, ptr_arrayidx21_i_indices.begin(), ptr_arrayidx21_i_indices.end(), "arrayidx21.i", label_forbody_138);
     ExtractElementInst* float_tmp23_i = new ExtractElementInst(packed_tmp7_i, const_int32_38, "tmp23.i", label_forbody_138);
-    StoreInst* void_192 = new StoreInst(float_tmp23_i, ptr_arrayidx21_i, false, label_forbody_138);
+    StoreInst* void_200 = new StoreInst(float_tmp23_i, ptr_arrayidx21_i, false, label_forbody_138);
     std::vector<Value*> ptr_arraydecay_i_1_indices;
-    ptr_arraydecay_i_1_indices.push_back(int32_i_0_reg2mem_0_184);
+    ptr_arraydecay_i_1_indices.push_back(int32_i_0_reg2mem_0_190);
     ptr_arraydecay_i_1_indices.push_back(const_int32_36);
     ptr_arraydecay_i_1_indices.push_back(const_int32_34);
     Instruction* ptr_arraydecay_i_1 = new GetElementPtrInst(ptr_dests_129, ptr_arraydecay_i_1_indices.begin(), ptr_arraydecay_i_1_indices.end(), "arraydecay.i.1", label_forbody_138);
-    GetElementPtrInst* ptr_arrayidx6_i_1 = new GetElementPtrInst(ptr_tmp24, const_int32_36, "arrayidx6.i.1", label_forbody_138);
+    GetElementPtrInst* ptr_arrayidx6_i_1 = new GetElementPtrInst(ptr_tmp33, const_int32_36, "arrayidx6.i.1", label_forbody_138);
     LoadInst* packed_tmp7_i_1 = new LoadInst(ptr_arrayidx6_i_1, "tmp7.i.1", false, label_forbody_138);
     ExtractElementInst* float_tmp11_i_1 = new ExtractElementInst(packed_tmp7_i_1, const_int32_34, "tmp11.i.1", label_forbody_138);
-    StoreInst* void_193 = new StoreInst(float_tmp11_i_1, ptr_arraydecay_i_1, false, label_forbody_138);
+    StoreInst* void_201 = new StoreInst(float_tmp11_i_1, ptr_arraydecay_i_1, false, label_forbody_138);
     std::vector<Value*> ptr_arrayidx13_i_1_indices;
-    ptr_arrayidx13_i_1_indices.push_back(int32_i_0_reg2mem_0_184);
+    ptr_arrayidx13_i_1_indices.push_back(int32_i_0_reg2mem_0_190);
     ptr_arrayidx13_i_1_indices.push_back(const_int32_36);
     ptr_arrayidx13_i_1_indices.push_back(const_int32_36);
     Instruction* ptr_arrayidx13_i_1 = new GetElementPtrInst(ptr_dests_129, ptr_arrayidx13_i_1_indices.begin(), ptr_arrayidx13_i_1_indices.end(), "arrayidx13.i.1", label_forbody_138);
     ExtractElementInst* float_tmp15_i2_1 = new ExtractElementInst(packed_tmp7_i_1, const_int32_36, "tmp15.i2.1", label_forbody_138);
-    StoreInst* void_194 = new StoreInst(float_tmp15_i2_1, ptr_arrayidx13_i_1, false, label_forbody_138);
+    StoreInst* void_202 = new StoreInst(float_tmp15_i2_1, ptr_arrayidx13_i_1, false, label_forbody_138);
     std::vector<Value*> ptr_arrayidx17_i_1_indices;
-    ptr_arrayidx17_i_1_indices.push_back(int32_i_0_reg2mem_0_184);
+    ptr_arrayidx17_i_1_indices.push_back(int32_i_0_reg2mem_0_190);
     ptr_arrayidx17_i_1_indices.push_back(const_int32_36);
     ptr_arrayidx17_i_1_indices.push_back(const_int32_37);
     Instruction* ptr_arrayidx17_i_1 = new GetElementPtrInst(ptr_dests_129, ptr_arrayidx17_i_1_indices.begin(), ptr_arrayidx17_i_1_indices.end(), "arrayidx17.i.1", label_forbody_138);
     ExtractElementInst* float_tmp19_i_1 = new ExtractElementInst(packed_tmp7_i_1, const_int32_37, "tmp19.i.1", label_forbody_138);
-    StoreInst* void_195 = new StoreInst(float_tmp19_i_1, ptr_arrayidx17_i_1, false, label_forbody_138);
+    StoreInst* void_203 = new StoreInst(float_tmp19_i_1, ptr_arrayidx17_i_1, false, label_forbody_138);
     std::vector<Value*> ptr_arrayidx21_i_1_indices;
-    ptr_arrayidx21_i_1_indices.push_back(int32_i_0_reg2mem_0_184);
+    ptr_arrayidx21_i_1_indices.push_back(int32_i_0_reg2mem_0_190);
     ptr_arrayidx21_i_1_indices.push_back(const_int32_36);
     ptr_arrayidx21_i_1_indices.push_back(const_int32_38);
     Instruction* ptr_arrayidx21_i_1 = new GetElementPtrInst(ptr_dests_129, ptr_arrayidx21_i_1_indices.begin(), ptr_arrayidx21_i_1_indices.end(), "arrayidx21.i.1", label_forbody_138);
     ExtractElementInst* float_tmp23_i_1 = new ExtractElementInst(packed_tmp7_i_1, const_int32_38, "tmp23.i.1", label_forbody_138);
-    StoreInst* void_196 = new StoreInst(float_tmp23_i_1, ptr_arrayidx21_i_1, false, label_forbody_138);
-    BinaryOperator* int32_indvar_next20 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_184, const_int32_36, "indvar.next20", label_forbody_138);
-    ICmpInst* int1_exitcond21 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next20, const_int32_39, "exitcond21", label_forbody_138);
-    new BranchInst(label_afterfor_139, label_forbody_138, int1_exitcond21, label_forbody_138);
+    StoreInst* void_204 = new StoreInst(float_tmp23_i_1, ptr_arrayidx21_i_1, false, label_forbody_138);
+    BinaryOperator* int32_indvar_next21 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_190, const_int32_36, "indvar.next21", label_forbody_138);
+    ICmpInst* int1_exitcond22 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next21, const_int32_39, "exitcond22", label_forbody_138);
+    new BranchInst(label_afterfor_139, label_forbody_138, int1_exitcond22, label_forbody_138);
     
     // Block afterfor (label_afterfor_139)
-    std::vector<Value*> ptr_tmp26_indices;
-    ptr_tmp26_indices.push_back(const_int32_34);
-    ptr_tmp26_indices.push_back(const_int32_39);
-    Instruction* ptr_tmp26 = new GetElementPtrInst(ptr_args_144, ptr_tmp26_indices.begin(), ptr_tmp26_indices.end(), "tmp26", label_afterfor_139);
-    LoadInst* int32_tmp27 = new LoadInst(ptr_tmp26, "tmp27", false, label_afterfor_139);
-    BinaryOperator* int32_neg = BinaryOperator::create(Instruction::Xor, int32_tmp27, const_int32_40, "neg", label_afterfor_139);
+    BinaryOperator* int32_neg = BinaryOperator::create(Instruction::Xor, int32_or, const_int32_40, "neg", label_afterfor_139);
     new ReturnInst(int32_neg, label_afterfor_139);
     
     // Block forbody6.i.1 (label_forbody6_i_1)
-    Argument* fwdref_199 = new Argument(IntegerType::get(32));
+    Argument* fwdref_207 = new Argument(IntegerType::get(32));
     PHINode* int32_j_0_reg2mem_0_i_1 = new PHINode(IntegerType::get(32), "j.0.reg2mem.0.i.1", label_forbody6_i_1);
     int32_j_0_reg2mem_0_i_1->reserveOperandSpace(2);
     int32_j_0_reg2mem_0_i_1->addIncoming(const_int32_34, label_forbody6_i_135);
-    int32_j_0_reg2mem_0_i_1->addIncoming(fwdref_199, label_forbody6_i_1);
+    int32_j_0_reg2mem_0_i_1->addIncoming(fwdref_207, label_forbody6_i_1);
     
-    Argument* fwdref_200 = new Argument(VectorTy_1);
+    Argument* fwdref_208 = new Argument(VectorTy_1);
     PHINode* packed_vec_0_reg2mem_0_i_1 = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i.1", label_forbody6_i_1);
     packed_vec_0_reg2mem_0_i_1->reserveOperandSpace(2);
-    packed_vec_0_reg2mem_0_i_1->addIncoming(packed_tmp48_i_162, label_forbody6_i_135);
-    packed_vec_0_reg2mem_0_i_1->addIncoming(fwdref_200, label_forbody6_i_1);
+    packed_vec_0_reg2mem_0_i_1->addIncoming(packed_tmp48_i_163, label_forbody6_i_135);
+    packed_vec_0_reg2mem_0_i_1->addIncoming(fwdref_208, label_forbody6_i_1);
     
     std::vector<Value*> ptr_arraydecay11_i_1_indices;
     ptr_arraydecay11_i_1_indices.push_back(const_int32_36);
@@ -1084,23 +1095,23 @@ Module* createBaseShader() {
     ptr_arrayidx54_i_1_indices.push_back(const_int32_36);
     ptr_arrayidx54_i_1_indices.push_back(int32_j_0_reg2mem_0_i_1);
     Instruction* ptr_arrayidx54_i_1 = new GetElementPtrInst(ptr_inputs_140, ptr_arrayidx54_i_1_indices.begin(), ptr_arrayidx54_i_1_indices.end(), "arrayidx54.i.1", label_forbody6_i_1);
-    StoreInst* void_201 = new StoreInst(packed_tmp48_i_1, ptr_arrayidx54_i_1, false, label_forbody6_i_1);
+    StoreInst* void_209 = new StoreInst(packed_tmp48_i_1, ptr_arrayidx54_i_1, false, label_forbody6_i_1);
     BinaryOperator* int32_inc_i_1 = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0_i_1, const_int32_36, "inc.i.1", label_forbody6_i_1);
     ICmpInst* int1_cmp59_i_1 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_i_1, int32_num_inputs_131, "cmp59.i.1", label_forbody6_i_1);
     new BranchInst(label_forbody6_i_1, label_forbody6_i_2, int1_cmp59_i_1, label_forbody6_i_1);
     
     // Block forbody6.i.2 (label_forbody6_i_2)
-    Argument* fwdref_203 = new Argument(IntegerType::get(32));
+    Argument* fwdref_211 = new Argument(IntegerType::get(32));
     PHINode* int32_j_0_reg2mem_0_i_2 = new PHINode(IntegerType::get(32), "j.0.reg2mem.0.i.2", label_forbody6_i_2);
     int32_j_0_reg2mem_0_i_2->reserveOperandSpace(2);
     int32_j_0_reg2mem_0_i_2->addIncoming(const_int32_34, label_forbody6_i_1);
-    int32_j_0_reg2mem_0_i_2->addIncoming(fwdref_203, label_forbody6_i_2);
+    int32_j_0_reg2mem_0_i_2->addIncoming(fwdref_211, label_forbody6_i_2);
     
-    Argument* fwdref_204 = new Argument(VectorTy_1);
+    Argument* fwdref_212 = new Argument(VectorTy_1);
     PHINode* packed_vec_0_reg2mem_0_i_2 = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i.2", label_forbody6_i_2);
     packed_vec_0_reg2mem_0_i_2->reserveOperandSpace(2);
     packed_vec_0_reg2mem_0_i_2->addIncoming(packed_tmp48_i_1, label_forbody6_i_1);
-    packed_vec_0_reg2mem_0_i_2->addIncoming(fwdref_204, label_forbody6_i_2);
+    packed_vec_0_reg2mem_0_i_2->addIncoming(fwdref_212, label_forbody6_i_2);
     
     std::vector<Value*> ptr_arraydecay11_i_2_indices;
     ptr_arraydecay11_i_2_indices.push_back(const_int32_37);
@@ -1135,23 +1146,23 @@ Module* createBaseShader() {
     ptr_arrayidx54_i_2_indices.push_back(const_int32_37);
     ptr_arrayidx54_i_2_indices.push_back(int32_j_0_reg2mem_0_i_2);
     Instruction* ptr_arrayidx54_i_2 = new GetElementPtrInst(ptr_inputs_140, ptr_arrayidx54_i_2_indices.begin(), ptr_arrayidx54_i_2_indices.end(), "arrayidx54.i.2", label_forbody6_i_2);
-    StoreInst* void_205 = new StoreInst(packed_tmp48_i_2, ptr_arrayidx54_i_2, false, label_forbody6_i_2);
+    StoreInst* void_213 = new StoreInst(packed_tmp48_i_2, ptr_arrayidx54_i_2, false, label_forbody6_i_2);
     BinaryOperator* int32_inc_i_2 = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0_i_2, const_int32_36, "inc.i.2", label_forbody6_i_2);
     ICmpInst* int1_cmp59_i_2 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_i_2, int32_num_inputs_131, "cmp59.i.2", label_forbody6_i_2);
     new BranchInst(label_forbody6_i_2, label_forbody6_i_3, int1_cmp59_i_2, label_forbody6_i_2);
     
     // Block forbody6.i.3 (label_forbody6_i_3)
-    Argument* fwdref_207 = new Argument(IntegerType::get(32));
+    Argument* fwdref_215 = new Argument(IntegerType::get(32));
     PHINode* int32_j_0_reg2mem_0_i_3 = new PHINode(IntegerType::get(32), "j.0.reg2mem.0.i.3", label_forbody6_i_3);
     int32_j_0_reg2mem_0_i_3->reserveOperandSpace(2);
     int32_j_0_reg2mem_0_i_3->addIncoming(const_int32_34, label_forbody6_i_2);
-    int32_j_0_reg2mem_0_i_3->addIncoming(fwdref_207, label_forbody6_i_3);
+    int32_j_0_reg2mem_0_i_3->addIncoming(fwdref_215, label_forbody6_i_3);
     
-    Argument* fwdref_208 = new Argument(VectorTy_1);
+    Argument* fwdref_216 = new Argument(VectorTy_1);
     PHINode* packed_vec_0_reg2mem_0_i_3 = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i.3", label_forbody6_i_3);
     packed_vec_0_reg2mem_0_i_3->reserveOperandSpace(2);
     packed_vec_0_reg2mem_0_i_3->addIncoming(packed_tmp48_i_2, label_forbody6_i_2);
-    packed_vec_0_reg2mem_0_i_3->addIncoming(fwdref_208, label_forbody6_i_3);
+    packed_vec_0_reg2mem_0_i_3->addIncoming(fwdref_216, label_forbody6_i_3);
     
     std::vector<Value*> ptr_arraydecay11_i_3_indices;
     ptr_arraydecay11_i_3_indices.push_back(const_int32_38);
@@ -1186,23 +1197,24 @@ Module* createBaseShader() {
     ptr_arrayidx54_i_3_indices.push_back(const_int32_38);
     ptr_arrayidx54_i_3_indices.push_back(int32_j_0_reg2mem_0_i_3);
     Instruction* ptr_arrayidx54_i_3 = new GetElementPtrInst(ptr_inputs_140, ptr_arrayidx54_i_3_indices.begin(), ptr_arrayidx54_i_3_indices.end(), "arrayidx54.i.3", label_forbody6_i_3);
-    StoreInst* void_209 = new StoreInst(packed_tmp48_i_3, ptr_arrayidx54_i_3, false, label_forbody6_i_3);
+    StoreInst* void_217 = new StoreInst(packed_tmp48_i_3, ptr_arrayidx54_i_3, false, label_forbody6_i_3);
     BinaryOperator* int32_inc_i_3 = BinaryOperator::create(Instruction::Add, int32_j_0_reg2mem_0_i_3, const_int32_36, "inc.i.3", label_forbody6_i_3);
     ICmpInst* int1_cmp59_i_3 = new ICmpInst(ICmpInst::ICMP_SLT, int32_inc_i_3, int32_num_inputs_131, "cmp59.i.3", label_forbody6_i_3);
     new BranchInst(label_forbody6_i_3, label_from_array_exit_136, int1_cmp59_i_3, label_forbody6_i_3);
     
     // Resolve Forward References
-    fwdref_208->replaceAllUsesWith(packed_tmp48_i_3); delete fwdref_208;
-    fwdref_207->replaceAllUsesWith(int32_inc_i_3); delete fwdref_207;
-    fwdref_150->replaceAllUsesWith(packed_tmp48_i_162); delete fwdref_150;
-    fwdref_148->replaceAllUsesWith(int32_inc_i_165); delete fwdref_148;
-    fwdref_171->replaceAllUsesWith(packed_tmp31_i_178); delete fwdref_171;
-    fwdref_170->replaceAllUsesWith(int32_indvar_next22); delete fwdref_170;
-    fwdref_185->replaceAllUsesWith(int32_indvar_next20); delete fwdref_185;
-    fwdref_200->replaceAllUsesWith(packed_tmp48_i_1); delete fwdref_200;
-    fwdref_199->replaceAllUsesWith(int32_inc_i_1); delete fwdref_199;
-    fwdref_204->replaceAllUsesWith(packed_tmp48_i_2); delete fwdref_204;
-    fwdref_203->replaceAllUsesWith(int32_inc_i_2); delete fwdref_203;
+    fwdref_216->replaceAllUsesWith(packed_tmp48_i_3); delete fwdref_216;
+    fwdref_215->replaceAllUsesWith(int32_inc_i_3); delete fwdref_215;
+    fwdref_151->replaceAllUsesWith(packed_tmp48_i_163); delete fwdref_151;
+    fwdref_149->replaceAllUsesWith(int32_inc_i_166); delete fwdref_149;
+    fwdref_172->replaceAllUsesWith(packed_tmp31_i_179); delete fwdref_172;
+    fwdref_171->replaceAllUsesWith(int32_indvar_next23); delete fwdref_171;
+    fwdref_189->replaceAllUsesWith(int32_or); delete fwdref_189;
+    fwdref_191->replaceAllUsesWith(int32_indvar_next21); delete fwdref_191;
+    fwdref_208->replaceAllUsesWith(packed_tmp48_i_1); delete fwdref_208;
+    fwdref_207->replaceAllUsesWith(int32_inc_i_1); delete fwdref_207;
+    fwdref_212->replaceAllUsesWith(packed_tmp48_i_2); delete fwdref_212;
+    fwdref_211->replaceAllUsesWith(int32_inc_i_2); delete fwdref_211;
     
   }
   
index 517aa2e..4f98d75 100644 (file)
@@ -32,7 +32,6 @@
   */
 typedef __attribute__(( ocu_vector_type(4) )) float float4;
 
-
 extern float powf(float a, float b);
 
 inline float approx(float a, float b)
@@ -106,3 +105,11 @@ inline float4 vsin(float4 val)
    result.w = res;
    return result;
 }
+
+inline int kilp(float4 val) /* kill predicate: yields 1 when any component of val is negative, else 0 */
+{
+   if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0)
+      return 1; /* at least one of x/y/z/w compared below zero */
+   else
+      return 0; /* no component compared below zero */
+}
index 6bdb311..03f7ac8 100644 (file)
@@ -239,6 +239,8 @@ int run_fragment_shader(float x, float y,
    float4  results[4][16];
    float4  temps[128];//MAX_PROGRAM_TEMPS
    struct ShaderInput args;
+   int mask = 0;
+   args.kilmask = 0;
 
    from_array(inputs, ainputs, 4, num_inputs);
    from_consts(consts, aconsts, num_consts);
@@ -248,7 +250,11 @@ int run_fragment_shader(float x, float y,
    for (int i = 0; i < 4; ++i) {
       args.inputs  = inputs[i];
       args.dests   = results[i];
+      mask = args.kilmask;
+      args.kilmask = 0;
       execute_shader(&args);
+      args.kilmask = mask | (args.kilmask << i);
+
       to_array(dests[i], args.dests, 2);
    }
    return ~args.kilmask;
index 71045fa..c33b9bf 100644 (file)
@@ -343,6 +343,20 @@ llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx )
    return elemIdx(val, idx, indIdx);
 }
 
+void Storage::setKilElement(llvm::Value *val) // emits IR that stores val into the KilArg slot reached through m_INPUT
+{
+   std::vector<Value*> indices; // GEP index list: {0, KilArg}
+   indices.push_back(constantInt(0)); // first index 0: step through the m_INPUT pointer itself
+   indices.push_back(constantInt(static_cast<int>(KilArg))); // second index: the KilArg member
+   GetElementPtrInst *elem = new GetElementPtrInst(m_INPUT,
+                                                   indices.begin(),
+                                                   indices.end(),
+                                                   name("kil_ptr"),
+                                                   m_block); // instruction is appended to the current block
+   StoreInst *st = new StoreInst(val, elem, false, m_block); // third argument false: presumably isVolatile -- confirm against the LLVM version in use
+   st->setAlignment(8); // NOTE(review): callers treat the kil mask as an int; confirm 8-byte alignment holds for this member
+}
+
 #endif //MESA_LLVM
 
 
index 7f1a8bf..8574f75 100644 (file)
@@ -71,6 +71,8 @@ public:
    llvm::Value *addrElement(int idx) const;
    void setAddrElement(int idx, llvm::Value *val, int mask);
 
+   void setKilElement(llvm::Value *val);
+
    llvm::Value *shuffleVector(llvm::Value *vec, int shuffle);
 
    llvm::Value *extractIndex(llvm::Value *vec);
index 9307ed2..5ea07f9 100644 (file)
@@ -202,7 +202,7 @@ shade_quad_llvm(struct quad_stage *qs,
    }
 #endif
 
-   /*quad->mask &=*/
+   quad->mask &=
       gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs,
                                    softpipe->mapped_constants[PIPE_SHADER_FRAGMENT],
                                    qss->samplers);