swr/rast: Fix alloca usage in jitter
author George Kyriazis <george.kyriazis@intel.com>
Thu, 5 Apr 2018 20:59:54 +0000 (15:59 -0500)
committer George Kyriazis <george.kyriazis@intel.com>
Wed, 18 Apr 2018 15:51:38 +0000 (10:51 -0500)
Fix issue where temporary allocas were getting hoisted to function entry
unnecessarily. We now explicitly mark temporary allocas and skip hoisting
during the hoist pass. Should reduce stack usage.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
src/gallium/drivers/swr/rasterizer/jitter/builder.h
src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp

index 53947c3..bd81560 100644 (file)
@@ -111,4 +111,21 @@ namespace SwrJit
         mSimdVectorIntTy = ArrayType::get(mSimdInt32Ty, 4);
         mSimdVectorTRTy = ArrayType::get(mSimdFP32Ty, 5);
     }
+
+    /// @brief Mark this alloca as temporary to avoid hoisting later on; attaches "is_temp_alloca" metadata to @p inst, which is expected to be an AllocaInst
+    void Builder::SetTempAlloca(Value* inst)
+    {
+        AllocaInst* pAlloca = dyn_cast<AllocaInst>(inst); // dyn_cast yields nullptr when inst is not an AllocaInst
+        SWR_ASSERT(pAlloca, "Unexpected non-alloca instruction"); // NOTE(review): if SWR_ASSERT is compiled out, a null pAlloca would be dereferenced below -- confirm
+        MDNode* N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, "is_temp_alloca")); // marker node; IsTempAlloca only tests for its presence
+        pAlloca->setMetadata("is_temp_alloca", N);
+    }
+
+    bool Builder::IsTempAlloca(Value* inst) // returns true when inst carries the "is_temp_alloca" metadata set by SetTempAlloca
+    {
+        AllocaInst* pAlloca = dyn_cast<AllocaInst>(inst); // nullptr when inst is not an AllocaInst
+        SWR_ASSERT(pAlloca, "Unexpected non-alloca instruction"); // NOTE(review): the assert is the only guard before the dereference below -- confirm it is active in all builds
+
+        return (pAlloca->getMetadata("is_temp_alloca") != nullptr); // a non-null result means the marker is attached
+    }
 }
index 4c79bab..27a32bc 100644 (file)
@@ -96,6 +96,8 @@ namespace SwrJit
         Type*                mSimd32Int8Ty;
 
         void SetTargetWidth(uint32_t width);
+        void SetTempAlloca(Value* inst); // tag an alloca with "is_temp_alloca" metadata so it can be skipped when hoisting
+        bool IsTempAlloca(Value* inst); // query whether an alloca carries the "is_temp_alloca" metadata
 
 #include "gen_builder.hpp"
 #include "gen_builder_meta.hpp"
index c5f0b2b..eccf0ad 100644 (file)
@@ -229,6 +229,7 @@ namespace SwrJit
 
             // store vSrc on the stack.  this way we can select between a valid load address and the vSrc address
             Value* vSrcPtr = ALLOCA(vSrc->getType());
+            SetTempAlloca(vSrcPtr);
             STORE(vSrc, vSrcPtr);
 
             vGather = UndefValue::get(VectorType::get(mDoubleTy, 4));