From 77a45576efa853c52feb48560ab1c7a6210b5878 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <kparzysz@codeaurora.org>
Date: Thu, 8 Dec 2016 20:33:45 +0000
Subject: [PATCH] [RDF] Fix incorrect lane mask calculation

This was exposed by some code that used more than one level of sub-
registers. There is no testcase, because there is no such code in the
Hexagon backend.

llvm-svn: 289099
---
 llvm/lib/Target/Hexagon/RDFGraph.cpp | 38 +++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/Hexagon/RDFGraph.cpp b/llvm/lib/Target/Hexagon/RDFGraph.cpp
index c696eb6..963b04b 100644
--- a/llvm/lib/Target/Hexagon/RDFGraph.cpp
+++ b/llvm/lib/Target/Hexagon/RDFGraph.cpp
@@ -651,10 +651,10 @@ RegisterRef RegisterAggr::normalize(RegisterRef RR) const {
     SuperReg = *SR;
   }
 
-  uint32_t Sub = TRI.getSubRegIndex(SuperReg, RR.Reg);
   const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg);
-  LaneBitmask SuperMask = RR.Mask &
-                          TRI.composeSubRegIndexLaneMask(Sub, RC.LaneMask);
+  LaneBitmask Common = RR.Mask & RC.LaneMask;
+  uint32_t Sub = TRI.getSubRegIndex(SuperReg, RR.Reg);
+  LaneBitmask SuperMask = TRI.composeSubRegIndexLaneMask(Sub, Common);
   return RegisterRef(SuperReg, SuperMask);
 }
 
@@ -1206,12 +1206,36 @@ bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
   while (UMA.isValid() && UMB.isValid()) {
     std::pair<uint32_t,LaneBitmask> PA = *UMA;
     std::pair<uint32_t,LaneBitmask> PB = *UMB;
-    // If the returned lane mask is 0, it should be treated as ~0
-    // (or the lane mask from the given register ref should be ignored).
-    // This can happen when a register has only one unit.
     if (PA.first == PB.first) {
-      if (!PA.second || !PB.second || (PA.second & PB.second))
+      // Lane mask of 0 (given by the iterator) should be treated as "full".
+      // This can happen when the register has only one unit, or when the
+      // unit corresponds to explicit aliasing. In such cases, the lane mask
+      // from RegisterRef should be ignored.
+      if (!PA.second || !PB.second)
         return true;
+
+      // At this point the common unit corresponds to a subregister. The lane
+      // masks correspond to the lane mask of that unit within the original
+      // register, for example assuming register quadruple q0 = r3:0, and
+      // a register pair d1 = r3:2, the lane mask of r2 in q0 may be 0b0100,
+      // while the lane mask of r2 in d1 may be 0b0001.
+      LaneBitmask LA = PA.second & RA.Mask;
+      LaneBitmask LB = PB.second & RB.Mask;
+      if (LA != 0 && LB != 0) {
+        unsigned Root = *MCRegUnitRootIterator(PA.first, &TRI);
+        // If register units were guaranteed to only have 1 bit in any lane
+        // mask, the code below would not be necessary. This is because LA
+        // and LB would have at most 1 bit set each, and that bit would be
+        // guaranteed to correspond to the given register unit.
+        uint32_t SubA = TRI.getSubRegIndex(RA.Reg, Root);
+        uint32_t SubB = TRI.getSubRegIndex(RB.Reg, Root);
+        const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(Root);
+        LaneBitmask MaskA = TRI.reverseComposeSubRegIndexLaneMask(SubA, LA);
+        LaneBitmask MaskB = TRI.reverseComposeSubRegIndexLaneMask(SubB, LB);
+        if (MaskA & MaskB & RC.LaneMask)
+          return true;
+      }
+
       ++UMA;
       ++UMB;
       continue;
-- 
2.7.4