if (copy->dest == copy->src.value)
return;
+ /* We can swap lo/hi halves of a 32-bit register with a 32-bit extr */
+ if (copy->src.size == AGX_SIZE_16 &&
+ (copy->dest >> 1) == (copy->src.value >> 1)) {
+
+ assert(((copy->dest & 1) == (1 - (copy->src.value & 1))) &&
+ "no trivial swaps, and only 2 halves of a register");
+
+ /* r0 = extr r0, r0, #16
+ * = (((r0 << 32) | r0) >> 16) & 0xFFFFFFFF
+ * = (((r0 << 32) >> 16) & 0xFFFFFFFF) | (r0 >> 16)
+ * = (r0l << 16) | r0h
+ */
+ agx_index reg32 = agx_register(copy->dest & ~1, AGX_SIZE_32);
+ agx_extr_to(b, reg32, reg32, reg32, agx_immediate(16), 0);
+ return;
+ }
+
agx_index x = agx_register(copy->dest, copy->src.size);
agx_index y = copy->src;
} while (0)
static inline void
+extr_swap(agx_builder *b, agx_index x)
+{ /* Test helper: emit one extr through builder b that uses x, retyped as 32-bit, as the destination and both sources. */
+ x.size = AGX_SIZE_32; /* x is passed by value, so only this local copy is widened; callers in this file pass a 16-bit register index */
+ agx_extr_to(b, x, x, x, agx_immediate(16), 0); /* shift immediate of 16 on (x:x); presumably yields (x << 16) | (x >> 16), i.e. the two 16-bit halves exchanged — meaning of the trailing 0 is not visible here, TODO confirm */
+}
+
+static inline void
xor_swap(agx_builder *b, agx_index x, agx_index y)
{
agx_xor_to(b, x, x, y);
{.dest = 1, .src = agx_register(0, AGX_SIZE_16)},
};
- CASE(test_2, {
- xor_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
- });
+ CASE(test_2, { extr_swap(b, agx_register(0, AGX_SIZE_16)); });
}
TEST_F(LowerParallelCopy, Cycle3)
{.dest = 2, .src = agx_register(0, AGX_SIZE_16)},
};
- /* XXX: requires 6 instructions. if we had a temp free, could do it in 4 */
CASE(test, {
- xor_swap(b, agx_register(0, AGX_SIZE_16), agx_register(1, AGX_SIZE_16));
+ extr_swap(b, agx_register(0, AGX_SIZE_16));
xor_swap(b, agx_register(1, AGX_SIZE_16), agx_register(2, AGX_SIZE_16));
});
}