int pos = 0;
int i;
+ /* When the allocno cost of GENERAL_REGS is the same as that of MASK_REGS,
+ allocate MASK_REGS first since it has already been disparaged. This is
+ for testcase bitwise_mask_op3.c, where the input is allocated to mask
+ registers, so mask bitwise instructions should be used there.
+ Refer to pr101142. */
+ /* Mask register. */
+ for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
/* First allocate the local general purpose registers. */
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
reg_alloc_order [pos++] = i;
- /* Mask register. */
- for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
- reg_alloc_order [pos++] = i;
-
/* x87 registers. */
if (TARGET_SSE_MATH)
for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
})
(define_insn "*anddi_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,k")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,?k")
(and:DI
(match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
(match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L,k")))
(set_attr "mode" "SI")])
(define_insn "*and<mode>_1"
- [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,k")
+ [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,?k")
(and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm,k")
(match_operand:SWI24 2 "<general_operand>" "r<i>,m,L,k")))
(clobber (reg:CC FLAGS_REG))]
(set_attr "mode" "<MODE>,<MODE>,SI,<MODE>")])
(define_insn "*andqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,k")
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
(match_operand:QI 2 "general_operand" "qn,m,rn,k")))
(clobber (reg:CC FLAGS_REG))]
})
(define_insn "*andn<mode>_1"
- [(set (match_operand:SWI48 0 "register_operand" "=r,r,k")
+ [(set (match_operand:SWI48 0 "register_operand" "=r,r,?k")
(and:SWI48
(not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k"))
(match_operand:SWI48 2 "nonimmediate_operand" "r,m,k")))
(set_attr "mode" "<MODE>")])
(define_insn "*andn<mode>_1"
- [(set (match_operand:SWI12 0 "register_operand" "=r,k")
+ [(set (match_operand:SWI12 0 "register_operand" "=r,?k")
(and:SWI12
(not:SWI12 (match_operand:SWI12 1 "register_operand" "r,k"))
(match_operand:SWI12 2 "register_operand" "r,k")))
})
(define_insn "*<code><mode>_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,k")
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
(any_or:SWI248
(match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
(match_operand:SWI248 2 "<general_operand>" "r<i>,m,k")))
(set_attr "mode" "SI")])
(define_insn "*<code>qi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,k")
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
(any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
(match_operand:QI 2 "general_operand" "qn,m,rn,k")))
(clobber (reg:CC FLAGS_REG))]
"split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);")
(define_insn "*one_cmpl<mode>2_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,k")
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,?k")
(not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,k")))]
"ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
"@
(set_attr "mode" "<MODE>")])
(define_insn "*one_cmplsi2_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,k")
+ [(set (match_operand:DI 0 "register_operand" "=r,?k")
(zero_extend:DI
(not:SI (match_operand:SI 1 "register_operand" "0,k"))))]
"TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
(set_attr "mode" "SI,SI")])
(define_insn "*one_cmplqi2_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,k")
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,?k")
(not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
"ix86_unary_operator_ok (NOT, QImode, operands)"
"@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake-avx512" } */
-
-#ifndef DTYPE
-#define DTYPE u32
-#endif
+/* { dg-options "-O2 -march=skylake-avx512 -DDTYPE32" } */
typedef unsigned long long u64;
typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;
+/* Select the element type under test and the matching byte-swap helper.
+   Each test file passes one of -DDTYPE32/-DDTYPE16/-DDTYPE8/-DDTYPE64 in
+   its dg-options; DTYPE must have the width named by that macro so that
+   the 8-bit and 64-bit variants really exercise 8-bit and 64-bit data.  */
+#ifdef DTYPE32
+typedef u32 DTYPE;
+#define byteswap byteswapu32
+#endif
+
+#ifdef DTYPE16
+typedef u16 DTYPE;
+#define byteswap byteswapu16
+#endif
+
+#ifdef DTYPE8
+typedef u8 DTYPE;
+#define byteswap byteswapu8
+#endif
+
+#ifdef DTYPE64
+typedef u64 DTYPE;
+#define byteswap byteswapu64
+#endif
+
#define R(x,n) ( (x >> n) | (x << (32 - n)))
#define S0(x) (R(x, 2) ^ R(x,13) ^ R(x,22))
d += tmp1; \
}
-static inline DTYPE byteswap(DTYPE x)
+static inline u32 byteswapu32(u32 x)
{
- x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >> 16;
- x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >> 8;
- return x;
+ x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >> 16;
+ x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >> 8;
+ return x;
}
-#define BE_LOAD32(n,b,i) (n) = byteswap(*(DTYPE *)(b + i))
+/* Return X with its low and high bytes exchanged.  */
+static inline u16 byteswapu16(u16 x)
+{
+ x = (x & 0x00FF) << 8 | (x & 0xFF00) >> 8;
+ return x;
+}
+
+/* A single byte has nothing to swap: return X unchanged.  Provided so the
+   byteswap macro has a target for every DTYPE variant.  */
+static inline u8 byteswapu8(u8 x)
+{
+ return x;
+}
+
+/* Return X with its eight bytes reversed: each 32-bit half is byte-swapped
+   with byteswapu32, and the swapped low half is moved to the high half
+   while the swapped high half becomes the low half.  */
+static inline u64 byteswapu64(u64 x)
+{
+ x = ((u64)(byteswapu32 (x & 0x00000000FFFFFFFF))) << 32 | byteswapu32((x & 0xFFFFFFFF00000000) >> 32);
+ return x;
+}
-void foo (u8 *in, DTYPE out[8], const DTYPE C[16])
+void foo (DTYPE in[16], DTYPE out[8], const DTYPE C[16])
{
DTYPE tmp1 = 0, tmp2 = 0, a, b, c, d, e, f, g, h;
DTYPE w0, w1, w2, w3, w4, w5, w6, w7,
w8, w9, w10, w11, w12, w13, w14, w15;
- w0 = byteswap(*(DTYPE *)(in + 0));
- w1 = byteswap(*(DTYPE *)(in + 4));
- w2 = byteswap(*(DTYPE *)(in + 8));
- w3 = byteswap(*(DTYPE *)(in + 12));
- w4 = byteswap(*(DTYPE *)(in + 16));
- w5 = byteswap(*(DTYPE *)(in + 20));
- w6 = byteswap(*(DTYPE *)(in + 24));
- w7 = byteswap(*(DTYPE *)(in + 28));
- w8 = byteswap(*(DTYPE *)(in + 32));
- w9 = byteswap(*(DTYPE *)(in + 36));
- w10 = byteswap(*(DTYPE *)(in + 40));
- w11 = byteswap(*(DTYPE *)(in + 44));
- w12 = byteswap(*(DTYPE *)(in + 48));
- w13 = byteswap(*(DTYPE *)(in + 52));
- w14 = byteswap(*(DTYPE *)(in + 56));
- w15 = byteswap(*(DTYPE *)(in + 60));
+ w0 = byteswap(in[0]);
+ w1 = byteswap(in[1]);
+ w2 = byteswap(in[2]);
+ w3 = byteswap(in[3]);
+ w4 = byteswap(in[4]);
+ w5 = byteswap(in[5]);
+ w6 = byteswap(in[6]);
+ w7 = byteswap(in[7]);
+ w8 = byteswap(in[8]);
+ w9 = byteswap(in[9]);
+ w10 = byteswap(in[10]);
+ w11 = byteswap(in[11]);
+ w12 = byteswap(in[12]);
+ w13 = byteswap(in[13]);
+ w14 = byteswap(in[14]);
+ w15 = byteswap(in[15]);
a = out[0];
b = out[1];
c = out[2];
}
/* { dg-final { scan-assembler "kmovd" } } */
+/* { dg-final { scan-assembler-not "knot" } } */
+/* { dg-final { scan-assembler-not "kxor" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kandn" } } */
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake-avx512" } */
-
-#ifndef DTYPE
-#define DTYPE u16
-#endif
+/* { dg-options "-O2 -march=skylake-avx512 -DDTYPE16" } */
#include "spill_to_mask-1.c"
-/* { dg-final { scan-assembler "kmovw" } } */
+/* { dg-final { scan-assembler-not "knot" } } */
+/* { dg-final { scan-assembler-not "kxor" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kandn" } } */
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake-avx512" } */
-
-#ifndef DTYPE
-#define DTYPE u8
-#endif
+/* { dg-options "-O2 -march=skylake-avx512 -DDTYPE8" } */
#include "spill_to_mask-1.c"
-/* { dg-final { scan-assembler "kmovb" } } */
+/* { dg-final { scan-assembler-not "knot" } } */
+/* { dg-final { scan-assembler-not "kxor" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kandn" } } */
/* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -march=skylake-avx512" } */
-
-#ifndef DTYPE
-#define DTYPE u64
-#endif
+/* { dg-options "-O2 -march=skylake-avx512 -DDTYPE64" } */
#include "spill_to_mask-1.c"
-/* { dg-final { scan-assembler "kmovq" } } */
+/* { dg-final { scan-assembler-not "knot" } } */
+/* { dg-final { scan-assembler-not "kxor" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kandn" } } */