}
}
+ /* Use a 32-bit test instruction when there are no sign issues,
+ if (GET_CODE (operands[1]) == CONST_INT
+ && !(INTVAL (operands[1]) & ~0x7fff)
+ && i386_aligned_p (operands[0]))
+ return AS2 (test%L0,%1,%k0);
+
if (CONSTANT_P (operands[1]) || GET_CODE (operands[0]) == MEM)
return AS2 (test%W0,%1,%0);
if (REG_P (operands[0]))
{
- if (REG_P (operands[1]))
- return AS2 (mov%L0,%k1,%k0);
- else if (CONSTANT_P (operands[1]))
- return AS2 (mov%L0,%1,%k0);
+ if (i386_aligned_p (operands[1]))
+ {
+ operands[1] = i386_sext16_if_const (operands[1]);
+ return AS2 (mov%L0,%k1,%k0);
+ }
+ if (TARGET_PENTIUMPRO)
+ {
+ /* movzwl is faster than movw on the Pentium Pro,
+ * although not as fast as an aligned movl. */
+#ifdef INTEL_SYNTAX
+ return AS2 (movzx,%1,%k0);
+#else
+ return AS2 (movz%W0%L0,%1,%k0);
+#endif
+ }
}
return AS2 (mov%W0,%1,%0);
{
rtx link;
if (operands[1] == const0_rtx && REG_P (operands[0]))
- return AS2 (xor%B0,%0,%0);
+ return AS2 (xor%L0,%k0,%k0);
if (operands[1] == const1_rtx
&& (link = find_reg_note (insn, REG_WAS_0, 0))
{
xops[0] = operands[0];
xops[1] = gen_rtx (CONST_INT, VOIDmode, 0xffff);
- output_asm_insn (AS2 (mov%W0,%1,%w0),operands);
+ if (i386_aligned_p (operands[1]))
+ output_asm_insn (AS2 (mov%L0,%k1,%k0),operands);
+ else
+ output_asm_insn (AS2 (mov%W0,%1,%w0),operands);
output_asm_insn (AS2 (and%L0,%1,%k0), xops);
RET;
}
}")
(define_insn "addsidi3_2"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=&r,r,o,&r,!&r,r,o,o,!o")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=&r,r,o,&r,!&r,&r,o,o,!o")
(plus:DI (zero_extend:DI (match_operand:SI 2 "general_operand" "o,ri,ri,o,o,ri,ri,i,r"))
(match_operand:DI 1 "general_operand" "0,0,0,iF,ro,roiF,riF,o,o")))
(clobber (match_scratch:SI 3 "=X,X,X,X,X,X,X,&r,&r"))]
if (operands[2] == constm1_rtx)
return AS1 (dec%L0,%0);
+ /* subl $-128,%ebx is smaller than addl $128,%ebx. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) == 128)
+ {
+ /* This doesn't compute the carry bit in the same way
+ * as add%L0, but we use inc and dec above and they
+ * don't set the carry bit at all. If inc/dec don't need
+ * a CC_STATUS_INIT, this doesn't either... */
+ operands[2] = GEN_INT (-128);
+ return AS2 (sub%L0,%2,%0);
+ }
+
return AS2 (add%L0,%2,%0);
}")
"*
{
/* ??? what about offsettable memory references? */
- if (QI_REG_P (operands[0])
+ if (!TARGET_PENTIUMPRO /* partial stalls are just too painful to risk. */
+ && QI_REG_P (operands[0])
&& GET_CODE (operands[2]) == CONST_INT
- && (INTVAL (operands[2]) & 0xff) == 0)
+ && (INTVAL (operands[2]) & 0xff) == 0
+ && i386_cc_probably_useless_p (insn))
{
int byteval = (INTVAL (operands[2]) >> 8) & 0xff;
CC_STATUS_INIT;
return AS2 (add%B0,%2,%h0);
}
+ /* Use a 32-bit operation when possible, to avoid the prefix penalty. */
+ if (REG_P (operands[0])
+ && i386_aligned_p (operands[2])
+ && i386_cc_probably_useless_p (insn))
+ {
+ CC_STATUS_INIT;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ HOST_WIDE_INT intval = 0xffff & INTVAL (operands[2]);
+
+ if (intval == 1)
+ return AS1 (inc%L0,%k0);
+
+ if (intval == 0xffff)
+ return AS1 (dec%L0,%k0);
+
+ operands[2] = i386_sext16_if_const (operands[2]);
+ }
+ return AS2 (add%L0,%k2,%k0);
+ }
+
if (operands[2] == const1_rtx)
return AS1 (inc%W0,%0);
(minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
(match_operand:HI 2 "general_operand" "ri,rm")))]
"ix86_binary_operator_ok (MINUS, HImode, operands)"
- "* return AS2 (sub%W0,%2,%0);")
+ "*
+{
+ if (REG_P (operands[0])
+ && i386_aligned_p (operands[2])
+ && i386_cc_probably_useless_p (insn))
+ {
+ CC_STATUS_INIT;
+ operands[2] = i386_sext16_if_const (operands[2]);
+ return AS2 (sub%L0,%k2,%k0);
+ }
+ return AS2 (sub%W0,%2,%0);
+}")
(define_expand "subqi3"
[(set (match_operand:QI 0 "general_operand" "")
;; The `r' in `rm' for operand 3 looks redundant, but it causes
;; optional reloads to be generated if op 3 is a pseudo in a stack slot.
-;; ??? What if we only change one byte of an offsettable memory reference?
(define_insn "andsi3"
[(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
(and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
""
"*
{
+ HOST_WIDE_INT intval;
if (!rtx_equal_p (operands[0], operands[1])
&& rtx_equal_p (operands[0], operands[2]))
{
operands[1] = operands[2];
operands[2] = tmp;
}
- if (GET_CODE (operands[2]) == CONST_INT
- && ! (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0])))
+ switch (GET_CODE (operands[2]))
{
- if (INTVAL (operands[2]) == 0xffff && REG_P (operands[0])
+ case CONST_INT:
+ if (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0]))
+ break;
+ intval = INTVAL (operands[2]);
+ /* zero-extend 16->32? */
+ if (intval == 0xffff && REG_P (operands[0])
&& (! REG_P (operands[1])
|| REGNO (operands[0]) != 0 || REGNO (operands[1]) != 0)
&& (!TARGET_ZERO_EXTEND_WITH_AND || ! rtx_equal_p (operands[0], operands[1])))
#endif
}
- if (INTVAL (operands[2]) == 0xff && REG_P (operands[0])
+ /* zero extend 8->32? */
+ if (intval == 0xff && REG_P (operands[0])
&& !(REG_P (operands[1]) && NON_QI_REG_P (operands[1]))
&& (! REG_P (operands[1])
|| REGNO (operands[0]) != 0 || REGNO (operands[1]) != 0)
#endif
}
- if (QI_REG_P (operands[0]) && ~(INTVAL (operands[2]) | 0xff) == 0)
- {
- CC_STATUS_INIT;
+ /* Partial-byte access below needs a QI register; bail out for other regs. */
+ if (REG_P (operands[0]) && ! QI_REG_P (operands[0]))
+ break;
- if (INTVAL (operands[2]) == 0xffffff00)
+ /* only low byte has zero bits? */
+ if (~(intval | 0xff) == 0)
+ {
+ intval &= 0xff;
+ if (REG_P (operands[0]))
{
- operands[2] = const0_rtx;
- return AS2 (mov%B0,%2,%b0);
+ if (intval == 0)
+ {
+ CC_STATUS_INIT;
+ return AS2 (xor%B0,%b0,%b0);
+ }
+
+ /* we're better off with the 32-bit version if reg != EAX */
+ /* the value is sign-extended in 8 bits */
+ if (REGNO (operands[0]) != 0 && (intval & 0x80))
+ break;
}
- operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
+ CC_STATUS_INIT;
+
+ operands[2] = GEN_INT (intval);
+
+ if (intval == 0)
+ return AS2 (mov%B0,%2,%b0);
+
return AS2 (and%B0,%2,%b0);
}
- if (QI_REG_P (operands[0]) && ~(INTVAL (operands[2]) | 0xff00) == 0)
+ /* only second byte has zero bits? */
+ if (~(intval | 0xff00) == 0)
{
CC_STATUS_INIT;
- if (INTVAL (operands[2]) == 0xffff00ff)
+ intval = (intval >> 8) & 0xff;
+ operands[2] = GEN_INT (intval);
+ if (intval == 0)
{
- operands[2] = const0_rtx;
- return AS2 (mov%B0,%2,%h0);
+ if (REG_P (operands[0]))
+ return AS2 (xor%B0,%h0,%h0);
+ operands[0] = adj_offsettable_operand (operands[0], 1);
+ return AS2 (mov%B0,%2,%b0);
}
- operands[2] = GEN_INT ((INTVAL (operands[2]) >> 8) & 0xff);
- return AS2 (and%B0,%2,%h0);
+ if (REG_P (operands[0]))
+ return AS2 (and%B0,%2,%h0);
+
+ operands[0] = adj_offsettable_operand (operands[0], 1);
+ return AS2 (and%B0,%2,%b0);
}
- if (GET_CODE (operands[0]) == MEM && INTVAL (operands[2]) == 0xffff0000)
+ if (REG_P (operands[0]))
+ break;
+
+ /* third byte has zero bits? */
+ if (~(intval | 0xff0000) == 0)
+ {
+ intval = (intval >> 16) & 0xff;
+ operands[0] = adj_offsettable_operand (operands[0], 2);
+byte_and_operation:
+ CC_STATUS_INIT;
+ operands[2] = GEN_INT (intval);
+ if (intval == 0)
+ return AS2 (mov%B0,%2,%b0);
+ return AS2 (and%B0,%2,%b0);
+ }
+
+ /* fourth byte has zero bits? */
+ if (~(intval | 0xff000000) == 0)
+ {
+ intval = (intval >> 24) & 0xff;
+ operands[0] = adj_offsettable_operand (operands[0], 3);
+ goto byte_and_operation;
+ }
+
+ /* Low word is zero? */
+ if (intval == 0xffff0000)
{
+word_zero_and_operation:
+ CC_STATUS_INIT;
operands[2] = const0_rtx;
return AS2 (mov%W0,%2,%w0);
}
+
+ /* High word is zero? */
+ if (intval == 0x0000ffff)
+ {
+ operands[0] = adj_offsettable_operand (operands[0], 2);
+ goto word_zero_and_operation;
+ }
}
return AS2 (and%L0,%2,%0);
operands[2] = GEN_INT ((INTVAL (operands[2]) >> 8) & 0xff);
return AS2 (and%B0,%2,%h0);
}
+
+ /* Use 32-bit ops on registers when there are no sign issues. */
+ if (REG_P (operands[0]))
+ {
+ if (!(INTVAL (operands[2]) & ~0x7fff))
+ return AS2 (and%L0,%2,%k0);
+ }
+ }
+
+ if (REG_P (operands[0])
+ && i386_aligned_p (operands[2]))
+ {
+ CC_STATUS_INIT;
+ /* If op[2] is constant, we should zero-extend it and
+    make a note that op[0] has been zero-extended, so
+    that we could use 32-bit ops on it forthwith, but
+    there is no such reg-note available.  Instead we do
+    a sign extension as that can result in shorter asm.  */
+ operands[2] = i386_sext16_if_const (operands[2]);
+ return AS2 (and%L0,%k2,%k0);
+ }
+
+ /* Use a 32-bit word with the upper bits set, invalidate CC */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && i386_aligned_p (operands[0]))
+ {
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+ CC_STATUS_INIT;
+ val |= ~0xffff;
+ if (val != INTVAL (operands[2]))
+ operands[2] = GEN_INT (val);
+ return AS2 (and%L0,%k2,%k0);
}
return AS2 (and%W0,%2,%0);
\f
;;- Bit set (inclusive or) instructions
-;; ??? What if we only change one byte of an offsettable memory reference?
+;; This optimizes known byte-wide operations to memory, and in some cases
+;; to QI registers.. Note that we don't want to use the QI registers too
+;; aggressively, because often the 32-bit register instruction is the same
+;; size, and likely to be faster on PentiumPro.
(define_insn "iorsi3"
[(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
(ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
""
"*
{
- if (GET_CODE (operands[2]) == CONST_INT
- && ! (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0])))
+ HOST_WIDE_INT intval;
+ switch (GET_CODE (operands[2]))
{
- if ((! REG_P (operands[0]) || QI_REG_P (operands[0]))
- && (INTVAL (operands[2]) & ~0xff) == 0)
- {
- CC_STATUS_INIT;
+ case CONST_INT:
- if (INTVAL (operands[2]) == 0xff)
- return AS2 (mov%B0,%2,%b0);
+ if (REG_P (operands[0]) && ! QI_REG_P (operands[0]))
+ break;
+
+ /* don't try to optimize volatile accesses */
+ if (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0]))
+ break;
+
+ intval = INTVAL (operands[2]);
+ if ((intval & ~0xff) == 0)
+ {
+ if (REG_P (operands[0]))
+ {
+ /* Do low byte access only for %eax or when high bit is set */
+ if (REGNO (operands[0]) != 0 && !(intval & 0x80))
+ break;
+ }
- return AS2 (or%B0,%2,%b0);
+byte_or_operation:
+ CC_STATUS_INIT;
+
+ if (intval != INTVAL (operands[2]))
+ operands[2] = GEN_INT (intval);
+
+ if (intval == 0xff)
+ return AS2 (mov%B0,%2,%b0);
+
+ return AS2 (or%B0,%2,%b0);
}
- if (QI_REG_P (operands[0]) && (INTVAL (operands[2]) & ~0xff00) == 0)
+ /* second byte? */
+ if ((intval & ~0xff00) == 0)
{
- CC_STATUS_INIT;
- operands[2] = GEN_INT (INTVAL (operands[2]) >> 8);
+ intval >>= 8;
- if (INTVAL (operands[2]) == 0xff)
- return AS2 (mov%B0,%2,%h0);
+ if (REG_P (operands[0]))
+ {
+ CC_STATUS_INIT;
+ operands[2] = GEN_INT (intval);
+ if (intval == 0xff)
+ return AS2 (mov%B0,%2,%h0);
- return AS2 (or%B0,%2,%h0);
+ return AS2 (or%B0,%2,%h0);
+ }
+
+ operands[0] = adj_offsettable_operand (operands[0], 1);
+ goto byte_or_operation;
+ }
+
+ if (REG_P (operands[0]))
+ break;
+
+ /* third byte? */
+ if ((intval & ~0xff0000) == 0)
+ {
+ intval >>= 16;
+ operands[0] = adj_offsettable_operand (operands[0], 2);
+ goto byte_or_operation;
+ }
+
+ /* fourth byte? */
+ if ((intval & ~0xff000000) == 0)
+ {
+ intval = (intval >> 24) & 0xff;
+ operands[0] = adj_offsettable_operand (operands[0], 3);
+ goto byte_or_operation;
}
}
""
"*
{
- if (GET_CODE (operands[2]) == CONST_INT
- && ! (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0])))
+ HOST_WIDE_INT intval;
+ switch (GET_CODE (operands[2]))
{
- /* Can we ignore the upper byte? */
- if ((! REG_P (operands[0]) || QI_REG_P (operands[0]))
- && (INTVAL (operands[2]) & 0xff00) == 0)
- {
- CC_STATUS_INIT;
- if (INTVAL (operands[2]) & 0xffff0000)
- operands[2] = GEN_INT (INTVAL (operands[2]) & 0xffff);
+ case CONST_INT:
- if (INTVAL (operands[2]) == 0xff)
- return AS2 (mov%B0,%2,%b0);
+ if (REG_P (operands[0]) && ! QI_REG_P (operands[0]))
+ break;
+
+ /* don't try to optimize volatile accesses */
+ if (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0]))
+ break;
+
+ intval = 0xffff & INTVAL (operands[2]);
+
+ if ((intval & 0xff00) == 0)
+ {
+ if (REG_P (operands[0]))
+ {
+ /* Do low byte access only for %eax or when high bit is set */
+ if (REGNO (operands[0]) != 0 && !(intval & 0x80))
+ break;
+ }
+
+byte_or_operation:
+ CC_STATUS_INIT;
+
+ if (intval == 0xff)
+ return AS2 (mov%B0,%2,%b0);
- return AS2 (or%B0,%2,%b0);
+ return AS2 (or%B0,%2,%b0);
}
- /* Can we ignore the lower byte? */
- /* ??? what about offsettable memory references? */
- if (QI_REG_P (operands[0])
- && (INTVAL (operands[2]) & 0xff) == 0)
+ /* high byte? */
+ if ((intval & 0xff) == 0)
{
- CC_STATUS_INIT;
- operands[2] = GEN_INT ((INTVAL (operands[2]) >> 8) & 0xff);
+ intval >>= 8;
+ operands[2] = GEN_INT (intval);
- if (INTVAL (operands[2]) == 0xff)
- return AS2 (mov%B0,%2,%h0);
+ if (REG_P (operands[0]))
+ {
+ CC_STATUS_INIT;
+ if (intval == 0xff)
+ return AS2 (mov%B0,%2,%h0);
+
+ return AS2 (or%B0,%2,%h0);
+ }
+
+ operands[0] = adj_offsettable_operand (operands[0], 1);
- return AS2 (or%B0,%2,%h0);
+ goto byte_or_operation;
}
}
+ if (REG_P (operands[0])
+ && i386_aligned_p (operands[2]))
+ {
+ CC_STATUS_INIT;
+ operands[2] = i386_sext16_if_const (operands[2]);
+ return AS2 (or%L0,%k2,%k0);
+ }
+
+ if (GET_CODE (operands[2]) == CONST_INT
+ && i386_aligned_p (operands[0]))
+ {
+ CC_STATUS_INIT;
+ intval = 0xffff & INTVAL (operands[2]);
+ if (intval != INTVAL (operands[2]))
+ operands[2] = GEN_INT (intval);
+ return AS2 (or%L0,%2,%k0);
+ }
+
return AS2 (or%W0,%2,%0);
}")
\f
;;- xor instructions
-;; ??? What if we only change one byte of an offsettable memory reference?
(define_insn "xorsi3"
[(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
(xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
""
"*
{
- if (GET_CODE (operands[2]) == CONST_INT
- && ! (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0])))
+ HOST_WIDE_INT intval;
+ switch (GET_CODE (operands[2]))
{
- if ((! REG_P (operands[0]) || QI_REG_P (operands[0]))
- && (INTVAL (operands[2]) & ~0xff) == 0)
- {
- CC_STATUS_INIT;
+ case CONST_INT:
- if (INTVAL (operands[2]) == 0xff)
- return AS1 (not%B0,%b0);
+ if (REG_P (operands[0]) && ! QI_REG_P (operands[0]))
+ break;
- return AS2 (xor%B0,%2,%b0);
+ /* don't try to optimize volatile accesses */
+ if (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0]))
+ break;
+
+ intval = INTVAL (operands[2]);
+ if ((intval & ~0xff) == 0)
+ {
+ if (REG_P (operands[0]))
+ {
+ /* Do low byte access only for %eax or when high bit is set */
+ if (REGNO (operands[0]) != 0 && !(intval & 0x80))
+ break;
+ }
+
+byte_xor_operation:
+ CC_STATUS_INIT;
+
+ if (intval == 0xff)
+ return AS1 (not%B0,%b0);
+
+ if (intval != INTVAL (operands[2]))
+ operands[2] = GEN_INT (intval);
+ return AS2 (xor%B0,%2,%b0);
}
- if (QI_REG_P (operands[0]) && (INTVAL (operands[2]) & ~0xff00) == 0)
+ /* second byte? */
+ if ((intval & ~0xff00) == 0)
{
- CC_STATUS_INIT;
- operands[2] = GEN_INT (INTVAL (operands[2]) >> 8);
+ intval >>= 8;
- if (INTVAL (operands[2]) == 0xff)
- return AS1 (not%B0,%h0);
+ if (REG_P (operands[0]))
+ {
+ CC_STATUS_INIT;
+ if (intval == 0xff)
+ return AS1 (not%B0,%h0);
- return AS2 (xor%B0,%2,%h0);
+ operands[2] = GEN_INT (intval);
+ return AS2 (xor%B0,%2,%h0);
+ }
+
+ operands[0] = adj_offsettable_operand (operands[0], 1);
+
+ goto byte_xor_operation;
+ }
+
+ if (REG_P (operands[0]))
+ break;
+
+ /* third byte? */
+ if ((intval & ~0xff0000) == 0)
+ {
+ intval >>= 16;
+ operands[0] = adj_offsettable_operand (operands[0], 2);
+ goto byte_xor_operation;
+ }
+
+ /* fourth byte? */
+ if ((intval & ~0xff000000) == 0)
+ {
+ intval = (intval >> 24) & 0xff;
+ operands[0] = adj_offsettable_operand (operands[0], 3);
+ goto byte_xor_operation;
}
}
}
}
+ if (REG_P (operands[0])
+ && i386_aligned_p (operands[2]))
+ {
+ CC_STATUS_INIT;
+ operands[2] = i386_sext16_if_const (operands[2]);
+ return AS2 (xor%L0,%k2,%k0);
+ }
+
+ if (GET_CODE (operands[2]) == CONST_INT
+ && i386_aligned_p (operands[0]))
+ {
+ HOST_WIDE_INT intval;
+ CC_STATUS_INIT;
+ intval = 0xffff & INTVAL (operands[2]);
+ if (intval != INTVAL (operands[2]))
+ operands[2] = GEN_INT (intval);
+ return AS2 (xor%L0,%2,%k0);
+ }
+
return AS2 (xor%W0,%2,%0);
}")