+Mon Jul 13 16:15:10 1998 John Carr <jfc@mit.edu>
+
+ * sparc.c, sparc.h, sparc.md: New trampoline code.
+ Allow an integer constant as operand 1 of V8+ DImode shift instructions.
+ Fix bugs in V8+ wide multiply patterns.
+ In 32 bit mode, split DImode register moves and logical instructions.
+ Write V9 branch prediction flag.
+ Use V9 conditional move more often for scc.
+
Mon Jul 13 15:10:09 1998 Philippe De Muyter <phdm@macqel.be>
* invoke.texi(-fno-builtin): Explain that the names of built-in
int fix = offset - i * 4;
/* Back up to the appropriate place. */
- temp[1] = gen_rtx_CONST_INT (VOIDmode, -fix);
+ temp[1] = GEN_INT (-fix);
if (addreg0)
{
temp[0] = addreg0;
wordpart[i]);
/* Don't modify the register that's the destination of the
move. */
- temp[0] = gen_rtx_CONST_INT (VOIDmode, -(offset - fix));
+ temp[0] = GEN_INT (-(offset - fix));
if (addreg0 && REGNO (addreg0) != REGNO (wordpart[i][0]))
{
temp[1] = addreg0;
}
if (offset)
{
- temp[1] = gen_rtx_CONST_INT (VOIDmode, -offset);
+ temp[1] = GEN_INT (-offset);
/* Undo the adds we just did. */
if (addreg0)
{
rtx label = 0, next = insn;
int need_label = 0;
+ /* This code used to be called with final_sequence nonzero (for fpcc
+ delay slots), but that is no longer allowed. */
+ if (final_sequence)
+ abort ();
+
+ /* On UltraSPARC a conditional move blocks until 3 cycles after prior loads
+ complete. It might be beneficial here to use branches instead if any recent
+ instructions were loads. */
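+ /* A sketch of the V9 case below, assuming %C2 prints the condition name
+ and %x1 the condition code register: for NE on %icc the returned template
+ assembles to "mov 0,%o0" followed by "movne %icc,1,%o0". */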
+ if (TARGET_V9 && REGNO (operands[1]) == SPARC_ICC_REG)
+ return "mov 0,%0\n\tmov%C2 %x1,1,%0";
+
/* Try doing a jump optimization which jump.c can't do for us
because we did not expose that setcc works by using branches.
if (GET_CODE (next) == CODE_LABEL)
label = next;
next = NEXT_INSN (next);
- if (next == 0)
- break;
}
- while (GET_CODE (next) == NOTE || GET_CODE (next) == CODE_LABEL);
+ while (next && (GET_CODE (next) == NOTE || GET_CODE (next) == CODE_LABEL));
- /* If we are in a sequence, and the following insn is a sequence also,
- then just following the current insn's next field will take us to the
- first insn of the next sequence, which is the wrong place. We don't
- want to optimize with a branch that has had its delay slot filled.
- Avoid this by verifying that NEXT_INSN (PREV_INSN (next)) == next
- which fails only if NEXT is such a branch. */
-
- if (next && GET_CODE (next) == JUMP_INSN && simplejump_p (next)
- && (! final_sequence || NEXT_INSN (PREV_INSN (next)) == next))
+ if (next && GET_CODE (next) == JUMP_INSN && simplejump_p (next))
label = JUMP_LABEL (next);
+
/* If not optimizing, jump label fields are not set. To be safe, always
check here whether label is still zero. */
if (label == 0)
/* operands[3] is an unused slot. */
operands[3] = label;
- /* If we are in a delay slot, assume it is the delay slot of an fpcc
- insn since our type isn't allowed anywhere else. */
-
- /* ??? Fpcc instructions no longer have delay slots, so this code is
- probably obsolete. */
-
- /* The fastest way to emit code for this is an annulled branch followed
- by two move insns. This will take two cycles if the branch is taken,
- and three cycles if the branch is not taken.
-
- However, if we are in the delay slot of another branch, this won't work,
- because we can't put a branch in the delay slot of another branch.
- The above sequence would effectively take 3 or 4 cycles respectively
- since a no op would have be inserted between the two branches.
- In this case, we want to emit a move, annulled branch, and then the
- second move. This sequence always takes 3 cycles, and hence is faster
- when we are in a branch delay slot. */
-
- if (final_sequence)
- {
- strcpy (string, "mov 0,%0\n\t");
- strcat (string, output_cbranch (operands[2], 3, 0, 1, 0));
- strcat (string, "\n\tmov 1,%0");
- }
- else
- {
- strcpy (string, output_cbranch (operands[2], 3, 0, 1, 0));
- strcat (string, "\n\tmov 1,%0\n\tmov 0,%0");
- }
+ strcpy (string, output_cbranch (operands[2], 3, 0, 1, 0, insn));
+ strcat (string, "\n\tmov 1,%0\n\tmov 0,%0");
if (need_label)
strcat (string, "\n%l3:");
ANNUL is non-zero if we should generate an annulling branch.
- NOOP is non-zero if we have to follow this branch by a noop. */
+ NOOP is non-zero if we have to follow this branch by a noop.
+
+ INSN, if set, is the branch insn itself; it is inspected for a REG_BR_PRED
+ note to emit the V9 branch prediction flag. */
char *
-output_cbranch (op, label, reversed, annul, noop)
+output_cbranch (op, label, reversed, annul, noop, insn)
rtx op;
int label;
int reversed, annul, noop;
+ rtx insn;
{
- static char string[20];
+ /* Big enough for the longest V9 template with prediction flag and nop. */
+ static char string[32];
enum rtx_code code = GET_CODE (op);
if (annul)
strcat (string, ",a");
- /* ??? If v9, optional prediction bit ",pt" or ",pf" goes here. */
-
if (! TARGET_V9)
{
labeloff = 3;
}
else
{
+ rtx note;
+
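+ /* The V9 prediction bit is ",pt" (predict taken) or ",pn" (predict not
+ taken), e.g. "be,pt %icc,.LL5" for a branch expected to be taken. */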
+ if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
+ strcat (string, INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
+
labeloff = 9;
if (mode == CCFPmode || mode == CCFPEmode)
{
sparc_initialize_trampoline (tramp, fnaddr, cxt)
rtx tramp, fnaddr, cxt;
{
- rtx high_cxt = expand_shift (RSHIFT_EXPR, SImode, cxt,
- size_int (10), 0, 1);
- rtx high_fn = expand_shift (RSHIFT_EXPR, SImode, fnaddr,
- size_int (10), 0, 1);
- rtx low_cxt = expand_and (cxt, GEN_INT (0x3ff), 0);
- rtx low_fn = expand_and (fnaddr, GEN_INT (0x3ff), 0);
- rtx g1_sethi = gen_rtx_HIGH (SImode, GEN_INT (0x03000000));
- rtx g2_sethi = gen_rtx_HIGH (SImode, GEN_INT (0x05000000));
- rtx g1_ori = gen_rtx_HIGH (SImode, GEN_INT (0x82106000));
- rtx g2_ori = gen_rtx_HIGH (SImode, GEN_INT (0x8410A000));
- rtx tem = gen_reg_rtx (SImode);
- emit_move_insn (tem, g1_sethi);
- emit_insn (gen_iorsi3 (high_fn, high_fn, tem));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)), high_fn);
- emit_move_insn (tem, g1_ori);
- emit_insn (gen_iorsi3 (low_fn, low_fn, tem));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), low_fn);
- emit_move_insn (tem, g2_sethi);
- emit_insn (gen_iorsi3 (high_cxt, high_cxt, tem));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), high_cxt);
- emit_move_insn (tem, g2_ori);
- emit_insn (gen_iorsi3 (low_cxt, low_cxt, tem));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)), low_cxt);
+ /* SPARC 32 bit trampoline:
+
+ sethi %hi(fn),%g1
+ sethi %hi(static),%g2
+ jmp %g1+%lo(fn)
+ or %g2,%lo(static),%g2
+
+ SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
+ JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
+ */
+
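+ /* Each word stored below is one of the opcode templates above or'd at
+ run time with an address field; e.g. the first store builds
+ 0x03000000 | (fnaddr >> 10), i.e. "sethi %hi(fn),%g1". */
+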
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
+ expand_binop (SImode, ior_optab,
+ expand_shift (RSHIFT_EXPR, SImode, fnaddr,
+ size_int (10), 0, 1),
+ GEN_INT (0x03000000),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
+ expand_binop (SImode, ior_optab,
+ expand_shift (RSHIFT_EXPR, SImode, cxt,
+ size_int (10), 0, 1),
+ GEN_INT (0x05000000),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
+ expand_binop (SImode, ior_optab,
+ expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
+ GEN_INT (0x81c06000),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
+ expand_binop (SImode, ior_optab,
+ expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
+ GEN_INT (0x8410a000),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
- plus_constant (tramp, 8)))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
- plus_constant (tramp, 16)))));
+ /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
+ aligned on a 16 byte boundary so one flush clears it all. */
+ if (sparc_cpu != PROCESSOR_ULTRASPARC)
+ emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
+ plus_constant (tramp, 8)))));
}
/* The 64 bit version is simpler because it makes more sense to load the
sparc64_initialize_trampoline (tramp, fnaddr, cxt)
rtx tramp, fnaddr, cxt;
{
- emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), cxt);
- emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 32)), fnaddr);
+ /*
+ rd %pc,%g1
+ ldx [%g1+24],%g5
+ jmp %g5
+ ldx [%g1+16],%g5
+ +16 bytes data
+ */
+
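+ /* cxt is stored at tramp+16 and fnaddr at tramp+24, matching the ldx
+ offsets 16 and 24 encoded in the code words below. */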
+ emit_move_insn (gen_rtx_MEM (SImode, tramp),
+ GEN_INT (0x83414000));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
+ GEN_INT (0xca586018));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
+ GEN_INT (0x81c14000));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
+ GEN_INT (0xca586010));
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
- plus_constant (tramp, 8)))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
- plus_constant (tramp, 16)))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
- plus_constant (tramp, 24)))));
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
- plus_constant (tramp, 32)))));
+ if (sparc_cpu != PROCESSOR_ULTRASPARC)
+ emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
+ plus_constant (tramp, 8)))));
}
\f
/* Subroutines to support a flat (single) register window calling
if (GET_CODE (operands[3]) == SCRATCH)
operands[3] = operands[0];
- output_asm_insn ("sllx %H1,32,%3", operands);
- if (sparc_check_64 (operands[1], insn) <= 0)
- output_asm_insn ("srl %L1,0,%L1", operands);
- output_asm_insn ("or %L1,%3,%3", operands);
+ if (GET_CODE (operands[1]) == CONST_INT)
+ {
+ output_asm_insn ("mov %1,%3", operands);
+ }
+ else
+ {
+ output_asm_insn ("sllx %H1,32,%3", operands);
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn ("srl %L1,0,%L1", operands);
+ output_asm_insn ("or %L1,%3,%3", operands);
+ }
strcpy (asm_code, opcode);
if (which_alternative != 2)
#define EPILOGUE_USES(REGNO) \
(!TARGET_FLAT && REGNO == 31)
\f
-/* Output assembler code for a block containing the constant parts
- of a trampoline, leaving space for the variable parts. */
-
-/* On 32 bit sparcs, the trampoline contains five instructions:
- sethi #TOP_OF_FUNCTION,%g1
- or #BOTTOM_OF_FUNCTION,%g1,%g1
- sethi #TOP_OF_STATIC,%g2
- jmp g1
- or #BOTTOM_OF_STATIC,%g2,%g2
-
- On 64 bit sparcs, the trampoline contains 4 insns and two pseudo-immediate
- constants (plus some padding):
- rd %pc,%g1
- ldx[%g1+20],%g5
- ldx[%g1+28],%g1
- jmp %g1
- nop
- nop
- .xword context
- .xword function */
-/* ??? Stack is execute-protected in v9. */
-
-#define TRAMPOLINE_TEMPLATE(FILE) \
-do { \
- if (TARGET_ARCH64) \
- { \
- fprintf (FILE, "\trd %%pc,%%g1\n"); \
- fprintf (FILE, "\tldx [%%g1+24],%%g5\n"); \
- fprintf (FILE, "\tldx [%%g1+32],%%g1\n"); \
- fprintf (FILE, "\tjmp %%g1\n"); \
- fprintf (FILE, "\tnop\n"); \
- fprintf (FILE, "\tnop\n"); \
- /* -mmedlow shouldn't generate .xwords, so don't use them at all */ \
- fprintf (FILE, "\t.word 0,0,0,0\n"); \
- } \
- else \
- { \
- ASM_OUTPUT_INT (FILE, const0_rtx); \
- ASM_OUTPUT_INT (FILE, const0_rtx); \
- ASM_OUTPUT_INT (FILE, const0_rtx); \
- ASM_OUTPUT_INT (FILE, GEN_INT (0x81C04000)); \
- ASM_OUTPUT_INT (FILE, const0_rtx); \
- } \
-} while (0)
-
/* Length in units of the trampoline for entering a nested function. */
-#define TRAMPOLINE_SIZE (TARGET_ARCH64 ? 40 : 20)
+#define TRAMPOLINE_SIZE (TARGET_ARCH64 ? 32 : 16)
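+/* In 64 bit mode: 16 bytes of code followed by 16 bytes of data (cxt and
+ fnaddr); in 32 bit mode the trampoline is code only. */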
+
+#define TRAMPOLINE_ALIGNMENT 128 /* 16 bytes */
/* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code.
void sparc_initialize_trampoline ();
void sparc64_initialize_trampoline ();
#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
- do { \
if (TARGET_ARCH64) \
sparc64_initialize_trampoline (TRAMP, FNADDR, CXT); \
else \
- sparc_initialize_trampoline (TRAMP, FNADDR, CXT); \
- } while (0)
+ sparc_initialize_trampoline (TRAMP, FNADDR, CXT)
\f
/* Generate necessary RTL for __builtin_saveregs().
ARGLIST is the argument list; see expr.c. */
[(set_attr "type" "multi")
(set_attr "length" "3")])
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 2 "noov_compare_op"
+ [(match_operand 1 "icc_or_fcc_reg_operand" "")
+ (const_int 0)]))]
+ ;; 32 bit LTU/GEU are better implemented using addx/subx
+ "TARGET_V9 && REGNO (operands[1]) == SPARC_ICC_REG
+ && (GET_MODE (operands[1]) == CCXmode
+ || (GET_CODE (operands[2]) != LTU && GET_CODE (operands[2]) != GEU))"
+ [(set (match_dup 0) (const_int 0))
+ (set (match_dup 0)
+ (if_then_else:SI (match_op_dup:SI 2 [(match_dup 1) (const_int 0)])
+ (const_int 1)
+ (match_dup 0)))]
+ "")
+
(define_insn "*scc_di"
[(set (match_operand:DI 0 "register_operand" "=r")
(match_operator:DI 2 "noov_compare_op"
{
return output_cbranch (operands[0], 1, 0,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
{
return output_cbranch (operands[0], 1, 1,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
{
return output_cbranch (operands[1], 2, 0,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
{
return output_cbranch (operands[1], 2, 1,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
{
return output_cbranch (operands[1], 2, 0,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
{
return output_cbranch (operands[1], 2, 1,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
- ! final_sequence);
+ ! final_sequence, insn);
}"
[(set_attr "type" "branch")])
operands[2] = copy_rtx (operands[0]);
PUT_MODE (operands[2], SImode);")
+;; Split register to register moves.
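+;; E.g. "(set (reg:DI %o0) (reg:DI %l2))" becomes
+;; "(set (reg:SI %o0) (reg:SI %l2))" (high words, SPARC being big-endian)
+;; followed by "(set (reg:SI %o1) (reg:SI %l3))" (low words).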
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "arith_double_operand" "rIN"))]
+ "! TARGET_ARCH64 && GET_CODE (operands[1]) == REG && REGNO (operands[0]) < 32
+ && (GET_CODE (operands[1]) != REG || REGNO (operands[1]) < 32)
+ && ! reg_overlap_mentioned_p (operands[0], operands[1])"
+ [(set (match_dup 2) (match_dup 4))
+ (set (match_dup 3) (match_dup 5))]
+ "operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_lowpart (SImode, operands[1]);")
+
(define_insn "*movdi_sp32"
[(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,U,Q,r,r,?f,?f,?Q")
(match_operand:DI 1 "general_operand" "r,U,T,r,Q,i,f,Q,f"))]
if (which_alternative == 1)
return \"st %1,%0\";
if (sparc_check_64 (operands[1], insn) > 0)
- return \"nop\";
+ return final_sequence ? \"nop\" : \"\";
return \"srl %1,0,%0\";
"
[(set_attr "type" "shift,store")])
gen_rtx_PLUS (DImode, operands[1],
operands[2])),
gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_REG (SImode, SPARC_ICC_REG)))));
+ gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
DONE;
}
}")
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64"
"*
{
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64 && reload_completed"
[(parallel [(set (reg:CC_NOOV 100)
(compare:CC_NOOV (plus:SI (match_dup 4)
operands[5] = gen_lowpart (SImode, operands[2]);
operands[6] = gen_highpart (SImode, operands[0]);
operands[7] = gen_highpart (SImode, operands[1]);
- if (GET_CODE (operands[2]) == CONST_INT)
- operands[8] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
- else
- operands[8] = gen_highpart (SImode, operands[2]);")
+ operands[8] = gen_highpart (SImode, operands[2]);")
(define_split
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "arith_double_operand" "r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64 && reload_completed"
[(parallel [(set (reg:CC_NOOV 100)
(compare:CC_NOOV (minus:SI (match_dup 4)
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
(match_operand:DI 2 "register_operand" "r")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64"
"addcc %L2,%1,%L0\;addx %H2,0,%H0"
[(set_attr "type" "multi")])
gen_rtx_MINUS (DImode, operands[1],
operands[2])),
gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_REG (SImode, SPARC_ICC_REG)))));
+ gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
DONE;
}
}")
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64"
"*
{
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "register_operand" "r")
(zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64"
"subcc %L1,%2,%L0\;addx %H1,0,%H0"
[(set_attr "type" "multi")])
smul %1,%2,%4\;srlx %4,%3,%0"
[(set_attr "length" "2")])
+;; The combiner changes TRUNCATE in the previous pattern to SUBREG.
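+;; (subreg:SI ... 1) below is the low word, since SPARC is big-endian.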
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=h,r")
+ (subreg:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+ (match_operand:SI 3 "const_int_operand" "i,i"))
+ 1))
+ (clobber (match_scratch:SI 4 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ smul %1,%2,%0\;srlx %0,%3,%0
+ smul %1,%2,%4\;srlx %4,%3,%0"
+ [(set_attr "length" "2")])
+
(define_insn "const_smulsi3_highpart_v8plus"
[(set (match_operand:SI 0 "register_operand" "=h,r")
(truncate:SI
(lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
- (match_operand:SI 2 "register_operand" "r,r"))
+ (match_operand 2 "small_int" "i,i"))
(match_operand:SI 3 "const_int_operand" "i,i"))))
(clobber (match_scratch:SI 4 "=X,&h"))]
"TARGET_V8PLUS"
operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
}")
+;; Split DImode logical operations requiring two instructions.
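+;; E.g. a DImode "and" becomes an SImode "and" of the high words plus an
+;; SImode "and" of the low words.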
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (match_operator:DI 1 "cc_arithop" ; AND, IOR, XOR
+ [(match_operand:DI 2 "register_operand" "")
+ (match_operand:DI 3 "arith_double_operand" "")]))]
+ "! TARGET_ARCH64 && reload_completed
+ && GET_CODE (operands[0]) == REG && REGNO (operands[0]) < 32"
+ [(set (match_dup 4) (match_op_dup:SI 1 [(match_dup 6) (match_dup 8)]))
+ (set (match_dup 5) (match_op_dup:SI 1 [(match_dup 7) (match_dup 9)]))]
+ "operands[4] = gen_highpart (SImode, operands[0]);
+ operands[5] = gen_lowpart (SImode, operands[0]);
+ operands[6] = gen_highpart (SImode, operands[2]);
+ operands[7] = gen_lowpart (SImode, operands[2]);
+ operands[8] = gen_highpart (SImode, operands[3]);
+ operands[9] = gen_lowpart (SImode, operands[3]);")
+
(define_insn "*and_not_di_sp32"
[(set (match_operand:DI 0 "register_operand" "=r,b")
(and:DI (not:DI (match_operand:DI 1 "register_operand" "r,b"))
gen_rtx_SET (VOIDmode, operand0,
gen_rtx_NEG (DImode, operand1)),
gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_REG (SImode, SPARC_ICC_REG)))));
+ gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
DONE;
}
}")
(define_insn "*negdi2_sp32"
[(set (match_operand:DI 0 "register_operand" "=r")
(neg:DI (match_operand:DI 1 "register_operand" "r")))
- (clobber (reg:SI 100))]
+ (clobber (reg:CC 100))]
"! TARGET_ARCH64"
"*
{
(define_insn "ashldi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
- (ashift:DI (match_operand:DI 1 "register_operand" "r,0,r")
+ (ashift:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
(match_operand:SI 2 "arith_operand" "rI,rI,rI")))
(clobber (match_scratch:SI 3 "=X,X,&h"))]
"TARGET_V8PLUS"
"*return sparc_v8plus_shift (operands, insn, \"sllx\");"
[(set_attr "length" "5,5,6")])
+;; Optimize (1LL<<x)-1
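+;; Viewed as a single 64 bit register, %L0 holds (1 << x) - 1 after the
+;; sub; the final srlx copies its high 32 bits into %H0.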
+(define_insn ""
+ [(set (match_operand:DI 0 "register_operand" "=h")
+ (plus:DI (ashift:DI (const_int 1)
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (const_int -1)))]
+ "TARGET_V8PLUS"
+ "*
+{
+ if (GET_CODE (operands[2]) == REG && REGNO (operands[2]) == REGNO (operands[0]))
+ return \"mov 1,%L0\;sllx %L0,%2,%L0\;sub %L0,1,%L0\;srlx %L0,32,%H0\";
+ return \"mov 1,%H0\;sllx %H0,%2,%L0\;sub %L0,1,%L0\;srlx %L0,32,%H0\";
+}"
+ [(set_attr "length" "4")])
+
(define_insn "*cmp_cc_ashift_1"
[(set (reg:CC_NOOV 100)
(compare:CC_NOOV (ashift:SI (match_operand:SI 0 "register_operand" "r")
(define_insn "ashrdi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
- (ashiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+ (ashiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
(match_operand:SI 2 "arith_operand" "rI,rI,rI")))
(clobber (match_scratch:SI 3 "=X,X,&h"))]
"TARGET_V8PLUS"
(define_insn "lshrdi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
- (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+ (lshiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
(match_operand:SI 2 "arith_operand" "rI,rI,rI")))
(clobber (match_scratch:SI 3 "=X,X,&h"))]
"TARGET_V8PLUS"
"TARGET_ARCH64 && short_branch (INSN_UID (insn), INSN_UID (operands[2])) && in_same_eh_region (insn, operands[2]) && in_same_eh_region (insn, ins1)"
"call %a0,%1\;add %%o7,(%l2-.-4),%%o7")
-;; Other miscellaneous peepholes.
-
-;; (reg:SI 100) is created by the {add,neg,sub}di patterns.
-(define_peephole
- [(parallel [(set (match_operand:SI 0 "register_operand" "=r")
- (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
- (reg:SI 100)))
- (clobber (reg:CC 100))])
- (set (reg:CC 100) (compare (match_dup 0) (const_int 0)))]
- ""
- "subxcc %r1,0,%0")
-
;; After a nonlocal goto, we need to restore the PIC register, but only
;; if we need it. So do nothing much here, but we'll check for this in
;; finalize_pic.
case 't':
putc (' ', outfile);
- fprintf (outfile, HOST_WIDE_INT_PRINT_HEX,
- (HOST_WIDE_INT) XTREE (in_rtx, i));
+ fprintf (outfile, HOST_PTR_PRINTF, (char *) XTREE (in_rtx, i));
break;
case '*':