eliminated during reloading in favor of either the stack or frame
pointer. */
-#define FIRST_PSEUDO_REGISTER 19
+#define FIRST_PSEUDO_REGISTER 20
/* Number of hardware registers that go into the DWARF-2 unwind info.
If not defined, equals FIRST_PSEUDO_REGISTER. */
and are not available for the register allocator.
On the 80386, the stack pointer is such, as is the arg pointer. */
#define FIXED_REGISTERS \
-/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr*/ \
-{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr, dir*/ \
+{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }
/* 1 for registers not available across function calls.
These must include the FIXED_REGISTERS and also any
Aside from that, you can include as many other registers as you like. */
#define CALL_USED_REGISTERS \
-/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr*/ \
-{ 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr, dir*/ \
+{ 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
/* Order in which to allocate registers. Each register must be
listed once, even those in FIXED_REGISTERS. List frame pointer
generated by allocating edx first, so restore the 'natural' order of things. */
#define REG_ALLOC_ORDER \
-/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,cc,fpsr*/ \
-{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,17, 18 }
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,cc,fpsr, dir*/ \
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,17, 18, 19 }
/* A C statement (sans semicolon) to choose the order in which to
allocate hard registers for pseudo-registers local to a basic
#define FLAGS_REG 17
#define FPSR_REG 18
+/* Hard register number of the direction flag ("dirflag" in the
+   register name table).  */
+#define DIRFLAG_REG 19
/* Value should be nonzero if functions must have frame pointers.
Zero means the frame pointer need not be set up (and parms
do { \
(CLOBBERS) = tree_cons (NULL_TREE, build_string (5, "flags"), (CLOBBERS));\
(CLOBBERS) = tree_cons (NULL_TREE, build_string (4, "fpsr"), (CLOBBERS)); \
+ (CLOBBERS) = tree_cons (NULL_TREE, build_string (7, "dirflag"), (CLOBBERS)); \
} while (0)
\f
/* Stack layout; function entry, exit and calling. */
#define HI_REGISTER_NAMES \
{"ax","dx","cx","bx","si","di","bp","sp", \
"st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)","", \
- "flags","fpsr" }
+ "flags","fpsr", "dirflag" }
#define REGISTER_NAMES HI_REGISTER_NAMES
fprintf (FILE, "%d ", REGNO (X)); \
if (REGNO (X) == FLAGS_REG) \
{ fputs ("flags", FILE); break; } \
+ if (REGNO (X) == DIRFLAG_REG) \
+ { fputs ("dirflag", FILE); break; } \
if (REGNO (X) == FPSR_REG) \
{ fputs ("fpsr", FILE); break; } \
if (REGNO (X) == ARG_POINTER_REGNUM) \
;; A basic instruction type. Refinements due to arguments to be
;; provided in other attributes.
(define_attr "type"
- "other,multi,alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch"
+ "other,multi,alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld"
(const_string "other"))
;; The (bounding maximum) length of an instruction in bytes.
(define_attr "length_opcode" ""
(cond [(eq_attr "type" "imovx,setcc,icmov")
(const_int 3)
+ (eq_attr "type" "str,cld")
+ (const_int 1)
(and (eq_attr "type" "incdec")
(ior (match_operand:SI 1 "register_operand" "")
(match_operand:HI 1 "register_operand" "")))
;; if the instruction is complex.
(define_attr "memory" "none,load,store,both,unknown"
- (cond [(eq_attr "type" "other,multi")
+ (cond [(eq_attr "type" "other,multi,str")
(const_string "unknown")
- (eq_attr "type" "lea,fcmov,fpspc")
+ (eq_attr "type" "lea,fcmov,fpspc,cld")
(const_string "none")
(eq_attr "type" "push")
(if_then_else (match_operand 1 "memory_operand" "")
(eq_attr "type" "imul"))
11 11)
+;; Rep movs takes at least 12 cycles, so insns of type "str" are given
+;; a ready delay and an issue delay of 12 on the "pent_np" unit when
+;; scheduling for the Pentium.
+(define_function_unit "pent_np" 1 0
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "str"))
+ 12 12)
+
; ??? IDIV for SI takes 46 cycles, for HI 30, for QI 22
(define_function_unit "pent_np" 1 0
(and (eq_attr "cpu" "pentium")
(eq_attr "memory" "store"))))
2 2)
+;; Insns of type "cld" get a ready delay and an issue delay of 2 on the
+;; "pent_np" unit when scheduling for the Pentium.
+(define_function_unit "pent_np" 1 0
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "cld"))
+ 2 2)
+
(define_function_unit "fpu" 1 0
(and (eq_attr "cpu" "pentium")
(and (eq_attr "type" "fmov")
;; cycles to decode in decoder 0.
(define_attr "ppro_uops" "one,few,many"
- (cond [(eq_attr "type" "other,multi,call,callv,fpspc")
+ (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
(const_string "many")
- (eq_attr "type" "icmov,fcmov")
+ (eq_attr "type" "icmov,fcmov,str,cld")
(const_string "few")
(eq_attr "type" "imov")
(if_then_else (eq_attr "memory" "store,both")
(define_function_unit "ppro_p0" 1 0
(and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "ishift,lea,ibr"))
+ (eq_attr "type" "ishift,lea,ibr,cld"))
1 1)
(define_function_unit "ppro_p0" 1 0
;; Shift instructions and certain arithmetic are issued only to X pipe.
(define_function_unit "k6_alux" 1 0
(and (eq_attr "cpu" "k6")
- (eq_attr "type" "ishift,alu1,negnot"))
+ (eq_attr "type" "ishift,alu1,negnot,cld"))
1 1)
;; The QI mode arithmetic is issued to X pipe only.
(eq_attr "memory" "load,both")))
1 1)
+;; Insns of type "str" whose "memory" attribute is "load" or "both" get
+;; a ready delay and an issue delay of 10 on the "k6_load" unit.
+;; NOTE(review): the default value of the "memory" attribute for type
+;; "str" is "unknown", so unless an insn sets "memory" explicitly this
+;; test looks like it can never match -- confirm.
+(define_function_unit "k6_load" 1 0
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both")))
+ 10 10)
+
;; Lea have two instructions, so latency is probably 2
(define_function_unit "k6_store" 1 0
(and (eq_attr "cpu" "k6")
(define_function_unit "k6_store" 1 0
(and (eq_attr "cpu" "k6")
+ (eq_attr "type" "str"))
+ 10 10)
+
+(define_function_unit "k6_store" 1 0
+ (and (eq_attr "cpu" "k6")
(ior (eq_attr "type" "push")
(eq_attr "memory" "store,both")))
1 1)
;; communicates with all the execution units separately instead.
(define_attr "athlon_decode" "direct,vector"
- (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc")
+ (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str")
(const_string "vector")
(and (eq_attr "type" "push")
(match_operand 1 "memory_operand" ""))
(define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon")
- (eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov"))
+ (eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,str,cld"))
1 1)
(define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "str"))
+ 15 15)
+
+(define_function_unit "athlon_ieu" 3 0
+ (and (eq_attr "cpu" "athlon")
(eq_attr "type" "imul"))
4 0)
\f
;; Block operation instructions
+;; Clear the direction flag.  The flag is modelled as hard register 19
+;; (DIRFLAG_REG); setting it to zero here lets the string patterns
+;; express their dependence on it with (use (reg:SI 19)).
+(define_insn "cld"
+ [(set (reg:SI 19) (const_int 0))]
+ ""
+ "cld"
+ [(set_attr "type" "cld")])
+
(define_expand "movstrsi"
[(parallel [(set (match_operand:BLK 0 "memory_operand" "")
(match_operand:BLK 1 "memory_operand" ""))
(use (match_operand:SI 2 "const_int_operand" ""))
(use (match_operand:SI 3 "const_int_operand" ""))
+ (use (reg:SI 19))
(clobber (match_scratch:SI 4 ""))
(clobber (match_dup 5))
(clobber (match_dup 6))])]
operands[0] = change_address (operands[0], VOIDmode, addr0);
operands[1] = change_address (operands[1], VOIDmode, addr1);
+ emit_insn (gen_cld ());
}")
;; It might seem that operands 0 & 1 could use predicate register_operand.
(mem:BLK (match_operand:SI 1 "address_operand" "S")))
(use (match_operand:SI 2 "const_int_operand" "n"))
(use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:SI 19))
(clobber (match_scratch:SI 4 "=&c"))
(clobber (match_dup 0))
(clobber (match_dup 1))]
{
rtx xops[2];
- output_asm_insn (\"cld\", operands);
if (GET_CODE (operands[2]) == CONST_INT)
{
if (INTVAL (operands[2]) & ~0x03)
[(set_attr "type" "multi")])
+;; Block clear.  The first SET of the template clears the direction
+;; flag (it is the "cld" insn pattern), so the preparation statements
+;; must not emit cld themselves or it would be generated twice.
(define_expand "clrstrsi"
- [(set (match_dup 3) (const_int 0))
+ [(set (reg:SI 19) (const_int 0))
+ (set (match_dup 3) (const_int 0))
(parallel [(set (match_operand:BLK 0 "memory_operand" "")
(const_int 0))
(use (match_operand:SI 1 "const_int_operand" ""))
(use (match_operand:SI 2 "const_int_operand" ""))
(use (match_dup 3))
+ (use (reg:SI 19))
(clobber (match_scratch:SI 4 ""))
(clobber (match_dup 5))])]
""
operands[5] = addr0;
operands[0] = gen_rtx_MEM (BLKmode, addr0);
+
+ /* No gen_cld () call here: the RTL template above already starts
+    with the SET of register 19 that clears the direction flag.  */
}")
;; It might seem that operand 0 could use predicate register_operand.
(use (match_operand:SI 1 "const_int_operand" "n"))
(use (match_operand:SI 2 "immediate_operand" "i"))
(use (match_operand:SI 3 "register_operand" "a"))
+ (use (reg:SI 19))
(clobber (match_scratch:SI 4 "=&c"))
(clobber (match_dup 0))]
""
{
rtx xops[2];
- output_asm_insn (\"cld\", operands);
if (GET_CODE (operands[1]) == CONST_INT)
{
unsigned int count = INTVAL (operands[1]) & 0xffffffff;
once cc0 is dead. */
align = operands[4];
+ emit_insn (gen_cld ());
if (GET_CODE (count) == CONST_INT)
{
if (INTVAL (count) == 0)
(mem:BLK (match_operand:SI 1 "address_operand" "D"))))
(use (match_operand:SI 2 "register_operand" "c"))
(use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:SI 19))
(clobber (match_dup 0))
(clobber (match_dup 1))
(clobber (match_dup 2))]
""
- "cld\;repz{\;| }cmpsb"
+ "repz{\;| }cmpsb"
[(set_attr "type" "multi")
(set_attr "length" "3")])
(mem:BLK (match_operand:SI 1 "address_operand" "D")))
(const_int 0)))
(use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:SI 19))
(clobber (match_dup 0))
(clobber (match_dup 1))
(clobber (match_dup 2))]
""
;; The initial compare sets the zero flag.
- "cmp{l}\\t%2, %2\;cld\;repz{\;| }cmpsb"
+ "cmp{l}\\t%2, %2\;repz{\;| }cmpsb"
[(set_attr "type" "multi")
(set_attr "length" "5")])
emit_move_insn (scratch3, addr);
+ emit_insn (gen_cld ());
emit_insn (gen_strlensi_1 (scratch1, scratch3, eoschar,
align, constm1_rtx));
emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
(match_operand:QI 2 "general_operand" "a")
(match_operand:SI 3 "immediate_operand" "i")
(match_operand:SI 4 "immediate_operand" "0")] 0))
+ (use (reg:SI 19))
(clobber (match_dup 1))
(clobber (reg:CC 17))]
""
- "cld\;repnz{\;| }scasb"
+ "repnz{\;| }scasb"
[(set_attr "type" "multi")
(set_attr "length" "3")])
\f