[X86][Atom] Convert Atom scheduler model to SchedRW (PR32431)
authorSimon Pilgrim <llvm-dev@redking.me.uk>
Wed, 11 Apr 2018 18:23:01 +0000 (18:23 +0000)
committerSimon Pilgrim <llvm-dev@redking.me.uk>
Wed, 11 Apr 2018 18:23:01 +0000 (18:23 +0000)
Atom is the only x86 target that still uses schedule itineraries; if we can remove them, we can begin the work of removing x86 itineraries altogether. I've also found that this will help with PR36550.

I've focused on matching the existing model as closely as possible (relying on the schedule tests). PR36895 indicated that a lot of these values were incorrect, but we can just as easily fix them after this patch as before. Hopefully we can get llvm-exegesis to help here.

A few instructions (mainly push/pop/return) rely on itinerary scheduling of multiple resource stages, but I don't think any of these are show-stoppers.

There are also a few codegen changes that seem related to the post-RA scheduler acting a little differently; I haven't tracked these down, but they don't seem critical.

NOTE: I don't have access to any Atom hardware, so this hasn't been tested in the wild.

Differential Revision: https://reviews.llvm.org/D45486

llvm-svn: 329837

12 files changed:
llvm/lib/Target/X86/X86ScheduleAtom.td
llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
llvm/test/CodeGen/X86/lsr-static-addr.ll
llvm/test/CodeGen/X86/mmx-schedule.ll
llvm/test/CodeGen/X86/schedule-x86_32.ll
llvm/test/CodeGen/X86/schedule-x86_64.ll
llvm/test/CodeGen/X86/select.ll
llvm/test/CodeGen/X86/sse-schedule.ll
llvm/test/CodeGen/X86/sse2-schedule.ll
llvm/test/CodeGen/X86/sse3-schedule.ll
llvm/test/CodeGen/X86/ssse3-schedule.ll
llvm/test/CodeGen/X86/x87-schedule.ll

index 98a9d86..74940b3 100644 (file)
 //
 // Scheduling information derived from the "Intel 64 and IA32 Architectures
 // Optimization Reference Manual", Chapter 13, Section 4.
-// Functional Units
-//    Port 0
-def Port0 : FuncUnit; // ALU: ALU0, shift/rotate, load/store
-                      // SIMD/FP: SIMD ALU, Shuffle,SIMD/FP multiply, divide
-def Port1 : FuncUnit; // ALU: ALU1, bit processing, jump, and LEA
-                      // SIMD/FP: SIMD ALU, FP Adder
-
-def AtomItineraries : ProcessorItineraries<
-  [ Port0, Port1 ],
-  [], [
-  // P0 only
-  // InstrItinData<class, [InstrStage<N, [P0]>] >,
-  // P0 or P1
-  // InstrItinData<class, [InstrStage<N, [P0, P1]>] >,
-  // P0 and P1
-  // InstrItinData<class, [InstrStage<N, [P0], 0>,  InstrStage<N, [P1]>] >,
-  //
-  // Default is 1 cycle, port0 or port1
-  InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_LEA_16, [InstrStage<2, [Port0, Port1]>] >,
-  // mul
-  InstrItinData<IIC_MUL8_MEM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL8_REG, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL64_MEM, [InstrStage<12, [Port0, Port1]>] >,
-  InstrItinData<IIC_MUL64_REG, [InstrStage<12, [Port0, Port1]>] >,
-  // imul by al, ax, eax, rax
-  InstrItinData<IIC_IMUL8_MEM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL8_REG, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL64_MEM, [InstrStage<12, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL64_REG, [InstrStage<12, [Port0, Port1]>] >,
-  // imul reg by reg|mem
-  InstrItinData<IIC_IMUL16_RM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL16_RR, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL32_RM, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_IMUL32_RR, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_IMUL64_RM, [InstrStage<12, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL64_RR, [InstrStage<12, [Port0, Port1]>] >,
-  // imul reg = reg/mem * imm
-  InstrItinData<IIC_IMUL16_RRI, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL32_RRI, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_IMUL64_RRI, [InstrStage<14, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL16_RMI, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_IMUL32_RMI, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_IMUL64_RMI, [InstrStage<14, [Port0, Port1]>] >,
-  // idiv
-  InstrItinData<IIC_IDIV8_MEM, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV8_REG, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV16_MEM, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV16_REG, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV32_MEM, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV32_REG, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV64_MEM, [InstrStage<130, [Port0, Port1]>] >,
-  InstrItinData<IIC_IDIV64_REG, [InstrStage<130, [Port0, Port1]>] >,
-  // div
-  InstrItinData<IIC_DIV8_REG, [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV8_MEM, [InstrStage<68, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV16_MEM, [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV16_REG, [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV32_MEM, [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV32_REG, [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV64_MEM, [InstrStage<130, [Port0, Port1]>] >,
-  InstrItinData<IIC_DIV64_REG, [InstrStage<130, [Port0, Port1]>] >,
-  // neg/not/inc/dec
-  InstrItinData<IIC_UNARY_REG, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [Port0]>] >,
-  // add/sub/and/or/xor/cmp/test
-  InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_BIN_MEM, [InstrStage<1, [Port0]>] >,
-  // adc/sbc
-  InstrItinData<IIC_BIN_CARRY_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_BIN_CARRY_MEM, [InstrStage<1, [Port0]>] >,
-  // shift/rotate
-  InstrItinData<IIC_SR, [InstrStage<1, [Port0]>] >,
-  // shift double
-  InstrItinData<IIC_SHD16_REG_IM, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD16_REG_CL, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD32_REG_CL, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD64_REG_IM, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD64_REG_CL, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<9, [Port0, Port1]>] >,
-  // cmov
-  InstrItinData<IIC_CMOV16_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_CMOV16_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMOV32_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_CMOV32_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMOV64_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_CMOV64_RR, [InstrStage<1, [Port0, Port1]>] >,
-  // set
-  InstrItinData<IIC_SET_M, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_SET_R, [InstrStage<1, [Port0, Port1]>] >,
-  // jcc
-  InstrItinData<IIC_Jcc, [InstrStage<1, [Port1]>] >,
-  // jcxz/jecxz/jrcxz
-  InstrItinData<IIC_JCXZ, [InstrStage<4, [Port0, Port1]>] >,
-  // jmp rel
-  InstrItinData<IIC_JMP_REL, [InstrStage<1, [Port1]>] >,
-  // jmp indirect
-  InstrItinData<IIC_JMP_REG, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_JMP_MEM, [InstrStage<2, [Port0, Port1]>] >,
-  // jmp far
-  InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<32, [Port0, Port1]>] >,
-  InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<31, [Port0, Port1]>] >,
-  // loop/loope/loopne
-  InstrItinData<IIC_LOOP, [InstrStage<18, [Port0, Port1]>] >,
-  InstrItinData<IIC_LOOPE, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_LOOPNE, [InstrStage<17, [Port0, Port1]>] >,
-  // call - all but reg/imm
-  InstrItinData<IIC_CALL_RI, [InstrStage<1, [Port0], 0>,
-                              InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_CALL_MEM, [InstrStage<15, [Port0, Port1]>] >,
-  InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<40, [Port0, Port1]>] >,
-  InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<39, [Port0, Port1]>] >,
-  //ret
-  InstrItinData<IIC_RET, [InstrStage<79, [Port0, Port1]>] >,
-  InstrItinData<IIC_RET_IMM, [InstrStage<1, [Port0], 0>,  InstrStage<1, [Port1]>] >,
-  //sign extension movs
-  InstrItinData<IIC_MOVSX,[InstrStage<1, [Port0] >] >,
-  InstrItinData<IIC_MOVSX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOVSX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
-  //zero extension movs
-  InstrItinData<IIC_MOVZX,[InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_REP_MOVS, [InstrStage<75, [Port0, Port1]>] >,
-  InstrItinData<IIC_REP_STOS, [InstrStage<74, [Port0, Port1]>] >,
-
-  // SSE binary operations
-  // arithmetic fp scalar
-  InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<5, [Port0], 0>,
-                                   InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F64S_RR, [InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F64S_RM, [InstrStage<5, [Port0], 0>,
-                                   InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_MUL_F32S_RR, [InstrStage<4, [Port0]>] >,
-  InstrItinData<IIC_SSE_MUL_F32S_RM, [InstrStage<4, [Port0]>] >,
-  InstrItinData<IIC_SSE_MUL_F64S_RR, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_MUL_F64S_RM, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_DIV_F32S_RR, [InstrStage<34, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F32S_RM, [InstrStage<34, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F64S_RR, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F64S_RM, [InstrStage<62, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_COMIS_RR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_COMIS_RM, [InstrStage<10, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_HADDSUB_RR, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_HADDSUB_RM, [InstrStage<9, [Port0, Port1]>] >,
-
-  // arithmetic fp parallel
-  InstrItinData<IIC_SSE_ALU_F32P_RR, [InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F32P_RM, [InstrStage<5, [Port0], 0>,
-                                   InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F64P_RR, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_ALU_F64P_RM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MUL_F32P_RR, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_MUL_F32P_RM, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_MUL_F64P_RR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MUL_F64P_RM, [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F32P_RR, [InstrStage<70, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F32P_RM, [InstrStage<70, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F64P_RR, [InstrStage<125, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_DIV_F64P_RM, [InstrStage<125, [Port0, Port1]>] >,
-
-  // bitwise parallel
-  InstrItinData<IIC_SSE_BIT_P_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_BIT_P_RM, [InstrStage<1, [Port0]>] >,
-
-  // arithmetic int parallel
-  InstrItinData<IIC_SSE_INTALU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_INTALU_P_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_INTALUQ_P_RR, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_INTALUQ_P_RM, [InstrStage<3, [Port0, Port1]>] >,
-
-  // multiply int parallel
-  InstrItinData<IIC_SSE_INTMUL_P_RR, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_INTMUL_P_RM, [InstrStage<5, [Port0]>] >,
-
-  // shift parallel
-  InstrItinData<IIC_SSE_INTSH_P_RR, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_INTSHDQ_P_RI, [InstrStage<1, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_PACK, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<70, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTPS_RM, [InstrStage<70, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTSS_RR, [InstrStage<34, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTSS_RM, [InstrStage<34, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_SQRTPD_RR, [InstrStage<125, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTPD_RM, [InstrStage<125, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTSD_RR, [InstrStage<62, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_SQRTSD_RM, [InstrStage<62, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_RSQRTPS_RR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_RSQRTPS_RM, [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_RSQRTSS_RR, [InstrStage<4, [Port0]>] >,
-  InstrItinData<IIC_SSE_RSQRTSS_RM, [InstrStage<4, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [Port0]>] >,
-  InstrItinData<IIC_SSE_RCPS_RM, [InstrStage<4, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_MOVMSK, [InstrStage<3, [Port0]>] >,
-  InstrItinData<IIC_SSE_MASKMOV, [InstrStage<2, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_PEXTRW, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PINSRW, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_PABS_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PABS_RM, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_MOV_S_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MOV_S_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_MOV_S_MR, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_MOVA_P_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MOVA_P_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_MOVA_P_MR, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_MOVU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MOVU_P_RM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MOVU_P_MR, [InstrStage<2, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_MOV_LH, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_LDDQU, [InstrStage<3, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_MOVDQ, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_MOVD_ToGP, [InstrStage<3, [Port0]>] >,
-  InstrItinData<IIC_SSE_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_PREFETCH, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_PAUSE, [InstrStage<17, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_LFENCE, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MFENCE, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_SFENCE, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_LDMXCSR, [InstrStage<5, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_STMXCSR, [InstrStage<15, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_SSE_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PHADDSUBSW_RR, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PHADDSUBSW_RM, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PHADDSUBW_RR, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PHADDSUBW_RM, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PSHUFB_RR, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PSHUFB_RM, [InstrStage<5, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PSIGN_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_PSIGN_RM, [InstrStage<1, [Port0]>] >,
-
-  InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [Port0]>] >,
-  InstrItinData<IIC_SSE_PALIGNRR, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_PALIGNRM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_SSE_MWAIT, [InstrStage<46, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_MONITOR, [InstrStage<45, [Port0, Port1]>] >,
-
-  // conversions
-  // to/from PD ...
-  InstrItinData<IIC_SSE_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
-  // to/from PS except to/from PD and PS2PI
-  InstrItinData<IIC_SSE_CVT_PS_RR, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_PS_RM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_Scalar_RR, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_Scalar_RM, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SS2SI32_RR, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SS2SI32_RM, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >,
-
-  // MMX MOVs
-  InstrItinData<IIC_MMX_MOV_MM_RM,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_MOV_REG_MM, [InstrStage<3, [Port0]>] >,
-  InstrItinData<IIC_MMX_MOVQ_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >,
-  // other MMX
-  InstrItinData<IIC_MMX_ALU_RM,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_ALU_RR,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_ALUQ_RM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_ALUQ_RR, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PHADDSUBW_RM, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PHADDSUBW_RR, [InstrStage<5, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PMUL, [InstrStage<4, [Port0]>] >,
-  InstrItinData<IIC_MMX_MISC_FUNC_MEM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_MISC_FUNC_REG, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PSADBW,   [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_SHIFT_RI, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_SHIFT_RM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_SHIFT_RR, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_UNPCK_H_RM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_UNPCK_H_RR, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_UNPCK_L, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_PCK_RM,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_PCK_RR,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PSHUF,   [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_PEXTR,   [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_PINSRW,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_MASKMOV, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MMX_MOVMSK, [InstrStage<3, [Port0]>] >,
-  // conversions
-  // from/to PD
-  InstrItinData<IIC_MMX_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
-  // from/to PI
-  InstrItinData<IIC_MMX_CVT_PS_RR, [InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_MMX_CVT_PS_RM, [InstrStage<5, [Port0], 0>,
-                                    InstrStage<5, [Port1]>]>,
-
-  InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<3, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_FILD, [InstrStage<5, [Port0], 0>, InstrStage<5, [Port1]>] >,
-  InstrItinData<IIC_FLD,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_FLD80, [InstrStage<4, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_FST,   [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_FST80, [InstrStage<5, [Port0, Port1]>] >,
-  InstrItinData<IIC_FIST,  [InstrStage<6, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_FCMOV,  [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_FLDZ,   [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_FUCOM,  [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_FUCOMI, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_FCOMI,  [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_FNSTSW, [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_FNSTCW, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_FLDCW,  [InstrStage<5, [Port0, Port1]>] >,
-  InstrItinData<IIC_FNINIT, [InstrStage<63, [Port0, Port1]>] >,
-  InstrItinData<IIC_FFREE,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_FNCLEX, [InstrStage<25, [Port0, Port1]>] >,
-  InstrItinData<IIC_WAIT,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_FXAM,  [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_FNOP,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_FLDL,  [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_F2XM1,  [InstrStage<99, [Port0, Port1]>] >,
-  InstrItinData<IIC_FYL2X,  [InstrStage<146, [Port0, Port1]>] >,
-  InstrItinData<IIC_FPTAN,  [InstrStage<168, [Port0, Port1]>] >,
-  InstrItinData<IIC_FPATAN,  [InstrStage<183, [Port0, Port1]>] >,
-  InstrItinData<IIC_FXTRACT,  [InstrStage<25, [Port0, Port1]>] >,
-  InstrItinData<IIC_FPREM1,  [InstrStage<71, [Port0, Port1]>] >,
-  InstrItinData<IIC_FPSTP,  [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_FPREM,  [InstrStage<55, [Port0, Port1]>] >,
-  InstrItinData<IIC_FYL2XP1,  [InstrStage<147, [Port0, Port1]>] >,
-  InstrItinData<IIC_FSINCOS,  [InstrStage<174, [Port0, Port1]>] >,
-  InstrItinData<IIC_FRNDINT,  [InstrStage<46, [Port0, Port1]>] >,
-  InstrItinData<IIC_FSCALE,  [InstrStage<77, [Port0, Port1]>] >,
-  InstrItinData<IIC_FCOMPP,  [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_FXSAVE,  [InstrStage<140, [Port0, Port1]>] >,
-  InstrItinData<IIC_FXRSTOR,  [InstrStage<141, [Port0, Port1]>] >,
-  InstrItinData<IIC_FXCH, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_FSIGN,  [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_FSQRT,  [InstrStage<71, [Port0, Port1]>] >,
-
-  // System instructions
-  InstrItinData<IIC_CPUID, [InstrStage<121, [Port0, Port1]>] >,
-  InstrItinData<IIC_INT,   [InstrStage<127, [Port0, Port1]>] >,
-  InstrItinData<IIC_INT3,  [InstrStage<130, [Port0, Port1]>] >,
-  InstrItinData<IIC_INVD,  [InstrStage<1003, [Port0, Port1]>] >,
-  InstrItinData<IIC_INVLPG, [InstrStage<71, [Port0, Port1]>] >,
-  InstrItinData<IIC_IRET,  [InstrStage<109, [Port0, Port1]>] >,
-  InstrItinData<IIC_HLT,   [InstrStage<121, [Port0, Port1]>] >,
-  InstrItinData<IIC_LXS,   [InstrStage<10, [Port0, Port1]>] >,
-  InstrItinData<IIC_LTR,   [InstrStage<83, [Port0, Port1]>] >,
-  InstrItinData<IIC_RDTSC, [InstrStage<30, [Port0, Port1]>] >,
-  InstrItinData<IIC_RDTSCP, [InstrStage<30, [Port0, Port1]>] >,
-  InstrItinData<IIC_RSM,   [InstrStage<741, [Port0, Port1]>] >,
-  InstrItinData<IIC_SIDT,  [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SGDT,  [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_SLDT,  [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_STR,    [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_SWAPGS, [InstrStage<22, [Port0, Port1]>] >,
-  InstrItinData<IIC_SYSCALL, [InstrStage<96, [Port0, Port1]>] >,
-  InstrItinData<IIC_SYS_ENTER_EXIT, [InstrStage<88, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_IN_RR,  [InstrStage<94, [Port0, Port1]>] >,
-  InstrItinData<IIC_IN_RI,  [InstrStage<92, [Port0, Port1]>] >,
-  InstrItinData<IIC_OUT_RR, [InstrStage<68, [Port0, Port1]>] >,
-  InstrItinData<IIC_OUT_IR, [InstrStage<72, [Port0, Port1]>] >,
-  InstrItinData<IIC_INS,    [InstrStage<59, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_MOV_REG_DR, [InstrStage<88, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV_DR_REG, [InstrStage<123, [Port0, Port1]>] >,
-  // worst case for mov REG_CRx
-  InstrItinData<IIC_MOV_REG_CR, [InstrStage<12, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV_CR_REG, [InstrStage<136, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_MOV_REG_SR, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_MOV_MEM_SR, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV_SR_REG, [InstrStage<21, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV_SR_MEM, [InstrStage<26, [Port0, Port1]>] >,
-  // LAR
-  InstrItinData<IIC_LAR_RM,  [InstrStage<50, [Port0, Port1]>] >,
-  InstrItinData<IIC_LAR_RR,  [InstrStage<54, [Port0, Port1]>] >,
-  // LSL
-  InstrItinData<IIC_LSL_RM,  [InstrStage<46, [Port0, Port1]>] >,
-  InstrItinData<IIC_LSL_RR,  [InstrStage<49, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_LGDT, [InstrStage<44, [Port0, Port1]>] >,
-  InstrItinData<IIC_LIDT, [InstrStage<44, [Port0, Port1]>] >,
-  InstrItinData<IIC_LLDT_REG, [InstrStage<60, [Port0, Port1]>] >,
-  InstrItinData<IIC_LLDT_MEM, [InstrStage<64, [Port0, Port1]>] >,
-  // push control register, segment registers
-  InstrItinData<IIC_PUSH_CS, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_PUSH_SR, [InstrStage<2, [Port0, Port1]>] >,
-  // pop control register, segment registers
-  InstrItinData<IIC_POP_SR,    [InstrStage<29, [Port0, Port1]>] >,
-  InstrItinData<IIC_POP_SR_SS, [InstrStage<48, [Port0, Port1]>] >,
-  // VERR, VERW
-  InstrItinData<IIC_VERR,     [InstrStage<41, [Port0, Port1]>] >,
-  InstrItinData<IIC_VERW_REG, [InstrStage<51, [Port0, Port1]>] >,
-  InstrItinData<IIC_VERW_MEM, [InstrStage<50, [Port0, Port1]>] >,
-  // WRMSR, RDMSR
-  InstrItinData<IIC_WRMSR, [InstrStage<202, [Port0, Port1]>] >,
-  InstrItinData<IIC_RDMSR, [InstrStage<78, [Port0, Port1]>] >,
-  InstrItinData<IIC_RDPMC, [InstrStage<46, [Port0, Port1]>] >,
-  // SMSW, LMSW
-  InstrItinData<IIC_SMSW, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_LMSW_REG, [InstrStage<69, [Port0, Port1]>] >,
-  InstrItinData<IIC_LMSW_MEM, [InstrStage<67, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_ENTER, [InstrStage<32, [Port0, Port1]>] >,
-  InstrItinData<IIC_LEAVE, [InstrStage<2, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_POP_MEM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_POP_REG16, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_POP_REG, [InstrStage<1, [Port0], 0>,
-                            InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_POP_F, [InstrStage<32, [Port0, Port1]>] >,
-  InstrItinData<IIC_POP_FD, [InstrStage<26, [Port0, Port1]>] >,
-  InstrItinData<IIC_POP_A, [InstrStage<9, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_PUSH_IMM, [InstrStage<1, [Port0], 0>,
-                               InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_PUSH_MEM, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_PUSH_REG, [InstrStage<1, [Port0], 0>,
-                               InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_PUSH_F, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_PUSH_A, [InstrStage<8, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_BSWAP, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<16, [Port0, Port1]>] >,
-  InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<16, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOVS, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_STOS, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_SCAS, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPS, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOV_MEM, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_AHF, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_BT_MI, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_BT_MR, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_BT_RI, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_BT_RR, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_BTX_MI, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_BTX_MR, [InstrStage<11, [Port0, Port1]>] >,
-  InstrItinData<IIC_BTX_RI, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_BTX_RR, [InstrStage<1, [Port1]>] >,
-  InstrItinData<IIC_XCHG_REG, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_XCHG_MEM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_XADD_REG, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_XADD_MEM, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_MEM, [InstrStage<14, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_REG, [InstrStage<15, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_MEM8, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_REG8, [InstrStage<9, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_8B, [InstrStage<18, [Port0, Port1]>] >,
-  InstrItinData<IIC_CMPXCHG_16B, [InstrStage<22, [Port0, Port1]>] >,
-  InstrItinData<IIC_LODS, [InstrStage<2, [Port0, Port1]>] >,
-  InstrItinData<IIC_OUTS, [InstrStage<74, [Port0, Port1]>] >,
-  InstrItinData<IIC_CLC_CMC_STC, [InstrStage<1, [Port0, Port1]>] >,
-  InstrItinData<IIC_CLD, [InstrStage<3, [Port0, Port1]>] >,
-  InstrItinData<IIC_CLI, [InstrStage<14, [Port0, Port1]>] >,
-  InstrItinData<IIC_CLTS, [InstrStage<33, [Port0, Port1]>] >,
-  InstrItinData<IIC_STI, [InstrStage<17, [Port0, Port1]>] >,
-  InstrItinData<IIC_STD, [InstrStage<21, [Port0, Port1]>] >,
-  InstrItinData<IIC_XLAT, [InstrStage<6, [Port0, Port1]>] >,
-  InstrItinData<IIC_AAA, [InstrStage<13, [Port0, Port1]>] >,
-  InstrItinData<IIC_AAD, [InstrStage<7, [Port0, Port1]>] >,
-  InstrItinData<IIC_AAM, [InstrStage<21, [Port0, Port1]>] >,
-  InstrItinData<IIC_AAS, [InstrStage<13, [Port0, Port1]>] >,
-  InstrItinData<IIC_DAA, [InstrStage<18, [Port0, Port1]>] >,
-  InstrItinData<IIC_DAS, [InstrStage<20, [Port0, Port1]>] >,
-  InstrItinData<IIC_BOUND, [InstrStage<11, [Port0, Port1]>] >,
-  InstrItinData<IIC_ARPL_REG, [InstrStage<24, [Port0, Port1]>] >,
-  InstrItinData<IIC_ARPL_MEM, [InstrStage<23, [Port0, Port1]>] >,
-  InstrItinData<IIC_MOVBE, [InstrStage<1, [Port0]>] >,
-  InstrItinData<IIC_CBW, [InstrStage<4, [Port0, Port1]>] >,
-  InstrItinData<IIC_MMX_EMMS, [InstrStage<5, [Port0, Port1]>] >,
-
-  InstrItinData<IIC_NOP, [InstrStage<1, [Port0, Port1]>] >
-  ]>;
 
 // Atom machine model.
 def AtomModel : SchedMachineModel {
   let IssueWidth = 2;  // Allows 2 instructions per scheduling group.
   let MicroOpBufferSize = 0; // In-order execution, always hide latency.
-  let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles.
-  let HighLatency = 30;// Expected, may be overriden by OperandCycles.
+  let LoadLatency = 3; // Expected cycles, may be overridden.
+  let HighLatency = 30;// Expected, may be overridden.
 
   // On the Atom, the throughput for taken branches is 2 cycles. For small
   // simple loops, expand by a small factor to hide the backedge cost.
   let LoopMicroOpBufferSize = 10;
   let PostRAScheduler = 1;
   let CompleteModel = 0;
+}
+
+let SchedModel = AtomModel in {
+
+// Functional Units
+def AtomPort0 : ProcResource<1>; // ALU: ALU0, shift/rotate, load/store
+                                 // SIMD/FP: SIMD ALU, Shuffle,SIMD/FP multiply, divide
+def AtomPort1 : ProcResource<1>; // ALU: ALU1, bit processing, jump, and LEA
+                                 // SIMD/FP: SIMD ALU, FP Adder
+
+def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>; // Group: Port0 or Port1.
+
+// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 3>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+// NOTE(review): the original comment described folded loads as micro-fused
+// into two micro-ops, but nothing here sets NumMicroOps - confirm for Atom.
+multiclass AtomWriteResPair<X86FoldableSchedWrite SchedRW,
+                            list<ProcResourceKind> RRPorts, // Ports for the register form.
+                            list<ProcResourceKind> RMPorts, // Ports for the folded-load form.
+                            int RRLat = 1, int RMLat = 1,   // Latencies (register, folded load).
+                            list<int> RRRes = [1],          // ResourceCycles on RRPorts.
+                            list<int> RMRes = [1]> {        // ResourceCycles on RMPorts.
+  // Register variant: RRLat cycles of latency, RRRes cycles on RRPorts.
+  def : WriteRes<SchedRW, RRPorts> {
+    let Latency = RRLat;
+    let ResourceCycles = RRRes;
+  }
+
+  // Folded-load variant: RMLat and RMRes on RMPorts, used exactly as passed in.
+  // No load latency or extra port is added here; callers supply the totals.
+  def : WriteRes<SchedRW.Folded, RMPorts> {
+    let Latency = RMLat;
+    let ResourceCycles = RMRes;
+  }
+}
+
+// A folded store needs a cycle on Port0 for the store data.
+def : WriteRes<WriteRMW, [AtomPort0]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Arithmetic.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteALU,   [AtomPort01], [AtomPort0]>;
+defm : AtomWriteResPair<WriteIMul,  [AtomPort01], [AtomPort01],  7,  7, [7], [7]>;
+defm : AtomWriteResPair<WriteIDiv,  [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
+defm : AtomWriteResPair<WriteCRC32, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+defm : AtomWriteResPair<WriteCMOV,  [AtomPort01], [AtomPort0]>;
+
+def  : WriteRes<WriteSETCC, [AtomPort01]>;
+def  : WriteRes<WriteSETCCStore, [AtomPort01]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+}
+
+def : WriteRes<WriteIMulH, [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+// This is for simple LEAs with one or two input operands.
+def : WriteRes<WriteLEA, [AtomPort1]>;
+
+def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
+  let Latency = 8;
+  let ResourceCycles = [8];
+}
+def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
+
+def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 6;
+  let ResourceCycles = [6];
+}
+def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
+
+def AtomWriteIMul64 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 12;
+  let ResourceCycles = [12];
+}
+def : InstRW<[AtomWriteIMul64], (instrs MUL64r, IMUL64r, IMUL64rr, IMUL64rm,
+                                        MUL64m, IMUL64m)>;
+
+def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
+  let Latency = 14;
+  let ResourceCycles = [14];
+}
+def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
+                                         IMUL64rmi8, IMUL64rmi32)>;
+
+def AtomWriteDiv : SchedWriteRes<[AtomPort01]> {
+  let Latency = 50;
+  let ResourceCycles = [50];
+}
+def : InstRW<[AtomWriteDiv], (instrs DIV8r,
+                                     DIV16r, DIV16m,
+                                     DIV32r, DIV32m)>;
+
+def AtomWriteDiv8Ld : SchedWriteRes<[AtomPort01]> {
+  let Latency = 68;
+  let ResourceCycles = [68];
+}
+def : InstRW<[AtomWriteDiv8Ld], (instrs DIV8m)>;
+
+def AtomWriteIDiv64 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 130;
+  let ResourceCycles = [130];
+}
+def : InstRW<[AtomWriteIDiv64], (instrs DIV64r, IDIV64r,
+                                        DIV64m, IDIV64m)>;
+
+// Bit counts.
+defm : AtomWriteResPair<WriteBitScan, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
+defm : AtomWriteResPair<WritePOPCNT,  [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteLZCNT,   [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteTZCNT,   [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+// BMI1 BEXTR, BMI2 BZHI
+defm : AtomWriteResPair<WriteBEXTR, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteBZHI,  [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Integer shifts and rotates.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Loads, stores, and moves, not folded with other operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteLoad,  [AtomPort0]>;
+def : WriteRes<WriteStore, [AtomPort0]>;
+def : WriteRes<WriteMove,  [AtomPort01]>;
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteZero,  []>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Branches produce no value; they get the default latency of 1 and occupy
+// Port1 for one cycle. Indirect branches can fold loads.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteJump, [AtomPort1], [AtomPort1]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Special case scheduling classes.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteSystem,     [AtomPort01]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [AtomPort01]> { let Latency = 100; }
+def : WriteRes<WriteFence,      [AtomPort0]>;
+
+// Nops don't have dependencies, so there's no actual latency, but we set this
+// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
+def : WriteRes<WriteNop, [AtomPort01]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Floating point. This covers both scalar and vector operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def  : WriteRes<WriteFLoad,  [AtomPort0]>;
+def  : WriteRes<WriteFStore, [AtomPort0]>;
+def  : WriteRes<WriteFMove,  [AtomPort01]>;
+
+defm : AtomWriteResPair<WriteFAdd,           [AtomPort0],  [AtomPort0],  5,  5,  [5],  [5]>;
+defm : AtomWriteResPair<WriteFMul,           [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
+defm : AtomWriteResPair<WriteFRcp,           [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
+defm : AtomWriteResPair<WriteFRsqrt,         [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
+defm : AtomWriteResPair<WriteFDiv,          [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
+defm : AtomWriteResPair<WriteFSqrt,         [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
+defm : AtomWriteResPair<WriteFShuffle,       [AtomPort0],  [AtomPort0]>;
+defm : AtomWriteResPair<WriteFVarShuffle,    [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFMA,            [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFBlend,         [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFVarBlend,      [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFShuffle256,    [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteFVarShuffle256, [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Conversions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteCvtF2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; // Float -> Integer.
+defm : AtomWriteResPair<WriteCvtI2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Integer -> Float.
+defm : AtomWriteResPair<WriteCvtF2F, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; // Float -> Float size conversion.
+
+////////////////////////////////////////////////////////////////////////////////
+// Vector integer operations.
+////////////////////////////////////////////////////////////////////////////////
+
+def  : WriteRes<WriteVecLoad,  [AtomPort0]>;
+def  : WriteRes<WriteVecStore, [AtomPort0]>;
+def  : WriteRes<WriteVecMove,  [AtomPort01]>;
+
+defm : AtomWriteResPair<WriteVecALU,       [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecLogic,     [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecShift,     [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
+defm : AtomWriteResPair<WriteVecIMul,       [AtomPort0],  [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WritePMULLD,       [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteMPSAD,        [AtomPort01],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteShuffle,       [AtomPort0],  [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVarShuffle,   [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
+defm : AtomWriteResPair<WriteBlend,         [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteVarBlend,      [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteShuffle256,    [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteVarShuffle256, [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteVarVecShift,   [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// SSE42 String instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WritePCmpIStrI, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WritePCmpIStrM, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WritePCmpEStrI, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WritePCmpEStrM, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// MOVMSK Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def  : WriteRes<WriteFMOVMSK,   [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
+def  : WriteRes<WriteVecMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
+def  : WriteRes<WriteMMXMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
+
+////////////////////////////////////////////////////////////////////////////////
+// AES Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteAESIMC,    [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteAESKeyGen, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+defm : AtomWriteResPair<WriteAESDecEnc, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub  instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
+defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Carry-less multiplication instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteCLMul, [AtomPort01], [AtomPort01]>; // NOTE: Doesn't exist on Atom.
+
+////////////////////////////////////////////////////////////////////////////////
+// Special Cases.
+////////////////////////////////////////////////////////////////////////////////
+
+// Port0
+def AtomWrite0_1 : SchedWriteRes<[AtomPort0]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+def : InstRW<[AtomWrite0_1], (instrs FXAM,
+                                     BSWAP32r, BSWAP64r,
+                                     DEC8m, DEC16m, DEC32m, DEC64m,
+                                     INC8m, INC16m, INC32m, INC64m,
+                                     MOVSX64rr32,
+                                     MMX_MOVD64rr, MMX_MOVD64mr,
+                                     MMX_MOVD64to64rr, MMX_MOVD64to64rm,
+                                     MMX_PSHUFBrr, MMX_PSHUFBrm,
+                                     MOVDI2PDIrr, MOVDI2PDIrm,
+                                     MOV64toPQIrr, MOV64toPQIrm,
+                                     MOV64toSDrr, MOV64toSDrm, MOVSDto64mr,
+                                     MOVDI2SSrr, MOVDI2SSrm,
+                                     MOVPDI2DImr, MOVPQIto64mr, MOVSS2DImr, MOVQI2PQIrm, MOVPQI2QImr)>;
+def : InstRW<[AtomWrite0_1], (instregex "(ADC|ADD|AND|NEG|NOT|OR|SBB|SUB|XOR)(8|16|32|64)m",
+                                        "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
+                                        "MOV(S|Z)X(32|64)(rr|rm)(8|8_NOREX|16)",
+                                        "LD_F(P)?(16|32|64)?(m|rr)",
+                                        "MMX_MASKMOVQ(64)?",
+                                        "MMX_PAVG(B|W)irm",
+                                        "MMX_P(MAX|MIN)(UB|SW)irm",
+                                        "MMX_PSIGN(B|D|W)rm")>;
+
+def AtomWrite0_3 : SchedWriteRes<[AtomPort0]> {
+  let Latency = 3;
+  let ResourceCycles = [3];
+}
+def : InstRW<[AtomWrite0_3], (instrs MMX_MOVD64from64rr, MMX_MOVD64grr,
+                                     MOVPDI2DIrr, MOVPQIto64rr,
+                                     MOVSDto64rr, MOVSS2DIrr)>;
+
+def AtomWrite0_4 : SchedWriteRes<[AtomPort0]> {
+  let Latency = 4;
+  let ResourceCycles = [4];
+}
+def : InstRW<[AtomWrite0_4], (instrs MMX_PMADDUBSWrr, MMX_PMADDUBSWrm,
+                                     MMX_PMADDWDirr, MMX_PMADDWDirm,
+                                     MMX_PMULHRSWrr, MMX_PMULHRSWrm,
+                                     MMX_PMULHUWirr, MMX_PMULHUWirm,
+                                     MMX_PMULHWirr, MMX_PMULHWirm,
+                                     MMX_PMULLWirr, MMX_PMULLWirm,
+                                     MMX_PMULUDQirr, MMX_PMULUDQirm)>;
+
+def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
+  let Latency = 5;
+  let ResourceCycles = [5];
+}
+def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)",
+                                        "MUL(PS|SD)(rr|rm)(_Int)?")>;
+
+// Port1
+def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
+def : InstRW<[AtomWrite1_1], (instregex "ABS_F", "CHS_F",
+                                        "UCOM_F(P|PP)?r",
+                                        "BT(C|R|S)?(16|32|64)(rr|ri8)")>;
+
+def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
+  let Latency = 5;
+  let ResourceCycles = [5];
+}
+def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSirr, MMX_CVTPI2PSirm,
+                                     MMX_CVTPS2PIirr, MMX_CVTTPS2PIirr)>;
+
+// Port0 and Port1: both ports are listed, one ResourceCycles entry per port.
+def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> {
+  let Latency = 1;
+  let ResourceCycles = [1, 1]; // 1 cycle on Port0 and 1 cycle on Port1.
+}
+def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
+                                       POP16rmr, POP32rmr, POP64rmr,
+                                       PUSH16r, PUSH32r, PUSH64r,
+                                       PUSHi16, PUSHi32,
+                                       PUSH16rmr, PUSH32rmr, PUSH64rmr,
+                                       PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32,
+                                       XCH_F)>;
+def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$",
+                                          "IRET(16|32|64)?")>;
+
+def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
+  let Latency = 5;
+  let ResourceCycles = [5, 5]; // 5 cycles on Port0 and 5 cycles on Port1.
+}
+def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIirm, MMX_CVTTPS2PIirm)>;
+def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
+
+// Port0 or Port1: these use the AtomPort01 resource group.
+def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 1;
+  let ResourceCycles = [1]; // 1 cycle on the AtomPort01 group.
+}
+def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, LD_F0, WAIT,
+                                      LFENCE,
+                                      STOSB, STOSL, STOSQ, STOSW,
+                                      MOVSSrr, MOVSSrr_REV,
+                                      PSLLDQri, PSRLDQri)>;
+def : InstRW<[AtomWrite01_1], (instregex "(MMX_)?PS(LL|RA|RL)(D|Q|W)ri",
+                                         "MMX_PAVG(B|W)irr",
+                                         "MMX_P(MAX|MIN)(UB|SW)irr",
+                                         "MMX_PSIGN(B|D|W)rr",
+                                         "MMX_PACK(SSDW|SSWB|USWB)irr",
+                                         "MMX_PUNPCKH(BW|DQ|WD)irr")>;
+
+def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+}
+def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
+                                      PUSH16rmm, PUSH32rmm, PUSH64rmm,
+                                      LODSB, LODSL, LODSQ, LODSW,
+                                      SCASB, SCASL, SCASQ, SCASW,
+                                      SHLD32rrCL, SHRD32rrCL,
+                                      SHLD32rri8, SHRD32rri8)>;
+def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
+                                         "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
+                                         "XADD(8|16|32|64)rr",
+                                         "XCHG(8|16|32|64)(ar|rr)",
+                                         "(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
+                                         "MMX_P(ADD|SUB)Qirr",
+                                         "MOV(S|Z)X16rr8",
+                                         "MOV(UPS|UPD|DQU)mr",
+                                         "MASKMOVDQU(64)?",
+                                         "P(ADD|SUB)Qrr")>;
+
+def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 3;
+  let ResourceCycles = [3];
+}
+def : InstRW<[AtomWrite01_3], (instrs CLD, LDDQUrm,
+                                      CMPSB, CMPSL, CMPSQ, CMPSW,
+                                      MOVSB, MOVSL, MOVSQ, MOVSW,
+                                      POP16rmm, POP32rmm, POP64rmm)>;
+def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
+                                         "XCHG(8|16|32|64)rm",
+                                         "(MMX_)?PH(ADD|SUB)Drr",
+                                         "MOV(S|Z)X16rm8",
+                                         "MMX_P(ADD|SUB)Qirm",
+                                         "MOV(UPS|UPD|DQU)rm",
+                                         "P(ADD|SUB)Qrm")>;
+
+def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 4;
+  let ResourceCycles = [4];
+}
+def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
+                                      JCXZ, JECXZ, JRCXZ,
+                                      SHLD32mrCL, SHRD32mrCL,
+                                      SHLD32mri8, SHRD32mri8,
+                                      LD_F80m,
+                                      MMX_PSADBWirr, MMX_PSADBWirm)>;
+def : InstRW<[AtomWrite01_4], (instregex "(MMX_)?PH(ADD|SUB)Drm",
+                                         "(MMX_)?PEXTRWrr(_REV)?")>;
+
+def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 5;
+  let ResourceCycles = [5];
+}
+def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, LDMXCSR,
+                                      MMX_EMMS)>;
+def : InstRW<[AtomWrite01_5], (instregex "ST_FP80m",
+                                         "MMX_PH(ADD|SUB)S?Wrr")>;
+
+def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 6;
+  let ResourceCycles = [6];
+}
+def : InstRW<[AtomWrite01_6], (instrs LD_F1, CMPXCHG8rm, INTO, XLAT,
+                                      SHLD16rrCL, SHRD16rrCL,
+                                      SHLD16rri8, SHRD16rri8,
+                                      SHLD16mrCL, SHRD16mrCL,
+                                      SHLD16mri8, SHRD16mri8,
+                                      ADDSUBPDrr, ADDSUBPDrm,
+                                      CVTPS2DQrr, CVTTPS2DQrr)>;
+def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
+                                         "IST_F(P)?(16|32|64)?m",
+                                         "MMX_PH(ADD|SUB)S?Wrm",
+                                         "(ADD|SUB|MAX|MIN)PDrr",
+                                         "CMPPDrri")>;
+
+def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 7;
+  let ResourceCycles = [7];
+}
+def : InstRW<[AtomWrite01_7], (instrs AAD8i8,
+                                      CVTDQ2PDrr,
+                                      CVTPD2DQrr,
+                                      CVTPD2PSrr,
+                                      CVTPS2DQrm,
+                                      CVTPS2PDrr,
+                                      CVTTPD2DQrr,
+                                      CVTTPS2DQrm,
+                                      MMX_CVTPD2PIirr,
+                                      MMX_CVTPI2PDirr,
+                                      MMX_CVTTPD2PIirr)>;
+def : InstRW<[AtomWrite01_7], (instregex "(ADD|SUB|MAX|MIN)PDrm",
+                                         "CMPPDrmi")>;
+
+def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 8;
+  let ResourceCycles = [8];
+}
+def : InstRW<[AtomWrite01_8], (instrs LOOPE,
+                                      PUSHA16, PUSHA32,
+                                      SHLD64rrCL, SHRD64rrCL,
+                                      FNSTCW16m,
+                                      CVTDQ2PDrm,
+                                      CVTPD2DQrm,
+                                      CVTPD2PSrm,
+                                      CVTPS2PDrm,
+                                      CVTTPD2DQrm,
+                                      MMX_CVTPD2PIirm,
+                                      MMX_CVTPI2PDirm,
+                                      MMX_CVTTPD2PIirm)>;
+
+def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 9;
+  let ResourceCycles = [9];
+}
+def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
+                                      POPA16, POPA32,
+                                      PUSHF16, PUSHF32, PUSHF64,
+                                      SHLD64mrCL, SHRD64mrCL,
+                                      SHLD64mri8, SHRD64mri8,
+                                      SHLD64rri8, SHRD64rri8,
+                                      CMPXCHG8rr,
+                                      MULPDrr, RCPPSr, RSQRTPSr)>;
+def : InstRW<[AtomWrite01_9], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F",
+                                         "(U)?COM_FI", "TST_F",
+                                         "(U)?COMIS(D|S)rr",
+                                         "CVT(T)?SS2SI64rr(_Int)?")>;
+
+def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 10;
+  let ResourceCycles = [10];
+}
+def : InstRW<[AtomWrite01_10], (instrs FLDL2E, FLDL2T, FLDLG2, FLDLN2, FLDPI,
+                                       MULPDrm, RCPPSm, RSQRTPSm)>;
+def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
+                                          "CVT(T)?SS2SI64rm(_Int)?")>;
+
+def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 11;
+  let ResourceCycles = [11];
+}
+def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
+def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>;
+
+def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 13;
+  let ResourceCycles = [13];
+}
+def : InstRW<[AtomWrite01_13], (instrs AAA, AAS)>;
+
+def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 14;
+  let ResourceCycles = [14];
+}
+def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
+
+def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 15;
+  let ResourceCycles = [15];
+}
+def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr,
+                                       STMXCSR)>;
+
+def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 17;
+  let ResourceCycles = [17];
+}
+def : InstRW<[AtomWrite01_17], (instrs LOOPNE, PAUSE)>;
+
+def AtomWrite01_18 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 18;
+  let ResourceCycles = [18];
+}
+def : InstRW<[AtomWrite01_18], (instrs CMPXCHG8B, DAA, LOOP)>;
+
+def AtomWrite01_20 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 20;
+  let ResourceCycles = [20];
+}
+def : InstRW<[AtomWrite01_20], (instrs DAS)>;
+
+def AtomWrite01_21 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 21;
+  let ResourceCycles = [21];
+}
+def : InstRW<[AtomWrite01_21], (instrs AAM8i8, STD)>;
+
+def AtomWrite01_22 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 22;
+  let ResourceCycles = [22];
+}
+def : InstRW<[AtomWrite01_22], (instrs CMPXCHG16B)>;
+
+def AtomWrite01_23 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 23;
+  let ResourceCycles = [23];
+}
+def : InstRW<[AtomWrite01_23], (instrs ARPL16mr, ARPL16rr)>;
 
-  let Itineraries = AtomItineraries;
+def AtomWrite01_25 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 25;
+  let ResourceCycles = [25];
 }
+def : InstRW<[AtomWrite01_25], (instrs FNCLEX, FXTRACT)>;
+
+def AtomWrite01_26 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 26;
+  let ResourceCycles = [26];
+}
+def : InstRW<[AtomWrite01_26], (instrs POPF32, POPF64)>;
+
+def AtomWrite01_29 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 29;
+  let ResourceCycles = [29];
+}
+def : InstRW<[AtomWrite01_29], (instregex "POP(DS|ES|FS|GS)(16|32|64)")>;
+
+def AtomWrite01_30 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 30;
+  let ResourceCycles = [30];
+}
+def : InstRW<[AtomWrite01_30], (instrs RDTSC, RDTSCP)>;
+
+def AtomWrite01_32 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 32;
+  let ResourceCycles = [32];
+}
+def : InstRW<[AtomWrite01_32], (instrs ENTER, POPF16)>;
+
+def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 45;
+  let ResourceCycles = [45];
+}
+def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>;
+
+def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 46;
+  let ResourceCycles = [46];
+}
+def : InstRW<[AtomWrite01_46], (instrs FRNDINT, MWAITrr, RDPMC)>;
+
+def AtomWrite01_48 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 48;
+  let ResourceCycles = [48];
+}
+def : InstRW<[AtomWrite01_48], (instrs POPSS16, POPSS32)>;
+
+def AtomWrite01_55 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 55;
+  let ResourceCycles = [55];
+}
+def : InstRW<[AtomWrite01_55], (instrs FPREM)>;
+
+def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 59;
+  let ResourceCycles = [59];
+}
+def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>;
+
+def AtomWrite01_62 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 62;
+  let ResourceCycles = [62];
+}
+def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?",
+                                          "SQRTSD(r|m)(_Int)?")>;
+
+def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 63;
+  let ResourceCycles = [63];
+}
+def : InstRW<[AtomWrite01_63], (instrs FNINIT)>;
+
+def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 68;
+  let ResourceCycles = [68];
+}
+def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>;
+
+def AtomWrite01_70 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 70;
+  let ResourceCycles = [70];
+}
+def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm, SQRTPSr, SQRTPSm)>;
+
+def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 71;
+  let ResourceCycles = [71];
+}
+def : InstRW<[AtomWrite01_71], (instrs FPREM1,
+                                       INVLPG, INVLPGA32, INVLPGA64)>;
+def : InstRW<[AtomWrite01_71], (instregex "SQRT_F")>;
+
+def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 72;
+  let ResourceCycles = [72];
+}
+def : InstRW<[AtomWrite01_72], (instrs OUT8ir, OUT16ir, OUT32ir)>;
+
+def AtomWrite01_74 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 74;
+  let ResourceCycles = [74];
+}
+def : InstRW<[AtomWrite01_74], (instrs OUTSB, OUTSL, OUTSW)>;
+
+def AtomWrite01_77 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 77;
+  let ResourceCycles = [77];
+}
+def : InstRW<[AtomWrite01_77], (instrs FSCALE)>;
+
+def AtomWrite01_78 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 78;
+  let ResourceCycles = [78];
+}
+def : InstRW<[AtomWrite01_78], (instrs RDMSR)>;
+
+def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 79;
+  let ResourceCycles = [79];
+}
+def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$",
+                                          "LRETI?(L|Q|W)")>;
+
+def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 92;
+  let ResourceCycles = [92];
+}
+def : InstRW<[AtomWrite01_92], (instrs IN8ri, IN16ri, IN32ri)>;
+
+def AtomWrite01_94 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 94;
+  let ResourceCycles = [94];
+}
+def : InstRW<[AtomWrite01_94], (instrs IN8rr, IN16rr, IN32rr)>;
+
+def AtomWrite01_99 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 99;
+  let ResourceCycles = [99];
+}
+def : InstRW<[AtomWrite01_99], (instrs F2XM1)>;
+
+def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 121;
+  let ResourceCycles = [121];
+}
+def : InstRW<[AtomWrite01_121], (instrs CPUID)>;
+
+def AtomWrite01_125 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 125;
+  let ResourceCycles = [125];
+}
+def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm, SQRTPDr, SQRTPDm)>;
+
+def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 127;
+  let ResourceCycles = [127];
+}
+def : InstRW<[AtomWrite01_127], (instrs INT)>;
+
+def AtomWrite01_130 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 130;
+  let ResourceCycles = [130];
+}
+def : InstRW<[AtomWrite01_130], (instrs INT3)>;
+
+def AtomWrite01_140 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 140;
+  let ResourceCycles = [140];
+}
+def : InstRW<[AtomWrite01_140], (instrs FXSAVE, FXSAVE64)>;
+
+def AtomWrite01_141 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 141;
+  let ResourceCycles = [141];
+}
+def : InstRW<[AtomWrite01_141], (instrs FXRSTOR, FXRSTOR64)>;
+
+def AtomWrite01_146 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 146;
+  let ResourceCycles = [146];
+}
+def : InstRW<[AtomWrite01_146], (instrs FYL2X)>;
+
+def AtomWrite01_147 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 147;
+  let ResourceCycles = [147];
+}
+def : InstRW<[AtomWrite01_147], (instrs FYL2XP1)>;
+
+def AtomWrite01_168 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 168;
+  let ResourceCycles = [168];
+}
+def : InstRW<[AtomWrite01_168], (instrs FPTAN)>;
+
+def AtomWrite01_174 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 174;
+  let ResourceCycles = [174];
+}
+def : InstRW<[AtomWrite01_174], (instrs FSINCOS)>;
+def : InstRW<[AtomWrite01_174], (instregex "(COS|SIN)_F")>;
+
+def AtomWrite01_183 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 183;
+  let ResourceCycles = [183];
+}
+def : InstRW<[AtomWrite01_183], (instrs FPATAN)>;
+
+def AtomWrite01_202 : SchedWriteRes<[AtomPort01]> {
+  let Latency = 202;
+  let ResourceCycles = [202];
+}
+def : InstRW<[AtomWrite01_202], (instrs WRMSR)>;
+
+} // SchedModel
index 2400261..c594354 100644 (file)
@@ -7,8 +7,10 @@
 ; CHECK-NEXT: jne
 
 ; ATOM-LABEL: t:
-; ATOM: movl (%r9,%r{{.+}},4), %e{{..}}
+; ATOM: movl (%r9,%r{{.+}},4), %r{{..}}
+; ATOM-NEXT: xorl
 ; ATOM-NEXT: testq
+; ATOM-NEXT: movl
 ; ATOM-NEXT: jne
 
 @Te0 = external global [256 x i32]             ; <[256 x i32]*> [#uses=5]
index 1d4cb3c..beea001 100644 (file)
@@ -1,5 +1,5 @@
 ; RUN: llc -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
-; RUN: llc -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck -check-prefix=ATOM %s
+; RUN: llc -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
 
 ; CHECK: xorl  %eax, %eax
 ; CHECK: movsd .LCPI0_0(%rip), %xmm0
 ; CHECK-NEXT: movsd
 ; CHECK-NEXT: incq %rax
 
-
-; ATOM: movsd .LCPI0_0(%rip), %xmm0
-; ATOM: xorl  %eax, %eax
-; ATOM: align
-; ATOM-NEXT: BB0_2:
-; ATOM-NEXT: movsd A(,%rax,8)
-; ATOM-NEXT: mulsd
-; ATOM-NEXT: movsd
-; ATOM-NEXT: incq %rax
-
 @A = external global [0 x double]
 
 define void @foo(i64 %n) nounwind {
index 65271fd..53cd12e 100644 (file)
@@ -647,11 +647,11 @@ define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) {
 ;
 ; ATOM-LABEL: test_movd:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    movd (%rsi), %mm1 # sched: [1:1.00]
-; ATOM-NEXT:    movd %edi, %mm2 # sched: [1:1.00]
-; ATOM-NEXT:    paddd %mm2, %mm1 # sched: [1:0.50]
-; ATOM-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT:    movd %mm1, %ecx # sched: [3:3.00]
+; ATOM-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
+; ATOM-NEXT:    movd (%rsi), %mm2 # sched: [1:1.00]
+; ATOM-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
+; ATOM-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
+; ATOM-NEXT:    movd %mm2, %ecx # sched: [3:3.00]
 ; ATOM-NEXT:    movd %mm0, %eax # sched: [3:3.00]
 ; ATOM-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
@@ -3509,8 +3509,8 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
 ;
 ; ATOM-LABEL: test_pinsrw:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    movswl (%rsi), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:1.00]
+; ATOM-NEXT:    movswl (%rsi), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:1.00]
 ; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
index 6f5c403..bcd1c6b 100644 (file)
@@ -1220,7 +1220,7 @@ define void @test_into() optsize {
 ; ATOM-LABEL: test_into:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    into # sched: [0:?]
+; ATOM-NEXT:    into # sched: [6:3.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
index db49c55..859591f 100644 (file)
@@ -15737,7 +15737,7 @@ define void @test_ud2() optsize {
 ; ATOM-LABEL: test_ud2:
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    ud2 # sched: [0:?]
+; ATOM-NEXT:    ud2 # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
index e7ee265..7e881de 100644 (file)
@@ -104,14 +104,23 @@ declare i1 @return_false()
 
 ;; Select between two floating point constants.
 define float @test3(i32 %x) nounwind readnone {
-; CHECK-LABEL: test3:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    leaq {{.*}}(%rip), %rcx
-; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test3:
+; GENERIC:       ## %bb.0: ## %entry
+; GENERIC-NEXT:    xorl %eax, %eax
+; GENERIC-NEXT:    testl %edi, %edi
+; GENERIC-NEXT:    sete %al
+; GENERIC-NEXT:    leaq {{.*}}(%rip), %rcx
+; GENERIC-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test3:
+; ATOM:       ## %bb.0: ## %entry
+; ATOM-NEXT:    xorl %eax, %eax
+; ATOM-NEXT:    leaq {{.*}}(%rip), %rcx
+; ATOM-NEXT:    testl %edi, %edi
+; ATOM-NEXT:    sete %al
+; ATOM-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ATOM-NEXT:    retq
 ;
 ; MCU-LABEL: test3:
 ; MCU:       # %bb.0: # %entry
@@ -266,15 +275,25 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
 
 ; Select with fp80's
 define x86_fp80 @test7(i32 %tmp8) nounwind {
-; CHECK-LABEL: test7:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    setns %al
-; CHECK-NEXT:    shlq $4, %rax
-; CHECK-NEXT:    leaq {{.*}}(%rip), %rcx
-; CHECK-NEXT:    fldt (%rax,%rcx)
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test7:
+; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    xorl %eax, %eax
+; GENERIC-NEXT:    testl %edi, %edi
+; GENERIC-NEXT:    setns %al
+; GENERIC-NEXT:    shlq $4, %rax
+; GENERIC-NEXT:    leaq {{.*}}(%rip), %rcx
+; GENERIC-NEXT:    fldt (%rax,%rcx)
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test7:
+; ATOM:       ## %bb.0:
+; ATOM-NEXT:    xorl %eax, %eax
+; ATOM-NEXT:    leaq {{.*}}(%rip), %rcx
+; ATOM-NEXT:    testl %edi, %edi
+; ATOM-NEXT:    setns %al
+; ATOM-NEXT:    shlq $4, %rax
+; ATOM-NEXT:    fldt (%rax,%rcx)
+; ATOM-NEXT:    retq
 ;
 ; MCU-LABEL: test7:
 ; MCU:       # %bb.0:
@@ -330,31 +349,32 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
 ; ATOM-NEXT:    testb $1, %dil
 ; ATOM-NEXT:    jne LBB7_1
 ; ATOM-NEXT:  ## %bb.2:
-; ATOM-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ATOM-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; ATOM-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; ATOM-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; ATOM-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
 ; ATOM-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ATOM-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; ATOM-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; ATOM-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; ATOM-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; ATOM-NEXT:    jmp LBB7_3
 ; ATOM-NEXT:  LBB7_1:
-; ATOM-NEXT:    movd %r9d, %xmm0
+; ATOM-NEXT:    movd %r9d, %xmm1
 ; ATOM-NEXT:    movd %r8d, %xmm2
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
 ; ATOM-NEXT:    movd %ecx, %xmm3
 ; ATOM-NEXT:    movd %edx, %xmm0
-; ATOM-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; ATOM-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; ATOM-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; ATOM-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; ATOM-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; ATOM-NEXT:  LBB7_3:
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
 ; ATOM-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; ATOM-NEXT:    pcmpeqd %xmm2, %xmm2
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; ATOM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
 ; ATOM-NEXT:    paddd %xmm2, %xmm0
 ; ATOM-NEXT:    paddd %xmm2, %xmm1
-; ATOM-NEXT:    movdqa %xmm0, (%rsi)
 ; ATOM-NEXT:    movq %xmm1, 16(%rsi)
+; ATOM-NEXT:    movdqa %xmm0, (%rsi)
 ; ATOM-NEXT:    retq
 ;
 ; MCU-LABEL: test8:
@@ -634,8 +654,8 @@ define noalias i8* @test12(i64 %count) nounwind ssp noredzone {
 ; ATOM:       ## %bb.0: ## %entry
 ; ATOM-NEXT:    movq %rdi, %rax
 ; ATOM-NEXT:    movl $4, %ecx
-; ATOM-NEXT:    mulq %rcx
 ; ATOM-NEXT:    movq $-1, %rdi
+; ATOM-NEXT:    mulq %rcx
 ; ATOM-NEXT:    cmovnoq %rax, %rdi
 ; ATOM-NEXT:    jmp __Znam ## TAILCALL
 ;
@@ -894,8 +914,8 @@ define void @clamp_i8(i32 %src, i8* %dst) {
 ; ATOM:       ## %bb.0:
 ; ATOM-NEXT:    cmpl $127, %edi
 ; ATOM-NEXT:    movl $127, %eax
-; ATOM-NEXT:    cmovlel %edi, %eax
 ; ATOM-NEXT:    movb $-128, %cl
+; ATOM-NEXT:    cmovlel %edi, %eax
 ; ATOM-NEXT:    cmpl $-128, %eax
 ; ATOM-NEXT:    jl LBB22_2
 ; ATOM-NEXT:  ## %bb.1:
@@ -946,8 +966,8 @@ define void @clamp(i32 %src, i16* %dst) {
 ; ATOM:       ## %bb.0:
 ; ATOM-NEXT:    cmpl $32767, %edi ## imm = 0x7FFF
 ; ATOM-NEXT:    movl $32767, %eax ## imm = 0x7FFF
-; ATOM-NEXT:    cmovlel %edi, %eax
 ; ATOM-NEXT:    movl $32768, %ecx ## imm = 0x8000
+; ATOM-NEXT:    cmovlel %edi, %eax
 ; ATOM-NEXT:    cmpl $-32768, %eax ## imm = 0x8000
 ; ATOM-NEXT:    cmovgel %eax, %ecx
 ; ATOM-NEXT:    movw %cx, (%rsi)
index 7b6831a..dcd6643 100644 (file)
@@ -6133,8 +6133,6 @@ define <4 x float> @test_fnop() nounwind {
 ; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
-; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_fnop:
index 5a1f1fb..76c5360 100644 (file)
@@ -4670,10 +4670,10 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
 ; ATOM-NEXT:    movq %rdi, %xmm2 # sched: [1:1.00]
-; ATOM-NEXT:    paddq %xmm0, %xmm2 # sched: [2:1.00]
 ; ATOM-NEXT:    paddq %xmm0, %xmm1 # sched: [2:1.00]
-; ATOM-NEXT:    movq %xmm2, (%rsi) # sched: [1:1.00]
+; ATOM-NEXT:    paddq %xmm0, %xmm2 # sched: [2:1.00]
 ; ATOM-NEXT:    movq %xmm1, %rax # sched: [3:3.00]
+; ATOM-NEXT:    movq %xmm2, (%rsi) # sched: [1:1.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_movd_64:
@@ -10447,10 +10447,11 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
 ;
 ; ATOM-LABEL: test_pshufd:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00]
-; ATOM-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; ATOM-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [1:1.00]
+; ATOM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pshufd:
@@ -10575,10 +10576,11 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
 ;
 ; ATOM-LABEL: test_pshufhw:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00]
-; ATOM-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; ATOM-NEXT:    paddw %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; ATOM-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00]
+; ATOM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pshufhw:
@@ -10703,10 +10705,11 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
 ;
 ; ATOM-LABEL: test_pshuflw:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00]
-; ATOM-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; ATOM-NEXT:    paddw %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; ATOM-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00]
+; ATOM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pshuflw:
index 50bdf20..8c2424e 100644 (file)
@@ -899,10 +899,9 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
 ;
 ; ATOM-LABEL: test_movddup:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    movddup {{.*#+}} xmm1 = mem[0,0] sched: [1:1.00]
-; ATOM-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; ATOM-NEXT:    subpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
+; ATOM-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00]
+; ATOM-NEXT:    subpd %xmm1, %xmm0 # sched: [6:3.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_movddup:
@@ -1027,10 +1026,9 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
 ;
 ; ATOM-LABEL: test_movshdup:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:1.00]
-; ATOM-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
+; ATOM-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00]
+; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_movshdup:
@@ -1155,10 +1153,9 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
 ;
 ; ATOM-LABEL: test_movsldup:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:1.00]
-; ATOM-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
+; ATOM-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00]
+; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_movsldup:
index ffa7ef1..d617743 100644 (file)
@@ -29,10 +29,11 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
 ;
 ; ATOM-LABEL: test_pabsb:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pabsb (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT:    pabsb %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    por %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pabsb %xmm0, %xmm1 # sched: [1:0.50]
+; ATOM-NEXT:    pabsb (%rdi), %xmm0 # sched: [1:1.00]
+; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pabsb:
@@ -157,10 +158,11 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
 ;
 ; ATOM-LABEL: test_pabsd:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pabsd (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT:    pabsd %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    por %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pabsd %xmm0, %xmm1 # sched: [1:0.50]
+; ATOM-NEXT:    pabsd (%rdi), %xmm0 # sched: [1:1.00]
+; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pabsd:
@@ -285,10 +287,11 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
 ;
 ; ATOM-LABEL: test_pabsw:
 ; ATOM:       # %bb.0:
-; ATOM-NEXT:    pabsw (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT:    pabsw %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT:    por %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    pabsw %xmm0, %xmm1 # sched: [1:0.50]
+; ATOM-NEXT:    pabsw (%rdi), %xmm0 # sched: [1:1.00]
+; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
+; ATOM-NEXT:    nop # sched: [1:0.50]
 ; ATOM-NEXT:    retq # sched: [79:39.50]
 ;
 ; SLM-LABEL: test_pabsw:
index 41b62e8..5d01286 100644 (file)
@@ -177,10 +177,10 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fadd %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fadd %st(2) # sched: [0:?]
-; ATOM-NEXT:    fadds (%ecx) # sched: [0:?]
-; ATOM-NEXT:    faddl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fadd %st(0), %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fadd %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fadds (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    faddl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -301,10 +301,10 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    faddp %st(1) # sched: [0:?]
-; ATOM-NEXT:    faddp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fiadds (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fiaddl (%eax) # sched: [0:?]
+; ATOM-NEXT:    faddp %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    faddp %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fiadds (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fiaddl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -421,8 +421,8 @@ define void @test_fbld_fbstp(i8* %a0) optsize {
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fbld (%eax) # sched: [0:?]
-; ATOM-NEXT:    fbstp (%eax) # sched: [0:?]
+; ATOM-NEXT:    fbld (%eax) # sched: [100:0.50]
+; ATOM-NEXT:    fbstp (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -895,10 +895,10 @@ define void @test_fcom(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fcom %st(1) # sched: [0:?]
-; ATOM-NEXT:    fcom %st(3) # sched: [0:?]
-; ATOM-NEXT:    fcoms (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fcoml (%eax) # sched: [0:?]
+; ATOM-NEXT:    fcom %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fcom %st(3) # sched: [5:5.00]
+; ATOM-NEXT:    fcoms (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fcoml (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -1020,10 +1020,10 @@ define void @test_fcomp_fcompp(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fcomp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fcomp %st(3) # sched: [0:?]
-; ATOM-NEXT:    fcomps (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fcompl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fcomp %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fcomp %st(3) # sched: [5:5.00]
+; ATOM-NEXT:    fcomps (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fcompl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    fcompp # sched: [1:1.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
@@ -1385,10 +1385,10 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdiv %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fdiv %st(2) # sched: [0:?]
-; ATOM-NEXT:    fdivs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fdivl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fdiv %st(0), %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdiv %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fdivs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT:    fdivl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -1509,10 +1509,10 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdivp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fdivp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fidivs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fidivl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fdivp %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdivp %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fidivs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT:    fidivl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -1633,10 +1633,10 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdivr %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fdivr %st(2) # sched: [0:?]
-; ATOM-NEXT:    fdivrs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fdivrl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fdivr %st(0), %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdivr %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fdivrs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT:    fdivrl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -1757,10 +1757,10 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fdivrp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fdivrp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fidivrs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fidivrl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fdivrp %st(1) # sched: [34:17.00]
+; ATOM-NEXT:    fdivrp %st(2) # sched: [34:17.00]
+; ATOM-NEXT:    fidivrs (%ecx) # sched: [34:17.00]
+; ATOM-NEXT:    fidivrl (%eax) # sched: [34:17.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -1955,10 +1955,10 @@ define void @test_ficom(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    ficoms (%ecx) # sched: [0:?]
-; ATOM-NEXT:    ficoml (%eax) # sched: [0:?]
-; ATOM-NEXT:    ficomps (%ecx) # sched: [0:?]
-; ATOM-NEXT:    ficompl (%eax) # sched: [0:?]
+; ATOM-NEXT:    ficoms (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    ficoml (%eax) # sched: [5:5.00]
+; ATOM-NEXT:    ficomps (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    ficompl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -2740,7 +2740,7 @@ define void @test_fldcw_fldenv(i8* %a0) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
 ; ATOM-NEXT:    fldcw (%eax) # sched: [5:2.50]
-; ATOM-NEXT:    fldenv (%eax) # sched: [0:?]
+; ATOM-NEXT:    fldenv (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -2961,10 +2961,10 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fmul %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fmul %st(2) # sched: [0:?]
-; ATOM-NEXT:    fmuls (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fmull (%eax) # sched: [0:?]
+; ATOM-NEXT:    fmul %st(0), %st(1) # sched: [4:4.00]
+; ATOM-NEXT:    fmul %st(2) # sched: [4:4.00]
+; ATOM-NEXT:    fmuls (%ecx) # sched: [4:4.00]
+; ATOM-NEXT:    fmull (%eax) # sched: [4:4.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -3085,10 +3085,10 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fmulp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fmulp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fimuls (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fimull (%eax) # sched: [0:?]
+; ATOM-NEXT:    fmulp %st(1) # sched: [4:4.00]
+; ATOM-NEXT:    fmulp %st(2) # sched: [4:4.00]
+; ATOM-NEXT:    fimuls (%ecx) # sched: [4:4.00]
+; ATOM-NEXT:    fimull (%eax) # sched: [4:4.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -3584,7 +3584,7 @@ define void @test_frstor(i8* %a0) optsize {
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    frstor (%eax) # sched: [0:?]
+; ATOM-NEXT:    frstor (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -3670,7 +3670,7 @@ define void @test_fsave(i8* %a0) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
 ; ATOM-NEXT:    wait # sched: [1:0.50]
-; ATOM-NEXT:    fnsave (%eax) # sched: [0:?]
+; ATOM-NEXT:    fnsave (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -3762,7 +3762,7 @@ define void @test_fnsave(i8* %a0) optsize {
 ; ATOM:       # %bb.0:
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fnsave (%eax) # sched: [0:?]
+; ATOM-NEXT:    fnsave (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4314,9 +4314,9 @@ define void @test_fstcw_fstenv_fstsw(i8* %a0) optsize {
 ; ATOM-NEXT:    wait # sched: [1:0.50]
 ; ATOM-NEXT:    fnstcw (%eax) # sched: [8:4.00]
 ; ATOM-NEXT:    wait # sched: [1:0.50]
-; ATOM-NEXT:    fnstenv (%eax) # sched: [0:?]
+; ATOM-NEXT:    fnstenv (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    wait # sched: [1:0.50]
-; ATOM-NEXT:    fnstsw (%eax) # sched: [0:?]
+; ATOM-NEXT:    fnstsw (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4443,8 +4443,8 @@ define void @test_fnstcw_fnstenv_fnstsw(i8* %a0) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
 ; ATOM-NEXT:    fnstcw (%eax) # sched: [8:4.00]
-; ATOM-NEXT:    fnstenv (%eax) # sched: [0:?]
-; ATOM-NEXT:    fnstsw (%eax) # sched: [0:?]
+; ATOM-NEXT:    fnstenv (%eax) # sched: [100:0.50]
+; ATOM-NEXT:    fnstsw (%eax) # sched: [100:0.50]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4549,10 +4549,10 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsub %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fsub %st(2) # sched: [0:?]
-; ATOM-NEXT:    fsubs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fsubl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fsub %st(0), %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsub %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fsubs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fsubl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4673,10 +4673,10 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsubp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fsubp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fisubs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fisubl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fsubp %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsubp %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fisubs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fisubl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4797,10 +4797,10 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsubr %st(0), %st(1) # sched: [0:?]
-; ATOM-NEXT:    fsubr %st(2) # sched: [0:?]
-; ATOM-NEXT:    fsubrs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fsubrl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fsubr %st(0), %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsubr %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fsubrs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fsubrl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;
@@ -4921,10 +4921,10 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
 ; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
 ; ATOM-NEXT:    #APP
-; ATOM-NEXT:    fsubrp %st(1) # sched: [0:?]
-; ATOM-NEXT:    fsubrp %st(2) # sched: [0:?]
-; ATOM-NEXT:    fisubrs (%ecx) # sched: [0:?]
-; ATOM-NEXT:    fisubrl (%eax) # sched: [0:?]
+; ATOM-NEXT:    fsubrp %st(1) # sched: [5:5.00]
+; ATOM-NEXT:    fsubrp %st(2) # sched: [5:5.00]
+; ATOM-NEXT:    fisubrs (%ecx) # sched: [5:5.00]
+; ATOM-NEXT:    fisubrl (%eax) # sched: [5:5.00]
 ; ATOM-NEXT:    #NO_APP
 ; ATOM-NEXT:    retl # sched: [79:39.50]
 ;