From: Samuel Parker Date: Wed, 15 Feb 2023 10:32:16 +0000 (+0000) Subject: [DAGCombine] Fold redundant select X-Git-Tag: upstream/17.0.6~17431 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c7f9344d0f8f6a00adab138037e2e7b406ef2b69;p=platform%2Fupstream%2Fllvm.git [DAGCombine] Fold redundant select Recommit bbdf24357932b064f2aa18ea1356b474e0220dde. Original commit message: If a chain of two selects share a true/false value and are controlled by two setcc nodes, that are never both true, we can fold away one of the selects. So, the following: (select (setcc X, const0, eq), Y, (select (setcc X, const1, eq), Z, Y)) Can be combined to: select (setcc X, const1, eq) Z, Y Differential Revision: https://reviews.llvm.org/D142535 --- diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8862aef..a64026f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11228,6 +11228,67 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } + // If we have a chain of two selects, which share a true/false value and + // both are controlled from the two setcc nodes which cannot produce the + // same value, we can fold away N. 
+ // select (setcc X), Y, (select (setcc X), Z, Y) -> select (setcc X), Z, Y + auto IsSelect = [](SDValue Op) { + return Op->getOpcode() == ISD::SELECT; + }; + if ((IsSelect(N1) || IsSelect(N2)) && (N1.getOpcode() != N2.getOpcode())) { + auto AreSame = [](SDValue Op0, SDValue Op1) { + if (Op0 == Op1) + return true; + auto *C0 = dyn_cast<ConstantSDNode>(Op0); + auto *C1 = dyn_cast<ConstantSDNode>(Op1); + return C0 && C1 && + APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue()); + }; + + SDValue OtherSelect; + bool SelectsShareOp = false; + if (IsSelect(N1)) { + OtherSelect = N1; + SelectsShareOp = AreSame(OtherSelect.getOperand(1), N2); + } else { + OtherSelect = N2; + SelectsShareOp = AreSame(OtherSelect.getOperand(2), N1); + } + + auto CanNeverBeEqual = [](SDValue SetCC0, SDValue SetCC1) { + if (SetCC0->getOpcode() != ISD::SETCC || + SetCC1->getOpcode() != ISD::SETCC || + SetCC0->getOperand(0) != SetCC1->getOperand(0)) + return false; + + ISD::CondCode CC0 = cast<CondCodeSDNode>(SetCC0.getOperand(2))->get(); + ISD::CondCode CC1 = cast<CondCodeSDNode>(SetCC1.getOperand(2))->get(); + auto *C0 = dyn_cast<ConstantSDNode>(SetCC0.getOperand(1)); + auto *C1 = dyn_cast<ConstantSDNode>(SetCC1.getOperand(1)); + if (!C0 || !C1) + return false; + + bool AreInverse = ISD::getSetCCInverse(CC0, C0->getValueType(0)) == CC1; + bool ConstantsAreSame = + APInt::isSameValue(C0->getAPIntValue(), C1->getAPIntValue()); + auto IsEqual = [](ISD::CondCode CC) { + return CC == ISD::SETEQ; + }; + + if (ConstantsAreSame && AreInverse) + return true; + if (!ConstantsAreSame && IsEqual(CC0) && IsEqual(CC1)) + return true; + + return false; + }; + + SDValue SetCC0 = N0; + SDValue SetCC1 = OtherSelect.getOperand(0); + if (SelectsShareOp && CanNeverBeEqual(SetCC0, SetCC1)) + return OtherSelect; + } + if (TLI.isOperationLegal(ISD::SELECT_CC, VT) || (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) { diff --git a/llvm/test/CodeGen/AArch64/redundant-select.ll b/llvm/test/CodeGen/AArch64/redundant-select.ll new file mode 100644 index 0000000..0ebe738 --- /dev/null 
+++ b/llvm/test/CodeGen/AArch64/redundant-select.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s + +define i64 @not_redundant_ugt_eq(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) { +; CHECK-LABEL: not_redundant_ugt_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #40 +; CHECK-NEXT: csel x8, x1, x2, eq +; CHECK-NEXT: csel x8, x8, x1, hi +; CHECK-NEXT: sub x0, x8, x3 +; CHECK-NEXT: ret + %ugt = icmp ugt i64 %arg0, 40 + %eq = icmp eq i64 %arg0, 40 + %select = select i1 %eq, i64 %arg1, i64 %arg2 + %select.1 = select i1 %ugt, i64 %select, i64 %arg1 + %res = sub i64 %select.1, %arg3 + ret i64 %res +} +define i64 @redundant_ugt_ule(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) { +; CHECK-LABEL: redundant_ugt_ule: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #41 +; CHECK-NEXT: csel x8, x1, x2, lo +; CHECK-NEXT: sub x0, x8, x3 +; CHECK-NEXT: ret + %ugt = icmp ugt i64 %arg0, 40 + %ule = icmp ule i64 %arg0, 40 + %select = select i1 %ule, i64 %arg1, i64 %arg2 + %select.1 = select i1 %ugt, i64 %select, i64 %arg1 + %res = sub i64 %select.1, %arg3 + ret i64 %res +} +define i64 @not_redundant_uge_ule(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) { +; CHECK-LABEL: not_redundant_uge_ule: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #41 +; CHECK-NEXT: csel x8, x1, x2, lo +; CHECK-NEXT: cmp x0, #39 +; CHECK-NEXT: csel x8, x8, x1, hi +; CHECK-NEXT: sub x0, x8, x3 +; CHECK-NEXT: ret + %uge = icmp uge i64 %arg0, 40 + %ule = icmp ule i64 %arg0, 40 + %select = select i1 %ule, i64 %arg1, i64 %arg2 + %select.1 = select i1 %uge, i64 %select, i64 %arg1 + %res = sub i64 %select.1, %arg3 + ret i64 %res +} +define i64 @not_redundant_eq_eq(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) { +; CHECK-LABEL: not_redundant_eq_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #40 +; CHECK-NEXT: csel x8, x1, x2, eq +; CHECK-NEXT: csel x8, x8, x1, eq +; CHECK-NEXT: sub x0, x8, x3 +; CHECK-NEXT: ret + %ugt = icmp eq i64 
%arg0, 40 + %eq = icmp eq i64 %arg0, 40 + %select = select i1 %eq, i64 %arg1, i64 %arg2 + %select.1 = select i1 %ugt, i64 %select, i64 %arg1 + %res = sub i64 %select.1, %arg3 + ret i64 %res +} +define i64 @redundant_ne_eq(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) { +; CHECK-LABEL: redundant_ne_eq: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #40 +; CHECK-NEXT: csel x8, x1, x2, eq +; CHECK-NEXT: sub x0, x8, x3 +; CHECK-NEXT: ret + %ne = icmp ne i64 %arg0, 40 + %eq = icmp eq i64 %arg0, 40 + %select = select i1 %eq, i64 %arg1, i64 %arg2 + %select.1 = select i1 %ne, i64 %select, i64 %arg1 + %res = sub i64 %select.1, %arg3 + ret i64 %res +} diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll index 77faf67..d3669c2 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -5572,11 +5572,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.d fa0, fs0 ; CHECK-NOV-NEXT: call __fixunsdfti@plt -; CHECK-NOV-NEXT: snez a2, a1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a0, a1, a0 ; CHECK-NOV-NEXT: snez a1, s1 @@ -5627,11 +5623,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-V-NEXT: seqz a3, s1 ; CHECK-V-NEXT: addi a3, a3, -1 ; CHECK-V-NEXT: and a2, a3, a2 -; CHECK-V-NEXT: snez a3, a1 -; CHECK-V-NEXT: addi a3, a3, -1 -; CHECK-V-NEXT: and a0, a3, a0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) @@ -6062,11 +6054,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NOV-NEXT: mv s1, a1 ; CHECK-NOV-NEXT: fmv.s fa0, fs0 ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: snez a2, a1 
-; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a0, a1, a0 ; CHECK-NOV-NEXT: snez a1, s1 @@ -6117,11 +6105,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-V-NEXT: seqz a3, s1 ; CHECK-V-NEXT: addi a3, a3, -1 ; CHECK-V-NEXT: and a2, a3, a2 -; CHECK-V-NEXT: snez a3, a1 -; CHECK-V-NEXT: addi a3, a3, -1 -; CHECK-V-NEXT: and a0, a3, a0 -; CHECK-V-NEXT: addi a1, a1, -1 -; CHECK-V-NEXT: seqz a1, a1 +; CHECK-V-NEXT: snez a1, a1 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a0, a1, a0 ; CHECK-V-NEXT: sd a0, 24(sp) @@ -6547,11 +6531,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NOV-NEXT: mv a0, s0 ; CHECK-NOV-NEXT: call __extendhfsf2@plt ; CHECK-NOV-NEXT: call __fixunssfti@plt -; CHECK-NOV-NEXT: snez a2, a1 -; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: addi a1, a1, -1 -; CHECK-NOV-NEXT: seqz a1, a1 +; CHECK-NOV-NEXT: snez a1, a1 ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: and a0, a1, a0 ; CHECK-NOV-NEXT: snez a1, s2 @@ -6580,8 +6560,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 -; CHECK-V-NEXT: mv s0, a0 -; CHECK-V-NEXT: mv a0, a1 +; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: call __fixunssfti@plt ; CHECK-V-NEXT: mv s1, a0 @@ -6599,17 +6578,13 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-V-NEXT: snez a1, s2 ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: and a1, a1, s1 -; CHECK-V-NEXT: addi s2, s2, -1 -; CHECK-V-NEXT: seqz a2, s2 -; CHECK-V-NEXT: addi a2, a2, -1 -; CHECK-V-NEXT: and a1, a2, a1 -; CHECK-V-NEXT: sd a1, 8(sp) -; CHECK-V-NEXT: sd a0, 0(sp) -; CHECK-V-NEXT: addi a0, sp, 8 -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; 
CHECK-V-NEXT: vle64.v v9, (a0) +; CHECK-V-NEXT: sd a1, 0(sp) +; CHECK-V-NEXT: sd a0, 8(sp) ; CHECK-V-NEXT: mv a0, sp +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vle64.v v8, (a0) +; CHECK-V-NEXT: addi a0, sp, 8 +; CHECK-V-NEXT: vle64.v v9, (a0) ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll index 9a39c62..bd5fbdb 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -574,16 +574,11 @@ define i64 @utest_f64i64_cse_combine(double %x) #0 { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui double %x to i128 @@ -1509,16 +1504,11 @@ define i64 @utest_f64i64_mm(double %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui double %x to i128 @@ -1627,16 +1617,11 @@ define i64 @utest_f32i64_mm(float %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; 
CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui float %x to i128 @@ -1749,16 +1734,11 @@ define i64 @utesth_f16i64_mm(half %x) { ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui half %x to i128 diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index 007802d..18a90e6 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -2309,27 +2309,17 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return entry: @@ -2611,27 +2601,17 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: 
i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return entry: @@ -2917,27 +2897,17 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 6 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: i64.eqz ; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return entry: diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll index c351c1b..e9c787c 100644 --- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll @@ -2766,8 +2766,6 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 ; CHECK-NEXT: cmovneq %rcx, %rbx -; CHECK-NEXT: cmpq $1, %r14 -; CHECK-NEXT: cmoveq %rcx, %rbx ; CHECK-NEXT: movq %rbx, %xmm0 ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -2930,8 +2928,6 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 ; CHECK-NEXT: cmovneq %rcx, %rbx -; CHECK-NEXT: cmpq $1, %r14 -; CHECK-NEXT: cmoveq %rcx, 
%rbx ; CHECK-NEXT: movq %rbx, %xmm0 ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] @@ -3095,8 +3091,6 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: cmoveq %rcx, %rax ; CHECK-NEXT: testq %r14, %r14 ; CHECK-NEXT: cmovneq %rcx, %rbx -; CHECK-NEXT: cmpq $1, %r14 -; CHECK-NEXT: cmoveq %rcx, %rbx ; CHECK-NEXT: movq %rbx, %xmm0 ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]