From: Samuel Parker Date: Thu, 26 Jan 2023 10:26:24 +0000 (+0000) Subject: [NFC][WebAssembly] Updated tests X-Git-Tag: upstream/17.0.6~19536 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=41080b2fdd4b6c57d5a2926d6157b9847342b3a1;p=platform%2Fupstream%2Fllvm.git [NFC][WebAssembly] Updated tests Run update_llc_test_checks on a number of codegen tests. --- diff --git a/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll b/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll index f4144f8..b26b502 100644 --- a/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll +++ b/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll @@ -1,94 +1,118 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s ; Test that basic 32-bit floating-point comparison operations assemble as ; expected. target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: ord_f32: -; CHECK-NEXT: .functype ord_f32 (f32, f32) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: f32.eq $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.eq $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: i32.and $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @ord_f32(float %x, float %y) { +; CHECK-LABEL: ord_f32: +; CHECK: .functype ord_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: f32.eq $push1=, $pop4, $pop3 +; CHECK-NEXT: local.get $push6=, 1 +; CHECK-NEXT: local.get $push5=, 1 +; CHECK-NEXT: f32.eq $push0=, $pop6, $pop5 +; CHECK-NEXT: i32.and $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 %a = fcmp ord float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: uno_f32: -; CHECK-NEXT: .functype uno_f32 (f32, f32) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: f32.ne $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.ne $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @uno_f32(float %x, float %y) { +; CHECK-LABEL: uno_f32: +; CHECK: .functype uno_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: f32.ne $push1=, $pop4, $pop3 +; CHECK-NEXT: local.get $push6=, 1 +; CHECK-NEXT: local.get $push5=, 1 +; CHECK-NEXT: f32.ne $push0=, $pop6, $pop5 +; CHECK-NEXT: i32.or $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 %a = fcmp uno float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: oeq_f32: -; CHECK-NEXT: .functype oeq_f32 (f32, f32) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.eq $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @oeq_f32(float %x, float %y) { +; CHECK-LABEL: oeq_f32: +; CHECK: .functype oeq_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.eq $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp oeq float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: une_f32: -; CHECK: f32.ne $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @une_f32(float %x, float %y) { +; CHECK-LABEL: une_f32: +; CHECK: .functype une_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.ne $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp une float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: olt_f32: -; CHECK: f32.lt $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @olt_f32(float %x, float %y) { +; CHECK-LABEL: olt_f32: +; CHECK: .functype olt_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.lt $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp olt float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: ole_f32: -; CHECK: f32.le $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @ole_f32(float %x, float %y) { +; CHECK-LABEL: ole_f32: +; CHECK: .functype ole_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.le $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp ole float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: ogt_f32: -; CHECK: f32.gt $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @ogt_f32(float %x, float %y) { +; CHECK-LABEL: ogt_f32: +; CHECK: .functype ogt_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.gt $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp ogt float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: oge_f32: -; CHECK: f32.ge $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @oge_f32(float %x, float %y) { +; CHECK-LABEL: oge_f32: +; CHECK: .functype oge_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.ge $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp oge float %x, %y %b = zext i1 %a to i32 ret i32 %b @@ -97,104 +121,117 @@ define i32 @oge_f32(float %x, float %y) { ; Expanded comparisons, which also check for NaN. ; These simply rely on SDAG's Expand cond code action. -; CHECK-LABEL: ueq_f32: -; CHECK-NEXT: .functype ueq_f32 (f32, f32) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} -; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $pop[[C0]]{{$}} -; CHECK-NEXT: return $pop[[NUM3]]{{$}} define i32 @ueq_f32(float %x, float %y) { +; CHECK-LABEL: ueq_f32: +; CHECK: .functype ueq_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push6=, 0 +; CHECK-NEXT: local.get $push5=, 1 +; CHECK-NEXT: f32.gt $push1=, $pop6, $pop5 +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: local.get $push7=, 1 +; CHECK-NEXT: f32.lt $push0=, $pop8, $pop7 +; CHECK-NEXT: i32.or $push2=, $pop1, $pop0 +; CHECK-NEXT: i32.const $push3=, 1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; CHECK-NEXT: return $pop4 %a = fcmp ueq float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: one_f32: -; CHECK-NEXT: .functype one_f32 (f32, f32) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} -; CHECK-NEXT: return $pop[[NUM4]] define i32 @one_f32(float %x, float %y) { +; CHECK-LABEL: one_f32: +; CHECK: .functype one_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 1 +; CHECK-NEXT: f32.gt $push1=, $pop4, $pop3 +; CHECK-NEXT: local.get $push6=, 0 +; CHECK-NEXT: local.get $push5=, 1 +; CHECK-NEXT: f32.lt $push0=, $pop6, $pop5 +; CHECK-NEXT: i32.or $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 %a = fcmp one float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: ult_f32: -; CHECK-NEXT: .functype ult_f32 (f32, f32) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.ge $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[C0]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @ult_f32(float %x, float %y) { +; CHECK-LABEL: ult_f32: +; CHECK: .functype ult_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 1 +; CHECK-NEXT: f32.ge $push0=, $pop4, $pop3 +; CHECK-NEXT: i32.const $push1=, 1 +; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp ult float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: ule_f32: -; CHECK-NEXT: .functype ule_f32 (f32, f32) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[C0]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @ule_f32(float %x, float %y) { +; CHECK-LABEL: ule_f32: +; CHECK: .functype ule_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 1 +; CHECK-NEXT: f32.gt $push0=, $pop4, $pop3 +; CHECK-NEXT: i32.const $push1=, 1 +; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp ule float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: ugt_f32: -; CHECK-NEXT: .functype ugt_f32 (f32, f32) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.le $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[C0]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @ugt_f32(float %x, float %y) { +; CHECK-LABEL: ugt_f32: +; CHECK: .functype ugt_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 1 +; CHECK-NEXT: f32.le $push0=, $pop4, $pop3 +; CHECK-NEXT: i32.const $push1=, 1 +; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp ugt float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: uge_f32: -; CHECK-NEXT: .functype uge_f32 (f32, f32) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.lt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[C0]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @uge_f32(float %x, float %y) { +; CHECK-LABEL: uge_f32: +; CHECK: .functype uge_f32 (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 1 +; CHECK-NEXT: f32.lt $push0=, $pop4, $pop3 +; CHECK-NEXT: i32.const $push1=, 1 +; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp uge float %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: olt_f32_branch -; CHECK: local.get $push[[L4:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1 -; CHECK-NEXT: f32.lt $push[[NUM0:[0-9]+]]=, $pop[[L4]], $pop[[L3]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @olt_f32_branch(float %a, float %b) { +; CHECK-LABEL: olt_f32_branch: +; CHECK: .functype olt_f32_branch (f32, f32) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.lt $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label0 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB14_2: # %if.end +; CHECK-NEXT: end_block # label0: +; CHECK-NEXT: return entry: %cmp = fcmp olt float %a, %b br i1 %cmp, label %if.then, label %if.end @@ -207,14 +244,21 @@ if.end: ret void } -; CHECK-LABEL: ole_f32_branch -; CHECK: local.get $push[[L4:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1 -; CHECK-NEXT: f32.le $push[[NUM0:[0-9]+]]=, $pop[[L4]], $pop[[L3]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @ole_f32_branch(float %a, float %b) { +; CHECK-LABEL: ole_f32_branch: +; CHECK: .functype ole_f32_branch (f32, f32) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.le $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label1 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB15_2: # %if.end +; CHECK-NEXT: end_block # label1: +; CHECK-NEXT: return entry: %cmp = fcmp ole float %a, %b br i1 %cmp, label %if.then, label %if.end @@ -227,14 +271,21 @@ if.end: ret void } -; CHECK-LABEL: ugt_f32_branch -; CHECK: local.get $push[[L4:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1 -; CHECK-NEXT: f32.le $push[[NUM0:[0-9]+]]=, $pop[[L4]], $pop[[L3]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @ugt_f32_branch(float %a, float %b) { +; CHECK-LABEL: ugt_f32_branch: +; CHECK: .functype ugt_f32_branch (f32, f32) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.le $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB16_2: # %if.end +; CHECK-NEXT: end_block # label2: +; CHECK-NEXT: return entry: %cmp = fcmp ugt float %a, %b br i1 %cmp, label %if.end, label %if.then @@ -247,14 +298,21 @@ if.end: ret void } -; CHECK-LABEL: ogt_f32_branch -; CHECK: local.get $push[[L4:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1 -; CHECK-NEXT: f32.gt $push[[NUM0:[0-9]+]]=, $pop[[L4]], $pop[[L3]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @ogt_f32_branch(float %a, float %b) { +; CHECK-LABEL: ogt_f32_branch: +; CHECK: .functype ogt_f32_branch (f32, f32) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.gt $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label3 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB17_2: # %if.end +; CHECK-NEXT: end_block # label3: +; CHECK-NEXT: return entry: %cmp = fcmp ogt float %a, %b br i1 %cmp, label %if.then, label %if.end @@ -267,14 +325,21 @@ if.end: ret void } -; CHECK-LABEL: ult_f32_branch -; CHECK: local.get $push[[L4:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1 -; CHECK-NEXT: f32.ge $push[[NUM0:[0-9]+]]=, $pop[[L4]], $pop[[L3]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @ult_f32_branch(float %a, float %b) { +; CHECK-LABEL: ult_f32_branch: +; CHECK: .functype ult_f32_branch (f32, f32) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.ge $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label4 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB18_2: # %if.end +; CHECK-NEXT: end_block # label4: +; CHECK-NEXT: return entry: %cmp = fcmp ult float %a, %b br i1 %cmp, label %if.end, label %if.then @@ -287,14 +352,21 @@ if.end: ret void } -; CHECK-LABEL: ule_f32_branch -; CHECK: local.get $push[[L4:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1 -; CHECK-NEXT: f32.ge $push[[NUM0:[0-9]+]]=, $pop[[L4]], $pop[[L3]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @ule_f32_branch(float %a, float %b) { +; CHECK-LABEL: ule_f32_branch: +; CHECK: .functype ule_f32_branch (f32, f32) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.ge $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label5 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB19_2: # %if.end +; CHECK-NEXT: end_block # label5: +; CHECK-NEXT: return entry: %cmp = fcmp ult float %a, %b br i1 %cmp, label %if.end, label %if.then @@ -307,16 +379,31 @@ if.end: ret void } -; CHECK-LABEL: xor_zext_switch -; CHECK: i32.const $push[[L1:[0-9]+]]=, 0 -; CHECK-NEXT: br_if 0, $pop[[L1]] -; CHECK-NEXT: block -; CHECK-NEXT: block -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 1 -; CHECK-NEXT: f32.ge $push[[L0:[0-9]+]]=, $pop[[L3]], $pop[[L2]] -; CHECK-NEXT: br_table $pop[[L0]], 0, 1, 0 define void @xor_zext_switch(float %a, float %b) { +; CHECK-LABEL: xor_zext_switch: +; CHECK: .functype xor_zext_switch (f32, f32) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: i32.const $push1=, 0 +; CHECK-NEXT: br_if 0, $pop1 # 0: down to label6 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: local.get $push2=, 1 +; CHECK-NEXT: f32.ge $push0=, $pop3, $pop2 +; CHECK-NEXT: br_table $pop0, 0, 1, 0 # 0: down to label8 +; CHECK-NEXT: # 1: down to label7 +; CHECK-NEXT: .LBB20_2: # %sw.bb.1 +; CHECK-NEXT: end_block # label8: +; CHECK-NEXT: call foo1 +; CHECK-NEXT: return +; CHECK-NEXT: .LBB20_3: # %sw.bb.2 +; CHECK-NEXT: end_block # label7: +; CHECK-NEXT: call foo2 +; CHECK-NEXT: .LBB20_4: # %exit +; CHECK-NEXT: end_block # label6: +; CHECK-NEXT: return entry: %cmp = fcmp ult float %a, %b %zext = zext i1 %cmp to i32 @@ -338,18 +425,41 @@ exit: ret void } -; CHECK-LABEL: xor_add_switch -; CHECK: local.get $push[[L8:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L7:[0-9]+]]=, 1 -; CHECK-NEXT: f32.ge $push[[L1:[0-9]+]]=, $pop[[L8]], $pop[[L7]] -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]] -; CHECK-NEXT: i32.const $push[[L6:[0-9]+]]=, 1 -; CHECK-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L3]], $pop[[L6]] -; CHECK-NEXT: i32.const $push[[L5:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[L0:[0-9]+]]=, $pop[[L4]], $pop[[L5]] -; CHECK-NEXT: br_table $pop[[L0]], 0, 1, 2, 3 define void @xor_add_switch(float %a, float %b) { +; CHECK-LABEL: xor_add_switch: +; CHECK: .functype xor_add_switch (f32, f32) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: local.get $push7=, 1 +; CHECK-NEXT: f32.ge $push1=, $pop8, $pop7 +; CHECK-NEXT: i32.const $push2=, 1 +; CHECK-NEXT: i32.xor $push3=, $pop1, $pop2 +; CHECK-NEXT: i32.const $push6=, 1 +; CHECK-NEXT: i32.add $push4=, $pop3, $pop6 +; CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.xor $push0=, $pop4, $pop5 +; CHECK-NEXT: br_table $pop0, 0, 1, 2, 3 # 0: down to label12 +; CHECK-NEXT: # 1: down to label11 +; CHECK-NEXT: # 2: down to label10 +; CHECK-NEXT: # 3: down to label9 +; CHECK-NEXT: .LBB21_1: # %sw.bb.1 +; CHECK-NEXT: end_block # label12: +; CHECK-NEXT: call foo1 +; CHECK-NEXT: return +; CHECK-NEXT: .LBB21_2: # %sw.bb.2 +; CHECK-NEXT: end_block # label11: +; CHECK-NEXT: call foo2 +; CHECK-NEXT: return +; CHECK-NEXT: .LBB21_3: # %sw.bb.3 +; CHECK-NEXT: end_block # label10: +; CHECK-NEXT: call foo3 +; CHECK-NEXT: .LBB21_4: # %exit +; CHECK-NEXT: end_block # label9: +; CHECK-NEXT: return entry: %cmp = fcmp ult float %a, %b %zext = zext i1 %cmp to i32 diff --git a/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll b/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll index 03a85f4..30828ad 100644 --- a/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll +++ b/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll @@ -1,94 +1,118 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s ; Test that basic 64-bit floating-point comparison operations assemble as ; expected. target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: ord_f64: -; CHECK-NEXT: .functype ord_f64 (f64, f64) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: f64.eq $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.eq $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: i32.and $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @ord_f64(double %x, double %y) { +; CHECK-LABEL: ord_f64: +; CHECK: .functype ord_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: f64.eq $push1=, $pop4, $pop3 +; CHECK-NEXT: local.get $push6=, 1 +; CHECK-NEXT: local.get $push5=, 1 +; CHECK-NEXT: f64.eq $push0=, $pop6, $pop5 +; CHECK-NEXT: i32.and $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 %a = fcmp ord double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: uno_f64: -; CHECK-NEXT: .functype uno_f64 (f64, f64) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: f64.ne $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.ne $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @uno_f64(double %x, double %y) { +; CHECK-LABEL: uno_f64: +; CHECK: .functype uno_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: f64.ne $push1=, $pop4, $pop3 +; CHECK-NEXT: local.get $push6=, 1 +; CHECK-NEXT: local.get $push5=, 1 +; CHECK-NEXT: f64.ne $push0=, $pop6, $pop5 +; CHECK-NEXT: i32.or $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 %a = fcmp uno double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: oeq_f64: -; CHECK-NEXT: .functype oeq_f64 (f64, f64) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.eq $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @oeq_f64(double %x, double %y) { +; CHECK-LABEL: oeq_f64: +; CHECK: .functype oeq_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.eq $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp oeq double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: une_f64: -; CHECK: f64.ne $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @une_f64(double %x, double %y) { +; CHECK-LABEL: une_f64: +; CHECK: .functype une_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.ne $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp une double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: olt_f64: -; CHECK: f64.lt $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @olt_f64(double %x, double %y) { +; CHECK-LABEL: olt_f64: +; CHECK: .functype olt_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.lt $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp olt double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: ole_f64: -; CHECK: f64.le $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @ole_f64(double %x, double %y) { +; CHECK-LABEL: ole_f64: +; CHECK: .functype ole_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.le $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp ole double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: ogt_f64: -; CHECK: f64.gt $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @ogt_f64(double %x, double %y) { +; CHECK-LABEL: ogt_f64: +; CHECK: .functype ogt_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.gt $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp ogt double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: oge_f64: -; CHECK: f64.ge $push[[NUM:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} define i32 @oge_f64(double %x, double %y) { +; CHECK-LABEL: oge_f64: +; CHECK: .functype oge_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.ge $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fcmp oge double %x, %y %b = zext i1 %a to i32 ret i32 %b @@ -96,104 +120,117 @@ define i32 @oge_f64(double %x, double %y) { ; Expanded comparisons, which also check for NaN. -; CHECK-LABEL: ueq_f64: -; CHECK-NEXT: .functype ueq_f64 (f64, f64) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} -; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $pop[[C0]]{{$}} -; CHECK-NEXT: return $pop[[NUM3]]{{$}} define i32 @ueq_f64(double %x, double %y) { +; CHECK-LABEL: ueq_f64: +; CHECK: .functype ueq_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push6=, 0 +; CHECK-NEXT: local.get $push5=, 1 +; CHECK-NEXT: f64.gt $push1=, $pop6, $pop5 +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: local.get $push7=, 1 +; CHECK-NEXT: f64.lt $push0=, $pop8, $pop7 +; CHECK-NEXT: i32.or $push2=, $pop1, $pop0 +; CHECK-NEXT: i32.const $push3=, 1 +; CHECK-NEXT: i32.xor $push4=, $pop2, $pop3 +; CHECK-NEXT: return $pop4 %a = fcmp ueq double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: one_f64: -; CHECK-NEXT: .functype one_f64 (f64, f64) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} -; CHECK-NEXT: return $pop[[NUM4]] define i32 @one_f64(double %x, double %y) { +; CHECK-LABEL: one_f64: +; CHECK: .functype one_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 1 +; CHECK-NEXT: f64.gt $push1=, $pop4, $pop3 +; CHECK-NEXT: local.get $push6=, 0 +; CHECK-NEXT: local.get $push5=, 1 +; CHECK-NEXT: f64.lt $push0=, $pop6, $pop5 +; CHECK-NEXT: i32.or $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 %a = fcmp one double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: ult_f64: -; CHECK-NEXT: .functype ult_f64 (f64, f64) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.ge $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[C0]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @ult_f64(double %x, double %y) { +; CHECK-LABEL: ult_f64: +; CHECK: .functype ult_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 1 +; CHECK-NEXT: f64.ge $push0=, $pop4, $pop3 +; CHECK-NEXT: i32.const $push1=, 1 +; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp ult double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: ule_f64: -; CHECK-NEXT: .functype ule_f64 (f64, f64) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[C0]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @ule_f64(double %x, double %y) { +; CHECK-LABEL: ule_f64: +; CHECK: .functype ule_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 1 +; CHECK-NEXT: f64.gt $push0=, $pop4, $pop3 +; CHECK-NEXT: i32.const $push1=, 1 +; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp ule double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: ugt_f64: -; CHECK-NEXT: .functype ugt_f64 (f64, f64) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.le $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[C0]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @ugt_f64(double %x, double %y) { +; CHECK-LABEL: ugt_f64: +; CHECK: .functype ugt_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 1 +; CHECK-NEXT: f64.le $push0=, $pop4, $pop3 +; CHECK-NEXT: i32.const $push1=, 1 +; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp ugt double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: uge_f64: -; CHECK-NEXT: .functype uge_f64 (f64, f64) -> (i32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.lt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[C0]]{{$}} -; CHECK-NEXT: return $pop[[NUM2]]{{$}} define i32 @uge_f64(double %x, double %y) { +; CHECK-LABEL: uge_f64: +; CHECK: .functype uge_f64 (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 1 +; CHECK-NEXT: f64.lt $push0=, $pop4, $pop3 +; CHECK-NEXT: i32.const $push1=, 1 +; CHECK-NEXT: i32.xor $push2=, $pop0, $pop1 +; CHECK-NEXT: return $pop2 %a = fcmp uge double %x, %y %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: olt_f64_branch: -; CHECK: local.get $push[[L0:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1 -; CHECK-NEXT: f64.lt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @olt_f64_branch(double %a, double %b) { +; CHECK-LABEL: olt_f64_branch: +; CHECK: .functype olt_f64_branch (f64, f64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.lt $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label0 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB14_2: # %if.end +; CHECK-NEXT: end_block # label0: +; CHECK-NEXT: return entry: %cmp = fcmp olt double %a, %b br i1 %cmp, label %if.then, label %if.end @@ -206,14 +243,21 @@ if.end: ret void } -; CHECK-LABEL: ole_f64_branch: -; CHECK: local.get $push[[L0:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1 -; CHECK-NEXT: f64.le $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @ole_f64_branch(double %a, double %b) { +; CHECK-LABEL: ole_f64_branch: +; CHECK: .functype ole_f64_branch (f64, f64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.le $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label1 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB15_2: # %if.end +; CHECK-NEXT: end_block # label1: +; CHECK-NEXT: return entry: %cmp = fcmp ole double %a, %b br i1 %cmp, label %if.then, label %if.end @@ -226,14 +270,21 @@ if.end: ret void } -; CHECK-LABEL: ugt_f64_branch: -; CHECK: local.get $push[[L0:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1 -; CHECK-NEXT: f64.le $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @ugt_f64_branch(double %a, double %b) { +; CHECK-LABEL: ugt_f64_branch: +; CHECK: .functype ugt_f64_branch (f64, f64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.le $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB16_2: # %if.end +; CHECK-NEXT: end_block # label2: +; CHECK-NEXT: return entry: %cmp = fcmp ugt double %a, %b br i1 %cmp, label %if.end, label %if.then @@ -246,14 +297,21 @@ if.end: ret void } -; CHECK-LABEL: ogt_f64_branch: -; CHECK: local.get $push[[L0:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1 -; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @ogt_f64_branch(double %a, double %b) { +; CHECK-LABEL: ogt_f64_branch: +; CHECK: .functype ogt_f64_branch (f64, f64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.gt $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label3 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB17_2: # %if.end +; CHECK-NEXT: end_block # label3: +; CHECK-NEXT: return entry: %cmp = fcmp ogt double %a, %b br i1 %cmp, label %if.then, label %if.end @@ -266,14 +324,21 @@ if.end: ret void } -; CHECK-LABEL: ult_f64_branch: -; CHECK: local.get $push[[L0:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1 -; CHECK-NEXT: f64.ge $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @ult_f64_branch(double %a, double %b) { +; CHECK-LABEL: ult_f64_branch: +; CHECK: .functype ult_f64_branch (f64, f64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.ge $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label4 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB18_2: # %if.end +; CHECK-NEXT: end_block # label4: +; CHECK-NEXT: return entry: %cmp = fcmp ult double %a, %b br i1 %cmp, label %if.end, label %if.then @@ -286,14 +351,21 @@ if.end: ret void } -; CHECK-LABEL: ule_f64_branch: -; CHECK: local.get $push[[L0:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1 -; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; CHECK-NEXT: i32.eqz $push[[NUM3:[0-9]+]]=, $pop[[NUM0]] -; CHECK-NEXT: br_if 0, $pop[[NUM3]] -; CHECK-NEXT: call call1 define void @ule_f64_branch(double %a, double %b) { +; CHECK-LABEL: ule_f64_branch: +; CHECK: .functype ule_f64_branch (f64, f64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.gt $push0=, $pop2, $pop1 +; CHECK-NEXT: i32.eqz $push3=, $pop0 +; CHECK-NEXT: br_if 0, $pop3 # 0: down to label5 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: call call1 +; CHECK-NEXT: .LBB19_2: # %if.end +; CHECK-NEXT: end_block # label5: +; CHECK-NEXT: return entry: %cmp = fcmp ule double %a, %b br i1 %cmp, label %if.end, label %if.then @@ -306,16 +378,31 @@ if.end: ret void } -; CHECK-LABEL: xor_zext_switch -; CHECK: i32.const $push[[L1:[0-9]+]]=, 0 -; CHECK-NEXT: br_if 0, $pop[[L1]] -; CHECK-NEXT: block -; CHECK-NEXT: block -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 1 -; CHECK-NEXT: f64.ge $push[[L0:[0-9]+]]=, $pop[[L3]], $pop[[L2]] -; CHECK-NEXT: br_table $pop[[L0]], 0, 1, 0 define void @xor_zext_switch(double %a, double %b) { +; CHECK-LABEL: xor_zext_switch: +; CHECK: .functype xor_zext_switch (f64, f64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: i32.const $push1=, 0 +; CHECK-NEXT: br_if 0, $pop1 # 0: down to label6 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: local.get $push2=, 1 +; CHECK-NEXT: f64.ge $push0=, $pop3, $pop2 +; CHECK-NEXT: br_table $pop0, 0, 1, 0 # 0: down to label8 +; CHECK-NEXT: # 1: down to label7 +; CHECK-NEXT: .LBB20_2: # %sw.bb.1 +; CHECK-NEXT: end_block # label8: +; CHECK-NEXT: call foo1 +; CHECK-NEXT: return +; CHECK-NEXT: .LBB20_3: # %sw.bb.2 +; CHECK-NEXT: end_block # label7: +; CHECK-NEXT: call foo2 +; CHECK-NEXT: .LBB20_4: # %exit +; CHECK-NEXT: end_block # label6: +; CHECK-NEXT: return entry: %cmp = fcmp ult double %a, %b %zext = zext i1 %cmp to i32 @@ -337,18 +424,41 @@ exit: ret void } -; CHECK-LABEL: xor_add_switch -; CHECK: local.get $push[[L8:[0-9]+]]=, 0 -; CHECK-NEXT: local.get $push[[L7:[0-9]+]]=, 1 -; CHECK-NEXT: f64.ge $push[[L1:[0-9]+]]=, $pop[[L8]], $pop[[L7]] -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]] -; CHECK-NEXT: i32.const $push[[L6:[0-9]+]]=, 1 -; CHECK-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L3]], $pop[[L6]] -; CHECK-NEXT: i32.const $push[[L5:[0-9]+]]=, 1 -; CHECK-NEXT: i32.xor $push[[L0:[0-9]+]]=, $pop[[L4]], $pop[[L5]] -; CHECK-NEXT: br_table $pop[[L0]], 0, 1, 2, 3 define void @xor_add_switch(double %a, double %b) { +; CHECK-LABEL: xor_add_switch: +; CHECK: .functype xor_add_switch (f64, f64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: local.get $push7=, 1 +; CHECK-NEXT: f64.ge $push1=, $pop8, $pop7 +; CHECK-NEXT: i32.const $push2=, 1 +; CHECK-NEXT: i32.xor $push3=, $pop1, $pop2 +; CHECK-NEXT: i32.const $push6=, 1 +; CHECK-NEXT: i32.add $push4=, $pop3, $pop6 +; CHECK-NEXT: i32.const $push5=, 1 +; CHECK-NEXT: i32.xor $push0=, $pop4, $pop5 +; CHECK-NEXT: br_table $pop0, 0, 1, 2, 3 # 0: down to label12 +; CHECK-NEXT: # 1: down to label11 +; CHECK-NEXT: # 2: down to label10 +; CHECK-NEXT: # 3: down to label9 +; CHECK-NEXT: .LBB21_1: # %sw.bb.1 +; CHECK-NEXT: end_block # label12: +; CHECK-NEXT: call foo1 +; CHECK-NEXT: return +; CHECK-NEXT: .LBB21_2: # %sw.bb.2 +; CHECK-NEXT: end_block # label11: +; CHECK-NEXT: call foo2 +; CHECK-NEXT: return +; CHECK-NEXT: .LBB21_3: # %sw.bb.3 +; CHECK-NEXT: end_block # label10: +; CHECK-NEXT: call foo3 +; CHECK-NEXT: .LBB21_4: # %exit +; CHECK-NEXT: end_block # label9: +; CHECK-NEXT: return entry: %cmp = fcmp ult double %a, %b %zext = zext i1 %cmp to i32 diff --git a/llvm/test/CodeGen/WebAssembly/f32.ll b/llvm/test/CodeGen/WebAssembly/f32.ll index 1a75245..6a3f31f 100644 --- a/llvm/test/CodeGen/WebAssembly/f32.ll +++ b/llvm/test/CodeGen/WebAssembly/f32.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s ; Test that basic 32-bit floating-point operations assemble as expected. @@ -14,171 +15,241 @@ declare float @llvm.nearbyint.f32(float) declare float @llvm.rint.f32(float) declare float @llvm.fma.f32(float, float, float) -; CHECK-LABEL: fadd32: -; CHECK-NEXT: .functype fadd32 (f32, f32) -> (f32){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.add $push[[LR:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @fadd32(float %x, float %y) { +; CHECK-LABEL: fadd32: +; CHECK: .functype fadd32 (f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.add $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fadd float %x, %y ret float %a } -; CHECK-LABEL: fsub32: -; CHECK: f32.sub $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @fsub32(float %x, float %y) { +; CHECK-LABEL: fsub32: +; CHECK: .functype fsub32 (f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.sub $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fsub float %x, %y ret float %a } -; CHECK-LABEL: fmul32: -; CHECK: f32.mul $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @fmul32(float %x, float %y) { +; CHECK-LABEL: fmul32: +; CHECK: .functype fmul32 (f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.mul $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fmul float %x, %y ret float %a } -; CHECK-LABEL: fdiv32: -; CHECK: f32.div $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @fdiv32(float %x, float %y) { +; CHECK-LABEL: fdiv32: +; CHECK: .functype fdiv32 (f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.div $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fdiv float %x, %y ret float %a } -; CHECK-LABEL: fabs32: -; CHECK: f32.abs $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @fabs32(float %x) { +; CHECK-LABEL: fabs32: +; CHECK: .functype fabs32 (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f32.abs $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call float @llvm.fabs.f32(float %x) ret float %a } -; CHECK-LABEL: fneg32: -; CHECK: f32.neg $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @fneg32(float %x) { +; CHECK-LABEL: fneg32: +; CHECK: .functype fneg32 (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f32.neg $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = fsub float -0., %x ret float %a } -; CHECK-LABEL: copysign32: -; CHECK: f32.copysign $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @copysign32(float %x, float %y) { +; CHECK-LABEL: copysign32: +; CHECK: .functype copysign32 (f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.copysign $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call float @llvm.copysign.f32(float %x, float %y) ret float %a } -; CHECK-LABEL: sqrt32: -; CHECK: f32.sqrt $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @sqrt32(float %x) { +; CHECK-LABEL: sqrt32: +; CHECK: .functype sqrt32 (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f32.sqrt $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call float @llvm.sqrt.f32(float %x) ret float %a } -; CHECK-LABEL: ceil32: -; CHECK: f32.ceil $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @ceil32(float %x) { +; CHECK-LABEL: ceil32: +; CHECK: .functype ceil32 (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f32.ceil $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call float @llvm.ceil.f32(float %x) ret float %a } -; CHECK-LABEL: floor32: -; CHECK: f32.floor $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @floor32(float %x) { +; CHECK-LABEL: floor32: +; CHECK: .functype floor32 (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f32.floor $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call float @llvm.floor.f32(float %x) ret float %a } -; CHECK-LABEL: trunc32: -; CHECK: f32.trunc $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @trunc32(float %x) { +; CHECK-LABEL: trunc32: +; CHECK: .functype trunc32 (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f32.trunc $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call float @llvm.trunc.f32(float %x) ret float %a } -; CHECK-LABEL: nearest32: -; CHECK: f32.nearest $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @nearest32(float %x) { +; CHECK-LABEL: nearest32: +; CHECK: .functype nearest32 (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f32.nearest $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call float @llvm.nearbyint.f32(float %x) ret float %a } -; CHECK-LABEL: nearest32_via_rint: -; CHECK: f32.nearest $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @nearest32_via_rint(float %x) { +; CHECK-LABEL: nearest32_via_rint: +; CHECK: .functype nearest32_via_rint (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f32.nearest $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call float @llvm.rint.f32(float %x) ret float %a } -; CHECK-LABEL: fmin32: -; CHECK: f32.min $push1=, $pop{{[0-9]+}}, $pop[[LR]]{{$}} -; CHECK-NEXT: return $pop1{{$}} define float @fmin32(float %x) { +; CHECK-LABEL: fmin32: +; CHECK: .functype fmin32 (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: f32.const $push0=, 0x0p0 +; CHECK-NEXT: f32.min $push1=, $pop2, $pop0 +; CHECK-NEXT: return $pop1 %a = fcmp ult float %x, 0.0 %b = select i1 %a, float %x, float 0.0 ret float %b } -; CHECK-LABEL: fmax32: -; CHECK: f32.max $push1=, $pop{{[0-9]+}}, $pop[[LR]]{{$}} -; CHECK-NEXT: return $pop1{{$}} define float @fmax32(float %x) { +; CHECK-LABEL: fmax32: +; CHECK: .functype fmax32 (f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: f32.const $push0=, 0x0p0 +; CHECK-NEXT: f32.max $push1=, $pop2, $pop0 +; CHECK-NEXT: return $pop1 %a = fcmp ugt float %x, 0.0 %b = select i1 %a, float %x, float 0.0 ret float %b } -; CHECK-LABEL: fmin32_intrinsic: -; CHECK: f32.min $push0=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop0{{$}} declare float @llvm.minimum.f32(float, float) define float @fmin32_intrinsic(float %x, float %y) { +; CHECK-LABEL: fmin32_intrinsic: +; CHECK: .functype fmin32_intrinsic (f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.min $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call float @llvm.minimum.f32(float %x, float %y) ret float %a } -; CHECK-LABEL: fminnum32_intrinsic: -; CHECK: f32.min $push0=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop0{{$}} declare float @llvm.minnum.f32(float, float) define float @fminnum32_intrinsic(float %x, float %y) { +; CHECK-LABEL: fminnum32_intrinsic: +; CHECK: .functype fminnum32_intrinsic (f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.min $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call nnan float @llvm.minnum.f32(float %x, float %y) ret float %a } -; CHECK-LABEL: fmax32_intrinsic: -; CHECK: f32.max $push0=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop0{{$}} declare float @llvm.maximum.f32(float, float) define float @fmax32_intrinsic(float %x, float %y) { +; CHECK-LABEL: fmax32_intrinsic: +; CHECK: .functype fmax32_intrinsic (f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.max $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call float @llvm.maximum.f32(float %x, float %y) ret float %a } -; CHECK-LABEL: fmaxnum32_intrinsic: -; CHECK: f32.max $push0=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop0{{$}} declare float @llvm.maxnum.f32(float, float) define float @fmaxnum32_intrinsic(float %x, float %y) { +; CHECK-LABEL: fmaxnum32_intrinsic: +; CHECK: .functype fmaxnum32_intrinsic (f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.max $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call nnan float @llvm.maxnum.f32(float %x, float %y) ret float %a } -; CHECK-LABEL: fma32: -; CHECK: {{^}} call $push[[LR:[0-9]+]]=, fmaf, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define float @fma32(float %a, float %b, float %c) { +; CHECK-LABEL: fma32: +; CHECK: .functype fma32 (f32, f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: local.get $push2=, 1 +; CHECK-NEXT: local.get $push1=, 2 +; CHECK-NEXT: call $push0=, fmaf, $pop3, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %d = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %d } diff --git a/llvm/test/CodeGen/WebAssembly/f64.ll b/llvm/test/CodeGen/WebAssembly/f64.ll index 138044f..ab31b5d 100644 --- a/llvm/test/CodeGen/WebAssembly/f64.ll +++ b/llvm/test/CodeGen/WebAssembly/f64.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s ; Test that basic 64-bit floating-point operations assemble as expected. @@ -14,153 +15,215 @@ declare double @llvm.nearbyint.f64(double) declare double @llvm.rint.f64(double) declare double @llvm.fma.f64(double, double, double) -; CHECK-LABEL: fadd64: -; CHECK-NEXT: .functype fadd64 (f64, f64) -> (f64){{$}} -; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f64.add $push[[LR:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @fadd64(double %x, double %y) { +; CHECK-LABEL: fadd64: +; CHECK: .functype fadd64 (f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.add $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fadd double %x, %y ret double %a } -; CHECK-LABEL: fsub64: -; CHECK: f64.sub $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @fsub64(double %x, double %y) { +; CHECK-LABEL: fsub64: +; CHECK: .functype fsub64 (f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.sub $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fsub double %x, %y ret double %a } -; CHECK-LABEL: fmul64: -; CHECK: f64.mul $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @fmul64(double %x, double %y) { +; CHECK-LABEL: fmul64: +; CHECK: .functype fmul64 (f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.mul $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fmul double %x, %y ret double %a } -; CHECK-LABEL: fdiv64: -; CHECK: f64.div $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @fdiv64(double %x, double %y) { +; CHECK-LABEL: fdiv64: +; CHECK: .functype fdiv64 (f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.div $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = fdiv double %x, %y ret double %a } -; CHECK-LABEL: fabs64: -; CHECK: f64.abs $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @fabs64(double %x) { +; CHECK-LABEL: fabs64: +; CHECK: .functype fabs64 (f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f64.abs $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call double @llvm.fabs.f64(double %x) ret double %a } -; CHECK-LABEL: fneg64: -; CHECK: f64.neg $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @fneg64(double %x) { +; CHECK-LABEL: fneg64: +; CHECK: .functype fneg64 (f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f64.neg $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = fsub double -0., %x ret double %a } -; CHECK-LABEL: copysign64: -; CHECK: f64.copysign $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @copysign64(double %x, double %y) { +; CHECK-LABEL: copysign64: +; CHECK: .functype copysign64 (f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.copysign $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call double @llvm.copysign.f64(double %x, double %y) ret double %a } -; CHECK-LABEL: sqrt64: -; CHECK: f64.sqrt $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @sqrt64(double %x) { +; CHECK-LABEL: sqrt64: +; CHECK: .functype sqrt64 (f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f64.sqrt $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call double @llvm.sqrt.f64(double %x) ret double %a } -; CHECK-LABEL: ceil64: -; CHECK: f64.ceil $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @ceil64(double %x) { +; CHECK-LABEL: ceil64: +; CHECK: .functype ceil64 (f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f64.ceil $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call double @llvm.ceil.f64(double %x) ret double %a } -; CHECK-LABEL: floor64: -; CHECK: f64.floor $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @floor64(double %x) { +; CHECK-LABEL: floor64: +; CHECK: .functype floor64 (f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f64.floor $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call double @llvm.floor.f64(double %x) ret double %a } -; CHECK-LABEL: trunc64: -; CHECK: f64.trunc $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @trunc64(double %x) { +; CHECK-LABEL: trunc64: +; CHECK: .functype trunc64 (f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f64.trunc $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call double @llvm.trunc.f64(double %x) ret double %a } -; CHECK-LABEL: nearest64: -; CHECK: f64.nearest $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @nearest64(double %x) { +; CHECK-LABEL: nearest64: +; CHECK: .functype nearest64 (f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f64.nearest $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call double @llvm.nearbyint.f64(double %x) ret double %a } -; CHECK-LABEL: nearest64_via_rint: -; CHECK: f64.nearest $push[[LR:[0-9]+]]=, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @nearest64_via_rint(double %x) { +; CHECK-LABEL: nearest64_via_rint: +; CHECK: .functype nearest64_via_rint (f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push1=, 0 +; CHECK-NEXT: f64.nearest $push0=, $pop1 +; CHECK-NEXT: return $pop0 %a = call double @llvm.rint.f64(double %x) ret double %a } -; CHECK-LABEL: fmin64: -; CHECK: f64.min $push1=, $pop{{[0-9]+}}, $pop[[LR]]{{$}} -; CHECK-NEXT: return $pop1{{$}} define double @fmin64(double %x) { +; CHECK-LABEL: fmin64: +; CHECK: .functype fmin64 (f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: f64.const $push0=, 0x0p0 +; CHECK-NEXT: f64.min $push1=, $pop2, $pop0 +; CHECK-NEXT: return $pop1 %a = fcmp ult double %x, 0.0 %b = select i1 %a, double %x, double 0.0 ret double %b } -; CHECK-LABEL: fmax64: -; CHECK: f64.max $push1=, $pop{{[0-9]+}}, $pop[[LR]]{{$}} -; CHECK-NEXT: return $pop1{{$}} define double @fmax64(double %x) { +; CHECK-LABEL: fmax64: +; CHECK: .functype fmax64 (f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: f64.const $push0=, 0x0p0 +; CHECK-NEXT: f64.max $push1=, $pop2, $pop0 +; CHECK-NEXT: return $pop1 %a = fcmp ugt double %x, 0.0 %b = select i1 %a, double %x, double 0.0 ret double %b } -; CHECK-LABEL: fmin64_intrinsic: -; CHECK: f64.min $push0=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop0{{$}} declare double @llvm.minimum.f64(double, double) define double @fmin64_intrinsic(double %x, double %y) { +; CHECK-LABEL: fmin64_intrinsic: +; CHECK: .functype fmin64_intrinsic (f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.min $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call double @llvm.minimum.f64(double %x, double %y) ret double %a } -; CHECK-LABEL: fmax64_intrinsic: -; CHECK: f64.max $push0=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop0{{$}} declare double @llvm.maximum.f64(double, double) define double @fmax64_intrinsic(double %x, double %y) { +; CHECK-LABEL: fmax64_intrinsic: +; CHECK: .functype fmax64_intrinsic (f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.max $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call double @llvm.maximum.f64(double %x, double %y) ret double %a } -; CHECK-LABEL: fma64: -; CHECK: {{^}} call $push[[LR:[0-9]+]]=, fma, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[LR]]{{$}} define double @fma64(double %a, double %b, double %c) { +; CHECK-LABEL: fma64: +; CHECK: .functype fma64 (f64, f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: local.get $push2=, 1 +; CHECK-NEXT: local.get $push1=, 2 +; CHECK-NEXT: call $push0=, fma, $pop3, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %d = call double @llvm.fma.f64(double %a, double %b, double %c) ret double %d } diff --git a/llvm/test/CodeGen/WebAssembly/i128.ll b/llvm/test/CodeGen/WebAssembly/i128.ll index 6be1457..50d4680 100644 --- a/llvm/test/CodeGen/WebAssembly/i128.ll +++ b/llvm/test/CodeGen/WebAssembly/i128.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s ; Test that basic 128-bit integer operations assemble as expected. @@ -8,208 +9,572 @@ declare i128 @llvm.ctlz.i128(i128, i1) declare i128 @llvm.cttz.i128(i128, i1) declare i128 @llvm.ctpop.i128(i128) -; CHECK-LABEL: add128: -; CHECK-NEXT: .functype add128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK-NOT: .result -; CHECK: i64.add -; CHECK: i64.store -; CHECK: i64.add -; CHECK: i64.store -; CHECK-NEXT: return{{$}} define i128 @add128(i128 %x, i128 %y) { +; CHECK-LABEL: add128: +; CHECK: .functype add128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: local.get $push7=, 1 +; CHECK-NEXT: local.get $push6=, 3 +; CHECK-NEXT: i64.add $push5=, $pop7, $pop6 +; CHECK-NEXT: local.tee $push4=, 3, $pop5 +; CHECK-NEXT: i64.store 0($pop8), $pop4 +; CHECK-NEXT: local.get $push13=, 0 +; CHECK-NEXT: local.get $push10=, 2 +; CHECK-NEXT: local.get $push9=, 4 +; CHECK-NEXT: i64.add $push0=, $pop10, $pop9 +; CHECK-NEXT: local.get $push12=, 3 +; CHECK-NEXT: local.get $push11=, 1 +; CHECK-NEXT: i64.lt_u $push1=, $pop12, $pop11 +; CHECK-NEXT: i64.extend_i32_u $push2=, $pop1 +; CHECK-NEXT: i64.add $push3=, $pop0, $pop2 +; CHECK-NEXT: i64.store 8($pop13), $pop3 +; CHECK-NEXT: return %a = add i128 %x, %y ret i128 %a } -; CHECK-LABEL: sub128: -; CHECK-NEXT: .functype sub128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: i64.sub -; CHECK: i64.store -; CHECK: i64.sub -; CHECK: i64.store -; CHECK-NEXT: return{{$}} define i128 @sub128(i128 %x, i128 %y) { +; CHECK-LABEL: sub128: +; CHECK: .functype sub128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push7=, 0 +; CHECK-NEXT: local.get $push6=, 1 +; CHECK-NEXT: local.get $push5=, 3 +; CHECK-NEXT: i64.sub $push0=, $pop6, $pop5 +; CHECK-NEXT: i64.store 0($pop7), $pop0 +; CHECK-NEXT: local.get $push12=, 0 +; CHECK-NEXT: local.get $push9=, 2 +; CHECK-NEXT: local.get $push8=, 4 +; CHECK-NEXT: i64.sub $push1=, $pop9, $pop8 +; CHECK-NEXT: local.get $push11=, 1 +; CHECK-NEXT: local.get $push10=, 3 +; CHECK-NEXT: i64.lt_u $push2=, $pop11, $pop10 +; CHECK-NEXT: i64.extend_i32_u $push3=, $pop2 +; CHECK-NEXT: i64.sub $push4=, $pop1, $pop3 +; CHECK-NEXT: i64.store 8($pop12), $pop4 +; CHECK-NEXT: return %a = sub i128 %x, %y ret i128 %a } -; CHECK-LABEL: mul128: -; CHECK-NEXT: .functype mul128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __multi3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @mul128(i128 %x, i128 %y) { +; CHECK-LABEL: mul128: +; CHECK: .functype mul128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push4=, __stack_pointer +; CHECK-NEXT: i32.const $push5=, 16 +; CHECK-NEXT: i32.sub $push9=, $pop4, $pop5 +; CHECK-NEXT: local.tee $push8=, 5, $pop9 +; CHECK-NEXT: global.set __stack_pointer, $pop8 +; CHECK-NEXT: local.get $push14=, 5 +; CHECK-NEXT: local.get $push13=, 1 +; CHECK-NEXT: local.get $push12=, 2 +; CHECK-NEXT: local.get $push11=, 3 +; CHECK-NEXT: local.get $push10=, 4 +; CHECK-NEXT: call __multi3, $pop14, $pop13, $pop12, $pop11, $pop10 +; CHECK-NEXT: local.get $push16=, 0 +; CHECK-NEXT: local.get $push15=, 5 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push1=, $pop15, $pop0 +; CHECK-NEXT: i64.load $push2=, 0($pop1) +; CHECK-NEXT: i64.store 8($pop16), $pop2 +; CHECK-NEXT: local.get $push18=, 0 +; CHECK-NEXT: local.get $push17=, 5 +; CHECK-NEXT: i64.load $push3=, 0($pop17) +; CHECK-NEXT: i64.store 0($pop18), $pop3 +; CHECK-NEXT: local.get $push19=, 5 +; CHECK-NEXT: i32.const $push6=, 16 +; CHECK-NEXT: i32.add $push7=, $pop19, $pop6 +; CHECK-NEXT: global.set __stack_pointer, $pop7 +; CHECK-NEXT: return %a = mul i128 %x, %y ret i128 %a } -; CHECK-LABEL: sdiv128: -; CHECK-NEXT: .functype sdiv128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __divti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @sdiv128(i128 %x, i128 %y) { +; CHECK-LABEL: sdiv128: +; CHECK: .functype sdiv128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push4=, __stack_pointer +; CHECK-NEXT: i32.const $push5=, 16 +; CHECK-NEXT: i32.sub $push9=, $pop4, $pop5 +; CHECK-NEXT: local.tee $push8=, 5, $pop9 +; CHECK-NEXT: global.set __stack_pointer, $pop8 +; CHECK-NEXT: local.get $push14=, 5 +; CHECK-NEXT: local.get $push13=, 1 +; CHECK-NEXT: local.get $push12=, 2 +; CHECK-NEXT: local.get $push11=, 3 +; CHECK-NEXT: local.get $push10=, 4 +; CHECK-NEXT: call __divti3, $pop14, $pop13, $pop12, $pop11, $pop10 +; CHECK-NEXT: local.get $push16=, 0 +; CHECK-NEXT: local.get $push15=, 5 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push1=, $pop15, $pop0 +; CHECK-NEXT: i64.load $push2=, 0($pop1) +; CHECK-NEXT: i64.store 8($pop16), $pop2 +; CHECK-NEXT: local.get $push18=, 0 +; CHECK-NEXT: local.get $push17=, 5 +; CHECK-NEXT: i64.load $push3=, 0($pop17) +; CHECK-NEXT: i64.store 0($pop18), $pop3 +; CHECK-NEXT: local.get $push19=, 5 +; CHECK-NEXT: i32.const $push6=, 16 +; CHECK-NEXT: i32.add $push7=, $pop19, $pop6 +; CHECK-NEXT: global.set __stack_pointer, $pop7 +; CHECK-NEXT: return %a = sdiv i128 %x, %y ret i128 %a } -; CHECK-LABEL: udiv128: -; CHECK-NEXT: .functype udiv128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __udivti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @udiv128(i128 %x, i128 %y) { +; CHECK-LABEL: udiv128: +; CHECK: .functype udiv128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push4=, __stack_pointer +; CHECK-NEXT: i32.const $push5=, 16 +; CHECK-NEXT: i32.sub $push9=, $pop4, $pop5 +; CHECK-NEXT: local.tee $push8=, 5, $pop9 +; CHECK-NEXT: global.set __stack_pointer, $pop8 +; CHECK-NEXT: local.get $push14=, 5 +; CHECK-NEXT: local.get $push13=, 1 +; CHECK-NEXT: local.get $push12=, 2 +; CHECK-NEXT: local.get $push11=, 3 +; CHECK-NEXT: local.get $push10=, 4 +; CHECK-NEXT: call __udivti3, $pop14, $pop13, $pop12, $pop11, $pop10 +; CHECK-NEXT: local.get $push16=, 0 +; CHECK-NEXT: local.get $push15=, 5 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push1=, $pop15, $pop0 +; CHECK-NEXT: i64.load $push2=, 0($pop1) +; CHECK-NEXT: i64.store 8($pop16), $pop2 +; CHECK-NEXT: local.get $push18=, 0 +; CHECK-NEXT: local.get $push17=, 5 +; CHECK-NEXT: i64.load $push3=, 0($pop17) +; CHECK-NEXT: i64.store 0($pop18), $pop3 +; CHECK-NEXT: local.get $push19=, 5 +; CHECK-NEXT: i32.const $push6=, 16 +; CHECK-NEXT: i32.add $push7=, $pop19, $pop6 +; CHECK-NEXT: global.set __stack_pointer, $pop7 +; CHECK-NEXT: return %a = udiv i128 %x, %y ret i128 %a } -; CHECK-LABEL: srem128: -; CHECK-NEXT: .functype srem128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __modti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @srem128(i128 %x, i128 %y) { +; CHECK-LABEL: srem128: +; CHECK: .functype srem128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push4=, __stack_pointer +; CHECK-NEXT: i32.const $push5=, 16 +; CHECK-NEXT: i32.sub $push9=, $pop4, $pop5 +; CHECK-NEXT: local.tee $push8=, 5, $pop9 +; CHECK-NEXT: global.set __stack_pointer, $pop8 +; CHECK-NEXT: local.get $push14=, 5 +; CHECK-NEXT: local.get $push13=, 1 +; CHECK-NEXT: local.get $push12=, 2 +; CHECK-NEXT: local.get $push11=, 3 +; CHECK-NEXT: local.get $push10=, 4 +; CHECK-NEXT: call __modti3, $pop14, $pop13, $pop12, $pop11, $pop10 +; CHECK-NEXT: local.get $push16=, 0 +; CHECK-NEXT: local.get $push15=, 5 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push1=, $pop15, $pop0 +; CHECK-NEXT: i64.load $push2=, 0($pop1) +; CHECK-NEXT: i64.store 8($pop16), $pop2 +; CHECK-NEXT: local.get $push18=, 0 +; CHECK-NEXT: local.get $push17=, 5 +; CHECK-NEXT: i64.load $push3=, 0($pop17) +; CHECK-NEXT: i64.store 0($pop18), $pop3 +; CHECK-NEXT: local.get $push19=, 5 +; CHECK-NEXT: i32.const $push6=, 16 +; CHECK-NEXT: i32.add $push7=, $pop19, $pop6 +; CHECK-NEXT: global.set __stack_pointer, $pop7 +; CHECK-NEXT: return %a = srem i128 %x, %y ret i128 %a } -; CHECK-LABEL: urem128: -; CHECK-NEXT: .functype urem128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __umodti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @urem128(i128 %x, i128 %y) { +; CHECK-LABEL: urem128: +; CHECK: .functype urem128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push4=, __stack_pointer +; CHECK-NEXT: i32.const $push5=, 16 +; CHECK-NEXT: i32.sub $push9=, $pop4, $pop5 +; CHECK-NEXT: local.tee $push8=, 5, $pop9 +; CHECK-NEXT: global.set __stack_pointer, $pop8 +; CHECK-NEXT: local.get $push14=, 5 +; CHECK-NEXT: local.get $push13=, 1 +; CHECK-NEXT: local.get $push12=, 2 +; CHECK-NEXT: local.get $push11=, 3 +; CHECK-NEXT: local.get $push10=, 4 +; CHECK-NEXT: call __umodti3, $pop14, $pop13, $pop12, $pop11, $pop10 +; CHECK-NEXT: local.get $push16=, 0 +; CHECK-NEXT: local.get $push15=, 5 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push1=, $pop15, $pop0 +; CHECK-NEXT: i64.load $push2=, 0($pop1) +; CHECK-NEXT: i64.store 8($pop16), $pop2 +; CHECK-NEXT: local.get $push18=, 0 +; CHECK-NEXT: local.get $push17=, 5 +; CHECK-NEXT: i64.load $push3=, 0($pop17) +; CHECK-NEXT: i64.store 0($pop18), $pop3 +; CHECK-NEXT: local.get $push19=, 5 +; CHECK-NEXT: i32.const $push6=, 16 +; CHECK-NEXT: i32.add $push7=, $pop19, $pop6 +; CHECK-NEXT: global.set __stack_pointer, $pop7 +; CHECK-NEXT: return %a = urem i128 %x, %y ret i128 %a } -; CHECK-LABEL: and128: -; CHECK-NEXT: .functype and128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK-NOT: .result -; CHECK: i64.and -; CHECK: i64.store -; CHECK: i64.and -; CHECK: i64.store -; CHECK-NEXT: return{{$}} define i128 @and128(i128 %x, i128 %y) { +; CHECK-LABEL: and128: +; CHECK: .functype and128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 2 +; CHECK-NEXT: local.get $push2=, 4 +; CHECK-NEXT: i64.and $push0=, $pop3, $pop2 +; CHECK-NEXT: i64.store 8($pop4), $pop0 +; CHECK-NEXT: local.get $push7=, 0 +; CHECK-NEXT: local.get $push6=, 1 +; CHECK-NEXT: local.get $push5=, 3 +; CHECK-NEXT: i64.and $push1=, $pop6, $pop5 +; CHECK-NEXT: i64.store 0($pop7), $pop1 +; CHECK-NEXT: return %a = and i128 %x, %y ret i128 %a } -; CHECK-LABEL: or128: -; CHECK-NEXT: .functype or128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: i64.or -; CHECK: i64.store -; CHECK: i64.or -; CHECK: i64.store -; CHECK-NEXT: return{{$}} define i128 @or128(i128 %x, i128 %y) { +; CHECK-LABEL: or128: +; CHECK: .functype or128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 2 +; CHECK-NEXT: local.get $push2=, 4 +; CHECK-NEXT: i64.or $push0=, $pop3, $pop2 +; CHECK-NEXT: i64.store 8($pop4), $pop0 +; CHECK-NEXT: local.get $push7=, 0 +; CHECK-NEXT: local.get $push6=, 1 +; CHECK-NEXT: local.get $push5=, 3 +; CHECK-NEXT: i64.or $push1=, $pop6, $pop5 +; CHECK-NEXT: i64.store 0($pop7), $pop1 +; CHECK-NEXT: return %a = or i128 %x, %y ret i128 %a } -; CHECK-LABEL: xor128: -; CHECK-NEXT: .functype xor128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: i64.xor -; CHECK: i64.store -; CHECK: i64.xor -; CHECK: i64.store -; CHECK-NEXT: return{{$}} define i128 @xor128(i128 %x, i128 %y) { +; CHECK-LABEL: xor128: +; CHECK: .functype xor128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: local.get $push3=, 2 +; CHECK-NEXT: local.get $push2=, 4 +; CHECK-NEXT: i64.xor $push0=, $pop3, $pop2 +; CHECK-NEXT: i64.store 8($pop4), $pop0 +; CHECK-NEXT: local.get $push7=, 0 +; CHECK-NEXT: local.get $push6=, 1 +; CHECK-NEXT: local.get $push5=, 3 +; CHECK-NEXT: i64.xor $push1=, $pop6, $pop5 +; CHECK-NEXT: i64.store 0($pop7), $pop1 +; CHECK-NEXT: return %a = xor i128 %x, %y ret i128 %a } -; CHECK-LABEL: shl128: -; CHECK-NEXT: .functype shl128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __ashlti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @shl128(i128 %x, i128 %y) { +; CHECK-LABEL: shl128: +; CHECK: .functype shl128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push5=, __stack_pointer +; CHECK-NEXT: i32.const $push6=, 16 +; CHECK-NEXT: i32.sub $push10=, $pop5, $pop6 +; CHECK-NEXT: local.tee $push9=, 5, $pop10 +; CHECK-NEXT: global.set __stack_pointer, $pop9 +; CHECK-NEXT: local.get $push14=, 5 +; CHECK-NEXT: local.get $push13=, 1 +; CHECK-NEXT: local.get $push12=, 2 +; CHECK-NEXT: local.get $push11=, 3 +; CHECK-NEXT: i32.wrap_i64 $push0=, $pop11 +; CHECK-NEXT: call __ashlti3, $pop14, $pop13, $pop12, $pop0 +; CHECK-NEXT: local.get $push16=, 0 +; CHECK-NEXT: local.get $push15=, 5 +; CHECK-NEXT: i32.const $push1=, 8 +; CHECK-NEXT: i32.add $push2=, $pop15, $pop1 +; CHECK-NEXT: i64.load $push3=, 0($pop2) +; CHECK-NEXT: i64.store 8($pop16), $pop3 +; CHECK-NEXT: local.get $push18=, 0 +; CHECK-NEXT: local.get $push17=, 5 +; CHECK-NEXT: i64.load $push4=, 0($pop17) +; CHECK-NEXT: i64.store 0($pop18), $pop4 +; CHECK-NEXT: local.get $push19=, 5 +; CHECK-NEXT: i32.const $push7=, 16 +; CHECK-NEXT: i32.add $push8=, $pop19, $pop7 +; CHECK-NEXT: global.set __stack_pointer, $pop8 +; CHECK-NEXT: return %a = shl i128 %x, %y ret i128 %a } -; CHECK-LABEL: shr128: -; CHECK-NEXT: .functype shr128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __lshrti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @shr128(i128 %x, i128 %y) { +; CHECK-LABEL: shr128: +; CHECK: .functype shr128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push5=, __stack_pointer +; CHECK-NEXT: i32.const $push6=, 16 +; CHECK-NEXT: i32.sub $push10=, $pop5, $pop6 +; CHECK-NEXT: local.tee $push9=, 5, $pop10 +; CHECK-NEXT: global.set __stack_pointer, $pop9 +; CHECK-NEXT: local.get $push14=, 5 +; CHECK-NEXT: local.get $push13=, 1 +; CHECK-NEXT: local.get $push12=, 2 +; CHECK-NEXT: local.get $push11=, 3 +; CHECK-NEXT: i32.wrap_i64 $push0=, $pop11 +; CHECK-NEXT: call __lshrti3, $pop14, $pop13, $pop12, $pop0 +; CHECK-NEXT: local.get $push16=, 0 +; CHECK-NEXT: local.get $push15=, 5 +; CHECK-NEXT: i32.const $push1=, 8 +; CHECK-NEXT: i32.add $push2=, $pop15, $pop1 +; CHECK-NEXT: i64.load $push3=, 0($pop2) +; CHECK-NEXT: i64.store 8($pop16), $pop3 +; CHECK-NEXT: local.get $push18=, 0 +; CHECK-NEXT: local.get $push17=, 5 +; CHECK-NEXT: i64.load $push4=, 0($pop17) +; CHECK-NEXT: i64.store 0($pop18), $pop4 +; CHECK-NEXT: local.get $push19=, 5 +; CHECK-NEXT: i32.const $push7=, 16 +; CHECK-NEXT: i32.add $push8=, $pop19, $pop7 +; CHECK-NEXT: global.set __stack_pointer, $pop8 +; CHECK-NEXT: return %a = lshr i128 %x, %y ret i128 %a } -; CHECK-LABEL: sar128: -; CHECK-NEXT: .functype sar128 (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __ashrti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @sar128(i128 %x, i128 %y) { +; CHECK-LABEL: sar128: +; CHECK: .functype sar128 (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push5=, __stack_pointer +; CHECK-NEXT: i32.const $push6=, 16 +; CHECK-NEXT: i32.sub $push10=, $pop5, $pop6 +; CHECK-NEXT: local.tee $push9=, 5, $pop10 +; CHECK-NEXT: global.set __stack_pointer, $pop9 +; CHECK-NEXT: local.get $push14=, 5 +; CHECK-NEXT: local.get $push13=, 1 +; CHECK-NEXT: local.get $push12=, 2 +; CHECK-NEXT: local.get $push11=, 3 +; CHECK-NEXT: i32.wrap_i64 $push0=, $pop11 +; CHECK-NEXT: call __ashrti3, $pop14, $pop13, $pop12, $pop0 +; CHECK-NEXT: local.get $push16=, 0 +; CHECK-NEXT: local.get $push15=, 5 +; CHECK-NEXT: i32.const $push1=, 8 +; CHECK-NEXT: i32.add $push2=, $pop15, $pop1 +; CHECK-NEXT: i64.load $push3=, 0($pop2) +; CHECK-NEXT: i64.store 8($pop16), $pop3 +; CHECK-NEXT: local.get $push18=, 0 +; CHECK-NEXT: local.get $push17=, 5 +; CHECK-NEXT: i64.load $push4=, 0($pop17) +; CHECK-NEXT: i64.store 0($pop18), $pop4 +; CHECK-NEXT: local.get $push19=, 5 +; CHECK-NEXT: i32.const $push7=, 16 +; CHECK-NEXT: i32.add $push8=, $pop19, $pop7 +; CHECK-NEXT: global.set __stack_pointer, $pop8 +; CHECK-NEXT: return %a = ashr i128 %x, %y ret i128 %a } -; CHECK-LABEL: clz128: -; CHECK-NEXT: .functype clz128 (i32, i64, i64) -> (){{$}} -; CHECK-NOT: .result -; CHECK: i64.clz -; CHECK: i64.clz -; CHECK: return{{$}} define i128 @clz128(i128 %x) { +; CHECK-LABEL: clz128: +; CHECK: .functype clz128 (i32, i64, i64) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.store 8($pop8), $pop0 +; CHECK-NEXT: local.get $push12=, 0 +; CHECK-NEXT: local.get $push9=, 2 +; CHECK-NEXT: i64.clz $push5=, $pop9 +; CHECK-NEXT: local.get $push10=, 1 +; CHECK-NEXT: i64.clz $push2=, $pop10 +; CHECK-NEXT: i64.const $push3=, 64 +; CHECK-NEXT: i64.add $push4=, $pop2, $pop3 +; CHECK-NEXT: local.get $push11=, 2 +; CHECK-NEXT: i64.const $push7=, 0 +; CHECK-NEXT: i64.ne $push1=, $pop11, $pop7 +; CHECK-NEXT: i64.select $push6=, $pop5, $pop4, $pop1 +; CHECK-NEXT: i64.store 0($pop12), $pop6 +; CHECK-NEXT: return %a = call i128 @llvm.ctlz.i128(i128 %x, i1 false) ret i128 %a } -; CHECK-LABEL: clz128_zero_undef: -; CHECK-NEXT: .functype clz128_zero_undef (i32, i64, i64) -> (){{$}} -; CHECK: i64.clz -; CHECK: i64.clz -; CHECK: return{{$}} define i128 @clz128_zero_undef(i128 %x) { +; CHECK-LABEL: clz128_zero_undef: +; CHECK: .functype clz128_zero_undef (i32, i64, i64) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.store 8($pop8), $pop0 +; CHECK-NEXT: local.get $push12=, 0 +; CHECK-NEXT: local.get $push9=, 2 +; CHECK-NEXT: i64.clz $push5=, $pop9 +; CHECK-NEXT: local.get $push10=, 1 +; CHECK-NEXT: i64.clz $push2=, $pop10 +; CHECK-NEXT: i64.const $push3=, 64 +; CHECK-NEXT: i64.add $push4=, $pop2, $pop3 +; CHECK-NEXT: local.get $push11=, 2 +; CHECK-NEXT: i64.const $push7=, 0 +; CHECK-NEXT: i64.ne $push1=, $pop11, $pop7 +; CHECK-NEXT: i64.select $push6=, $pop5, $pop4, $pop1 +; CHECK-NEXT: i64.store 0($pop12), $pop6 +; CHECK-NEXT: return %a = call i128 @llvm.ctlz.i128(i128 %x, i1 true) ret i128 %a } -; CHECK-LABEL: ctz128: -; CHECK-NEXT: .functype ctz128 (i32, i64, i64) -> (){{$}} -; CHECK: i64.ctz -; CHECK: i64.ctz -; CHECK: return{{$}} define i128 @ctz128(i128 %x) { +; CHECK-LABEL: ctz128: +; CHECK: .functype ctz128 (i32, i64, i64) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.store 8($pop8), $pop0 +; CHECK-NEXT: local.get $push12=, 0 +; CHECK-NEXT: local.get $push9=, 1 +; CHECK-NEXT: i64.ctz $push5=, $pop9 +; CHECK-NEXT: local.get $push10=, 2 +; CHECK-NEXT: i64.ctz $push2=, $pop10 +; CHECK-NEXT: i64.const $push3=, 64 +; CHECK-NEXT: i64.add $push4=, $pop2, $pop3 +; CHECK-NEXT: local.get $push11=, 1 +; CHECK-NEXT: i64.const $push7=, 0 +; CHECK-NEXT: i64.ne $push1=, $pop11, $pop7 +; CHECK-NEXT: i64.select $push6=, $pop5, $pop4, $pop1 +; CHECK-NEXT: i64.store 0($pop12), $pop6 +; CHECK-NEXT: return %a = call i128 @llvm.cttz.i128(i128 %x, i1 false) ret i128 %a } -; CHECK-LABEL: ctz128_zero_undef: -; CHECK-NEXT: .functype ctz128_zero_undef (i32, i64, i64) -> (){{$}} -; CHECK: i64.ctz -; CHECK: i64.ctz -; CHECK: return{{$}} define i128 @ctz128_zero_undef(i128 %x) { +; CHECK-LABEL: ctz128_zero_undef: +; CHECK: .functype ctz128_zero_undef (i32, i64, i64) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.store 8($pop8), $pop0 +; CHECK-NEXT: local.get $push12=, 0 +; CHECK-NEXT: local.get $push9=, 1 +; CHECK-NEXT: i64.ctz $push5=, $pop9 +; CHECK-NEXT: local.get $push10=, 2 +; CHECK-NEXT: i64.ctz $push2=, $pop10 +; CHECK-NEXT: i64.const $push3=, 64 +; CHECK-NEXT: i64.add $push4=, $pop2, $pop3 +; CHECK-NEXT: local.get $push11=, 1 +; CHECK-NEXT: i64.const $push7=, 0 +; CHECK-NEXT: i64.ne $push1=, $pop11, $pop7 +; CHECK-NEXT: i64.select $push6=, $pop5, $pop4, $pop1 +; CHECK-NEXT: i64.store 0($pop12), $pop6 +; CHECK-NEXT: return %a = call i128 @llvm.cttz.i128(i128 %x, i1 true) ret i128 %a } -; CHECK-LABEL: popcnt128: -; CHECK-NEXT: .functype popcnt128 (i32, i64, i64) -> (){{$}} -; CHECK: i64.popcnt -; CHECK: i64.popcnt -; CHECK: return{{$}} define i128 @popcnt128(i128 %x) { +; CHECK-LABEL: popcnt128: +; CHECK: .functype popcnt128 (i32, i64, i64) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push4=, 0 +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.store 8($pop4), $pop0 +; CHECK-NEXT: local.get $push7=, 0 +; CHECK-NEXT: local.get $push5=, 1 +; CHECK-NEXT: i64.popcnt $push2=, $pop5 +; CHECK-NEXT: local.get $push6=, 2 +; CHECK-NEXT: i64.popcnt $push1=, $pop6 +; CHECK-NEXT: i64.add $push3=, $pop2, $pop1 +; CHECK-NEXT: i64.store 0($pop7), $pop3 +; CHECK-NEXT: return %a = call i128 @llvm.ctpop.i128(i128 %x) ret i128 %a } -; CHECK-LABEL: eqz128: -; CHECK-NEXT: .functype eqz128 (i64, i64) -> (i32){{$}} -; CHECK: i64.or -; CHECK: i64.eqz -; CHECK: return $ define i32 @eqz128(i128 %x) { +; CHECK-LABEL: eqz128: +; CHECK: .functype eqz128 (i64, i64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push3=, 0 +; CHECK-NEXT: local.get $push2=, 1 +; CHECK-NEXT: i64.or $push0=, $pop3, $pop2 +; CHECK-NEXT: i64.eqz $push1=, $pop0 +; CHECK-NEXT: return $pop1 %a = icmp eq i128 %x, 0 %b = zext i1 %a to i32 ret i32 %b } -; CHECK-LABEL: rotl: -; CHECK-NEXT: .functype rotl (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __ashlti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: call __lshrti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @rotl(i128 %x, i128 %y) { +; CHECK-LABEL: rotl: +; CHECK: .functype rotl (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32, i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push11=, __stack_pointer +; CHECK-NEXT: i32.const $push12=, 32 +; CHECK-NEXT: i32.sub $push23=, $pop11, $pop12 +; CHECK-NEXT: local.tee $push22=, 5, $pop23 +; CHECK-NEXT: global.set __stack_pointer, $pop22 +; CHECK-NEXT: local.get $push24=, 5 +; CHECK-NEXT: i32.const $push15=, 16 +; CHECK-NEXT: i32.add $push16=, $pop24, $pop15 +; CHECK-NEXT: local.get $push27=, 1 +; CHECK-NEXT: local.get $push26=, 2 +; CHECK-NEXT: local.get $push25=, 3 +; CHECK-NEXT: i32.wrap_i64 $push21=, $pop25 +; CHECK-NEXT: local.tee $push20=, 6, $pop21 +; CHECK-NEXT: call __ashlti3, $pop16, $pop27, $pop26, $pop20 +; CHECK-NEXT: local.get $push31=, 5 +; CHECK-NEXT: local.get $push30=, 1 +; CHECK-NEXT: local.get $push29=, 2 +; CHECK-NEXT: i32.const $push0=, 128 +; CHECK-NEXT: local.get $push28=, 6 +; CHECK-NEXT: i32.sub $push1=, $pop0, $pop28 +; CHECK-NEXT: call __lshrti3, $pop31, $pop30, $pop29, $pop1 +; CHECK-NEXT: local.get $push34=, 0 +; CHECK-NEXT: local.get $push32=, 5 +; CHECK-NEXT: i32.const $push17=, 16 +; CHECK-NEXT: i32.add $push18=, $pop32, $pop17 +; CHECK-NEXT: i32.const $push2=, 8 +; CHECK-NEXT: i32.add $push3=, $pop18, $pop2 +; CHECK-NEXT: i64.load $push4=, 0($pop3) +; CHECK-NEXT: local.get $push33=, 5 +; CHECK-NEXT: i32.const $push19=, 8 +; CHECK-NEXT: i32.add $push5=, $pop33, $pop19 +; CHECK-NEXT: i64.load $push6=, 0($pop5) +; CHECK-NEXT: i64.or $push7=, $pop4, $pop6 +; CHECK-NEXT: i64.store 8($pop34), $pop7 +; CHECK-NEXT: local.get $push37=, 0 +; CHECK-NEXT: local.get $push35=, 5 +; CHECK-NEXT: i64.load $push8=, 16($pop35) +; CHECK-NEXT: local.get $push36=, 5 +; CHECK-NEXT: i64.load $push9=, 0($pop36) +; CHECK-NEXT: i64.or $push10=, $pop8, $pop9 +; CHECK-NEXT: i64.store 0($pop37), $pop10 +; CHECK-NEXT: local.get $push38=, 5 +; CHECK-NEXT: i32.const $push13=, 32 +; CHECK-NEXT: i32.add $push14=, $pop38, $pop13 +; CHECK-NEXT: global.set __stack_pointer, $pop14 +; CHECK-NEXT: return %z = sub i128 128, %y %b = shl i128 %x, %y %c = lshr i128 %x, %z @@ -217,12 +582,59 @@ define i128 @rotl(i128 %x, i128 %y) { ret i128 %d } -; CHECK-LABEL: masked_rotl: -; CHECK-NEXT: .functype masked_rotl (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __ashlti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: call __lshrti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @masked_rotl(i128 %x, i128 %y) { +; CHECK-LABEL: masked_rotl: +; CHECK: .functype masked_rotl (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32, i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push13=, __stack_pointer +; CHECK-NEXT: i32.const $push14=, 32 +; CHECK-NEXT: i32.sub $push25=, $pop13, $pop14 +; CHECK-NEXT: local.tee $push24=, 5, $pop25 +; CHECK-NEXT: global.set __stack_pointer, $pop24 +; CHECK-NEXT: local.get $push26=, 5 +; CHECK-NEXT: i32.const $push17=, 16 +; CHECK-NEXT: i32.add $push18=, $pop26, $pop17 +; CHECK-NEXT: local.get $push29=, 1 +; CHECK-NEXT: local.get $push28=, 2 +; CHECK-NEXT: local.get $push27=, 3 +; CHECK-NEXT: i32.wrap_i64 $push0=, $pop27 +; CHECK-NEXT: i32.const $push1=, 127 +; CHECK-NEXT: i32.and $push23=, $pop0, $pop1 +; CHECK-NEXT: local.tee $push22=, 6, $pop23 +; CHECK-NEXT: call __ashlti3, $pop18, $pop29, $pop28, $pop22 +; CHECK-NEXT: local.get $push33=, 5 +; CHECK-NEXT: local.get $push32=, 1 +; CHECK-NEXT: local.get $push31=, 2 +; CHECK-NEXT: i32.const $push2=, 128 +; CHECK-NEXT: local.get $push30=, 6 +; CHECK-NEXT: i32.sub $push3=, $pop2, $pop30 +; CHECK-NEXT: call __lshrti3, $pop33, $pop32, $pop31, $pop3 +; CHECK-NEXT: local.get $push36=, 0 +; CHECK-NEXT: local.get $push34=, 5 +; CHECK-NEXT: i32.const $push19=, 16 +; CHECK-NEXT: i32.add $push20=, $pop34, $pop19 +; CHECK-NEXT: i32.const $push4=, 8 +; CHECK-NEXT: i32.add $push5=, $pop20, $pop4 +; CHECK-NEXT: i64.load $push6=, 0($pop5) +; CHECK-NEXT: local.get $push35=, 5 +; CHECK-NEXT: i32.const $push21=, 8 +; CHECK-NEXT: i32.add $push7=, $pop35, $pop21 +; CHECK-NEXT: i64.load $push8=, 0($pop7) +; CHECK-NEXT: i64.or $push9=, $pop6, $pop8 +; CHECK-NEXT: i64.store 8($pop36), $pop9 +; CHECK-NEXT: local.get $push39=, 0 +; CHECK-NEXT: local.get $push37=, 5 +; CHECK-NEXT: i64.load $push10=, 16($pop37) +; CHECK-NEXT: local.get $push38=, 5 +; CHECK-NEXT: i64.load $push11=, 0($pop38) +; CHECK-NEXT: i64.or $push12=, $pop10, $pop11 +; CHECK-NEXT: i64.store 0($pop39), $pop12 +; CHECK-NEXT: local.get $push40=, 5 +; CHECK-NEXT: i32.const $push15=, 32 +; CHECK-NEXT: i32.add $push16=, $pop40, $pop15 +; CHECK-NEXT: global.set __stack_pointer, $pop16 +; CHECK-NEXT: return %a = and i128 %y, 127 %z = sub i128 128, %a %b = shl i128 %x, %a @@ -231,12 +643,57 @@ define i128 @masked_rotl(i128 %x, i128 %y) { ret i128 %d } -; CHECK-LABEL: rotr: -; CHECK-NEXT: .functype rotr (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __lshrti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: call __ashlti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @rotr(i128 %x, i128 %y) { +; CHECK-LABEL: rotr: +; CHECK: .functype rotr (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32, i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push11=, __stack_pointer +; CHECK-NEXT: i32.const $push12=, 32 +; CHECK-NEXT: i32.sub $push23=, $pop11, $pop12 +; CHECK-NEXT: local.tee $push22=, 5, $pop23 +; CHECK-NEXT: global.set __stack_pointer, $pop22 +; CHECK-NEXT: local.get $push24=, 5 +; CHECK-NEXT: i32.const $push15=, 16 +; CHECK-NEXT: i32.add $push16=, $pop24, $pop15 +; CHECK-NEXT: local.get $push27=, 1 +; CHECK-NEXT: local.get $push26=, 2 +; CHECK-NEXT: local.get $push25=, 3 +; CHECK-NEXT: i32.wrap_i64 $push21=, $pop25 +; CHECK-NEXT: local.tee $push20=, 6, $pop21 +; CHECK-NEXT: call __lshrti3, $pop16, $pop27, $pop26, $pop20 +; CHECK-NEXT: local.get $push31=, 5 +; CHECK-NEXT: local.get $push30=, 1 +; CHECK-NEXT: local.get $push29=, 2 +; CHECK-NEXT: i32.const $push0=, 128 +; CHECK-NEXT: local.get $push28=, 6 +; CHECK-NEXT: i32.sub $push1=, $pop0, $pop28 +; CHECK-NEXT: call __ashlti3, $pop31, $pop30, $pop29, $pop1 +; CHECK-NEXT: local.get $push34=, 0 +; CHECK-NEXT: local.get $push32=, 5 +; CHECK-NEXT: i32.const $push17=, 16 +; CHECK-NEXT: i32.add $push18=, $pop32, $pop17 +; CHECK-NEXT: i32.const $push2=, 8 +; CHECK-NEXT: i32.add $push3=, $pop18, $pop2 +; CHECK-NEXT: i64.load $push4=, 0($pop3) +; CHECK-NEXT: local.get $push33=, 5 +; CHECK-NEXT: i32.const $push19=, 8 +; CHECK-NEXT: i32.add $push5=, $pop33, $pop19 +; CHECK-NEXT: i64.load $push6=, 0($pop5) +; CHECK-NEXT: i64.or $push7=, $pop4, $pop6 +; CHECK-NEXT: i64.store 8($pop34), $pop7 +; CHECK-NEXT: local.get $push37=, 0 +; CHECK-NEXT: local.get $push35=, 5 +; CHECK-NEXT: i64.load $push8=, 16($pop35) +; CHECK-NEXT: local.get $push36=, 5 +; CHECK-NEXT: i64.load $push9=, 0($pop36) +; CHECK-NEXT: i64.or $push10=, $pop8, $pop9 +; CHECK-NEXT: i64.store 0($pop37), $pop10 +; CHECK-NEXT: local.get $push38=, 5 +; CHECK-NEXT: i32.const $push13=, 32 +; CHECK-NEXT: i32.add $push14=, $pop38, $pop13 +; CHECK-NEXT: global.set __stack_pointer, $pop14 +; CHECK-NEXT: return %z = sub i128 128, %y %b = lshr i128 %x, %y %c = shl i128 %x, %z @@ -244,12 +701,59 @@ define i128 @rotr(i128 %x, i128 %y) { ret i128 %d } -; CHECK-LABEL: masked_rotr: -; CHECK-NEXT: .functype masked_rotr (i32, i64, i64, i64, i64) -> (){{$}} -; CHECK: call __lshrti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: call __ashlti3, ${{.+}}, ${{.+}}, ${{.+}}, ${{.+}}{{$}} -; CHECK: return{{$}} define i128 @masked_rotr(i128 %x, i128 %y) { +; CHECK-LABEL: masked_rotr: +; CHECK: .functype masked_rotr (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32, i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push13=, __stack_pointer +; CHECK-NEXT: i32.const $push14=, 32 +; CHECK-NEXT: i32.sub $push25=, $pop13, $pop14 +; CHECK-NEXT: local.tee $push24=, 5, $pop25 +; CHECK-NEXT: global.set __stack_pointer, $pop24 +; CHECK-NEXT: local.get $push26=, 5 +; CHECK-NEXT: i32.const $push17=, 16 +; CHECK-NEXT: i32.add $push18=, $pop26, $pop17 +; CHECK-NEXT: local.get $push29=, 1 +; CHECK-NEXT: local.get $push28=, 2 +; CHECK-NEXT: local.get $push27=, 3 +; CHECK-NEXT: i32.wrap_i64 $push0=, $pop27 +; CHECK-NEXT: i32.const $push1=, 127 +; CHECK-NEXT: i32.and $push23=, $pop0, $pop1 +; CHECK-NEXT: local.tee $push22=, 6, $pop23 +; CHECK-NEXT: call __lshrti3, $pop18, $pop29, $pop28, $pop22 +; CHECK-NEXT: local.get $push33=, 5 +; CHECK-NEXT: local.get $push32=, 1 +; CHECK-NEXT: local.get $push31=, 2 +; CHECK-NEXT: i32.const $push2=, 128 +; CHECK-NEXT: local.get $push30=, 6 +; CHECK-NEXT: i32.sub $push3=, $pop2, $pop30 +; CHECK-NEXT: call __ashlti3, $pop33, $pop32, $pop31, $pop3 +; CHECK-NEXT: local.get $push36=, 0 +; CHECK-NEXT: local.get $push34=, 5 +; CHECK-NEXT: i32.const $push19=, 16 +; CHECK-NEXT: i32.add $push20=, $pop34, $pop19 +; CHECK-NEXT: i32.const $push4=, 8 +; CHECK-NEXT: i32.add $push5=, $pop20, $pop4 +; CHECK-NEXT: i64.load $push6=, 0($pop5) +; CHECK-NEXT: local.get $push35=, 5 +; CHECK-NEXT: i32.const $push21=, 8 +; CHECK-NEXT: i32.add $push7=, $pop35, $pop21 +; CHECK-NEXT: i64.load $push8=, 0($pop7) +; CHECK-NEXT: i64.or $push9=, $pop6, $pop8 +; CHECK-NEXT: i64.store 8($pop36), $pop9 +; CHECK-NEXT: local.get $push39=, 0 +; CHECK-NEXT: local.get $push37=, 5 +; CHECK-NEXT: i64.load $push10=, 16($pop37) +; CHECK-NEXT: local.get $push38=, 5 +; CHECK-NEXT: i64.load $push11=, 0($pop38) +; CHECK-NEXT: i64.or $push12=, $pop10, $pop11 +; CHECK-NEXT: i64.store 0($pop39), $pop12 +; CHECK-NEXT: local.get $push40=, 5 +; CHECK-NEXT: i32.const $push15=, 32 +; CHECK-NEXT: i32.add $push16=, $pop40, $pop15 +; CHECK-NEXT: global.set __stack_pointer, $pop16 +; CHECK-NEXT: return %a = and i128 %y, 127 %z = sub i128 128, %a %b = lshr i128 %x, %a diff --git a/llvm/test/CodeGen/WebAssembly/libcalls.ll b/llvm/test/CodeGen/WebAssembly/libcalls.ll index efa4041..d11f0d4 100644 --- a/llvm/test/CodeGen/WebAssembly/libcalls.ll +++ b/llvm/test/CodeGen/WebAssembly/libcalls.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s ; Test a subset of compiler-rt/libm libcalls expected to be emitted by the wasm backend @@ -21,61 +22,239 @@ declare i32 @llvm.lround(double) -; CHECK-LABEL: fp128libcalls: define fp128 @fp128libcalls(fp128 %x, fp128 %y, i32 %z) { ; compiler-rt call - ; CHECK: call __addtf3 +; CHECK-LABEL: fp128libcalls: +; CHECK: .functype fp128libcalls (i32, i64, i64, i64, i64, i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push28=, __stack_pointer +; CHECK-NEXT: i32.const $push29=, 144 +; CHECK-NEXT: i32.sub $push73=, $pop28, $pop29 +; CHECK-NEXT: local.tee $push72=, 6, $pop73 +; CHECK-NEXT: global.set __stack_pointer, $pop72 +; CHECK-NEXT: local.get $push74=, 6 +; CHECK-NEXT: i32.const $push32=, 128 +; CHECK-NEXT: i32.add $push33=, $pop74, $pop32 +; CHECK-NEXT: local.get $push78=, 1 +; CHECK-NEXT: local.get $push77=, 2 +; CHECK-NEXT: local.get $push76=, 3 +; CHECK-NEXT: local.get $push75=, 4 +; CHECK-NEXT: call __addtf3, $pop33, $pop78, $pop77, $pop76, $pop75 +; CHECK-NEXT: local.get $push79=, 6 +; CHECK-NEXT: i32.const $push36=, 112 +; CHECK-NEXT: i32.add $push37=, $pop79, $pop36 +; CHECK-NEXT: local.get $push80=, 6 +; CHECK-NEXT: i64.load $push3=, 128($pop80) +; CHECK-NEXT: local.get $push81=, 6 +; CHECK-NEXT: i32.const $push34=, 128 +; CHECK-NEXT: i32.add $push35=, $pop81, $pop34 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push1=, $pop35, $pop0 +; CHECK-NEXT: i64.load $push2=, 0($pop1) +; CHECK-NEXT: local.get $push83=, 3 +; CHECK-NEXT: local.get $push82=, 4 +; CHECK-NEXT: call __multf3, $pop37, $pop3, $pop2, $pop83, $pop82 +; CHECK-NEXT: local.get $push84=, 6 +; CHECK-NEXT: i32.const $push40=, 96 +; CHECK-NEXT: i32.add $push41=, $pop84, $pop40 +; CHECK-NEXT: local.get $push85=, 6 +; CHECK-NEXT: i64.load $push6=, 112($pop85) +; CHECK-NEXT: local.get $push86=, 6 +; CHECK-NEXT: i32.const $push38=, 112 +; CHECK-NEXT: i32.add $push39=, $pop86, $pop38 +; CHECK-NEXT: i32.const $push71=, 8 +; CHECK-NEXT: i32.add $push4=, $pop39, $pop71 +; CHECK-NEXT: i64.load $push5=, 0($pop4) +; CHECK-NEXT: local.get $push88=, 3 +; CHECK-NEXT: local.get $push87=, 4 +; CHECK-NEXT: call __divtf3, $pop41, $pop6, $pop5, $pop88, $pop87 +; CHECK-NEXT: local.get $push89=, 6 +; CHECK-NEXT: i32.const $push44=, 80 +; CHECK-NEXT: i32.add $push45=, $pop89, $pop44 +; CHECK-NEXT: local.get $push90=, 6 +; CHECK-NEXT: i64.load $push9=, 96($pop90) +; CHECK-NEXT: local.get $push91=, 6 +; CHECK-NEXT: i32.const $push42=, 96 +; CHECK-NEXT: i32.add $push43=, $pop91, $pop42 +; CHECK-NEXT: i32.const $push70=, 8 +; CHECK-NEXT: i32.add $push7=, $pop43, $pop70 +; CHECK-NEXT: i64.load $push8=, 0($pop7) +; CHECK-NEXT: call sqrtl, $pop45, $pop9, $pop8 +; CHECK-NEXT: local.get $push92=, 6 +; CHECK-NEXT: i32.const $push48=, 64 +; CHECK-NEXT: i32.add $push49=, $pop92, $pop48 +; CHECK-NEXT: local.get $push93=, 6 +; CHECK-NEXT: i64.load $push12=, 80($pop93) +; CHECK-NEXT: local.get $push94=, 6 +; CHECK-NEXT: i32.const $push46=, 80 +; CHECK-NEXT: i32.add $push47=, $pop94, $pop46 +; CHECK-NEXT: i32.const $push69=, 8 +; CHECK-NEXT: i32.add $push10=, $pop47, $pop69 +; CHECK-NEXT: i64.load $push11=, 0($pop10) +; CHECK-NEXT: call floorl, $pop49, $pop12, $pop11 +; CHECK-NEXT: local.get $push95=, 6 +; CHECK-NEXT: i32.const $push52=, 48 +; CHECK-NEXT: i32.add $push53=, $pop95, $pop52 +; CHECK-NEXT: local.get $push96=, 6 +; CHECK-NEXT: i64.load $push15=, 64($pop96) +; CHECK-NEXT: local.get $push97=, 6 +; CHECK-NEXT: i32.const $push50=, 64 +; CHECK-NEXT: i32.add $push51=, $pop97, $pop50 +; CHECK-NEXT: i32.const $push68=, 8 +; CHECK-NEXT: i32.add $push13=, $pop51, $pop68 +; CHECK-NEXT: i64.load $push14=, 0($pop13) +; CHECK-NEXT: local.get $push99=, 3 +; CHECK-NEXT: local.get $push98=, 4 +; CHECK-NEXT: call powl, $pop53, $pop15, $pop14, $pop99, $pop98 +; CHECK-NEXT: local.get $push100=, 6 +; CHECK-NEXT: i32.const $push56=, 32 +; CHECK-NEXT: i32.add $push57=, $pop100, $pop56 +; CHECK-NEXT: local.get $push101=, 6 +; CHECK-NEXT: i64.load $push18=, 48($pop101) +; CHECK-NEXT: local.get $push102=, 6 +; CHECK-NEXT: i32.const $push54=, 48 +; CHECK-NEXT: i32.add $push55=, $pop102, $pop54 +; CHECK-NEXT: i32.const $push67=, 8 +; CHECK-NEXT: i32.add $push16=, $pop55, $pop67 +; CHECK-NEXT: i64.load $push17=, 0($pop16) +; CHECK-NEXT: local.get $push103=, 5 +; CHECK-NEXT: call __powitf2, $pop57, $pop18, $pop17, $pop103 +; CHECK-NEXT: local.get $push104=, 6 +; CHECK-NEXT: i32.const $push60=, 16 +; CHECK-NEXT: i32.add $push61=, $pop104, $pop60 +; CHECK-NEXT: local.get $push105=, 6 +; CHECK-NEXT: i64.load $push21=, 32($pop105) +; CHECK-NEXT: local.get $push106=, 6 +; CHECK-NEXT: i32.const $push58=, 32 +; CHECK-NEXT: i32.add $push59=, $pop106, $pop58 +; CHECK-NEXT: i32.const $push66=, 8 +; CHECK-NEXT: i32.add $push19=, $pop59, $pop66 +; CHECK-NEXT: i64.load $push20=, 0($pop19) +; CHECK-NEXT: call truncl, $pop61, $pop21, $pop20 +; CHECK-NEXT: local.get $push109=, 6 +; CHECK-NEXT: local.get $push107=, 6 +; CHECK-NEXT: i64.load $push24=, 16($pop107) +; CHECK-NEXT: local.get $push108=, 6 +; CHECK-NEXT: i32.const $push62=, 16 +; CHECK-NEXT: i32.add $push63=, $pop108, $pop62 +; CHECK-NEXT: i32.const $push65=, 8 +; CHECK-NEXT: i32.add $push22=, $pop63, $pop65 +; CHECK-NEXT: i64.load $push23=, 0($pop22) +; CHECK-NEXT: call nearbyintl, $pop109, $pop24, $pop23 +; CHECK-NEXT: local.get $push111=, 0 +; CHECK-NEXT: local.get $push110=, 6 +; CHECK-NEXT: i32.const $push64=, 8 +; CHECK-NEXT: i32.add $push25=, $pop110, $pop64 +; CHECK-NEXT: i64.load $push26=, 0($pop25) +; CHECK-NEXT: i64.store 8($pop111), $pop26 +; CHECK-NEXT: local.get $push113=, 0 +; CHECK-NEXT: local.get $push112=, 6 +; CHECK-NEXT: i64.load $push27=, 0($pop112) +; CHECK-NEXT: i64.store 0($pop113), $pop27 +; CHECK-NEXT: local.get $push114=, 6 +; CHECK-NEXT: i32.const $push30=, 144 +; CHECK-NEXT: i32.add $push31=, $pop114, $pop30 +; CHECK-NEXT: global.set __stack_pointer, $pop31 +; CHECK-NEXT: return %a = fadd fp128 %x, %y - ; CHECK: call __multf3 %b = fmul fp128 %a, %y - ; CHECK: call __divtf3 %c = fdiv fp128 %b, %y ; libm calls - ; CHECK: call sqrtl %d = call fp128 @llvm.sqrt.f128(fp128 %c) - ; CHECK: call floorl %e = call fp128 @llvm.floor.f128(fp128 %d) - ; CHECK: call powl %f = call fp128 @llvm.pow.f128(fp128 %e, fp128 %y) - ; CHECK: call __powitf2 %g = call fp128 @llvm.powi.f128.i32(fp128 %f, i32 %z) - ; CHECK: call truncl %h = call fp128 @llvm.trunc.f128(fp128 %g) - ; CHECK: call nearbyintl %i = call fp128 @llvm.nearbyint.f128(fp128 %h) ret fp128 %i } -; CHECK-LABEL: i128libcalls: define i128 @i128libcalls(i128 %x, i128 %y) { ; Basic ops should be expanded - ; CHECK: .local - ; CHECK-NOT: call +; CHECK-LABEL: i128libcalls: +; CHECK: .functype i128libcalls (i32, i64, i64, i64, i64) -> () +; CHECK-NEXT: .local i32, i64 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push11=, __stack_pointer +; CHECK-NEXT: i32.const $push12=, 32 +; CHECK-NEXT: i32.sub $push23=, $pop11, $pop12 +; CHECK-NEXT: local.tee $push22=, 5, $pop23 +; CHECK-NEXT: global.set __stack_pointer, $pop22 +; CHECK-NEXT: local.get $push24=, 5 +; CHECK-NEXT: i32.const $push15=, 16 +; CHECK-NEXT: i32.add $push16=, $pop24, $pop15 +; CHECK-NEXT: local.get $push26=, 1 +; CHECK-NEXT: local.get $push25=, 3 +; CHECK-NEXT: i64.add $push21=, $pop26, $pop25 +; CHECK-NEXT: local.tee $push20=, 6, $pop21 +; CHECK-NEXT: local.get $push28=, 2 +; CHECK-NEXT: local.get $push27=, 4 +; CHECK-NEXT: i64.add $push0=, $pop28, $pop27 +; CHECK-NEXT: local.get $push30=, 6 +; CHECK-NEXT: local.get $push29=, 1 +; CHECK-NEXT: i64.lt_u $push1=, $pop30, $pop29 +; CHECK-NEXT: i64.extend_i32_u $push2=, $pop1 +; CHECK-NEXT: i64.add $push3=, $pop0, $pop2 +; CHECK-NEXT: local.get $push32=, 3 +; CHECK-NEXT: local.get $push31=, 4 +; CHECK-NEXT: call __multi3, $pop16, $pop20, $pop3, $pop32, $pop31 +; CHECK-NEXT: local.get $push37=, 5 +; CHECK-NEXT: local.get $push33=, 5 +; CHECK-NEXT: i64.load $push7=, 16($pop33) +; CHECK-NEXT: local.get $push34=, 5 +; CHECK-NEXT: i32.const $push17=, 16 +; CHECK-NEXT: i32.add $push18=, $pop34, $pop17 +; CHECK-NEXT: i32.const $push4=, 8 +; CHECK-NEXT: i32.add $push5=, $pop18, $pop4 +; CHECK-NEXT: i64.load $push6=, 0($pop5) +; CHECK-NEXT: local.get $push36=, 3 +; CHECK-NEXT: local.get $push35=, 4 +; CHECK-NEXT: call __umodti3, $pop37, $pop7, $pop6, $pop36, $pop35 +; CHECK-NEXT: local.get $push39=, 0 +; CHECK-NEXT: local.get $push38=, 5 +; CHECK-NEXT: i32.const $push19=, 8 +; CHECK-NEXT: i32.add $push8=, $pop38, $pop19 +; CHECK-NEXT: i64.load $push9=, 0($pop8) +; CHECK-NEXT: i64.store 8($pop39), $pop9 +; CHECK-NEXT: local.get $push41=, 0 +; CHECK-NEXT: local.get $push40=, 5 +; CHECK-NEXT: i64.load $push10=, 0($pop40) +; CHECK-NEXT: i64.store 0($pop41), $pop10 +; CHECK-NEXT: local.get $push42=, 5 +; CHECK-NEXT: i32.const $push13=, 32 +; CHECK-NEXT: i32.add $push14=, $pop42, $pop13 +; CHECK-NEXT: global.set __stack_pointer, $pop14 +; CHECK-NEXT: return %a = add i128 %x, %y - ; CHECK: call __multi3 %b = mul i128 %a, %y - ; CHECK: call __umodti3 %c = urem i128 %b, %y ret i128 %c } -; CHECK-LABEL: f64libcalls: define i32 @f64libcalls(double %x, double %y, i32 %z) { - ; CHECK: call $push{{[0-9]}}=, cos +; CHECK-LABEL: f64libcalls: +; CHECK: .functype f64libcalls (f64, f64, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: call $push0=, cos, $pop8 +; CHECK-NEXT: call $push1=, log10, $pop0 +; CHECK-NEXT: local.get $push9=, 1 +; CHECK-NEXT: call $push2=, pow, $pop1, $pop9 +; CHECK-NEXT: local.get $push10=, 2 +; CHECK-NEXT: call $push3=, __powidf2, $pop2, $pop10 +; CHECK-NEXT: call $push4=, log, $pop3 +; CHECK-NEXT: call $push5=, exp, $pop4 +; CHECK-NEXT: call $push6=, cbrt, $pop5 +; CHECK-NEXT: call $push7=, lround, $pop6 +; CHECK-NEXT: return $pop7 %a = call double @llvm.cos.f64(double %x) - ; CHECK: call $push{{[0-9]}}=, log10 %b = call double @llvm.log10.f64(double %a) - ; CHECK: call $push{{[0-9]}}=, pow %c = call double @llvm.pow.f64(double %b, double %y) - ; CHECK: call $push{{[0-9]}}=, __powidf2 %d = call double @llvm.powi.f64.i32(double %c, i32 %z) - ; CHECK: call $push{{[0-9]}}=, log %e = call double @llvm.log.f64(double %d) - ; CHECK: call $push{{[0-9]}}=, exp %f = call double @llvm.exp.f64(double %e) - ; CHECK: call $push{{[0-9]}}=, cbrt %g = call fast double @llvm.pow.f64(double %f, double 0x3FD5555555555555) - ; CHECK: call $push{{[0-9]}}=, lround %h = call i32 @llvm.lround(double %g) ret i32 %h } @@ -84,45 +263,85 @@ define i32 @f64libcalls(double %x, double %y, i32 %z) { ; comment in WebAssemblyRunimeLibcallSignatures.cpp) so check them separately. ; no libcalls are needed for f32 and f64 -; CHECK-LABEL: unordd: define i1 @unordd(double %x, double %y) { - ; CHECK-NOT: call - ; CHECK: f64.ne +; CHECK-LABEL: unordd: +; CHECK: .functype unordd (f64, f64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: local.get $push7=, 0 +; CHECK-NEXT: f64.ne $push4=, $pop8, $pop7 +; CHECK-NEXT: local.get $push10=, 1 +; CHECK-NEXT: local.get $push9=, 1 +; CHECK-NEXT: f64.ne $push3=, $pop10, $pop9 +; CHECK-NEXT: i32.or $push5=, $pop4, $pop3 +; CHECK-NEXT: local.get $push12=, 0 +; CHECK-NEXT: local.get $push11=, 0 +; CHECK-NEXT: f64.eq $push1=, $pop12, $pop11 +; CHECK-NEXT: local.get $push14=, 1 +; CHECK-NEXT: local.get $push13=, 1 +; CHECK-NEXT: f64.eq $push0=, $pop14, $pop13 +; CHECK-NEXT: i32.and $push2=, $pop1, $pop0 +; CHECK-NEXT: i32.xor $push6=, $pop5, $pop2 +; CHECK-NEXT: return $pop6 %a = fcmp uno double %x, %y - ; CHECK-NOT: call - ; CHECK: f64.eq %b = fcmp ord double %x, %y - ; CHECK: i32.xor %c = xor i1 %a, %b ret i1 %c } -; CHECK-LABEL: unordf: define i1 @unordf(float %x, float %y) { - ; CHECK-NOT: call - ; CHECK: f32.ne +; CHECK-LABEL: unordf: +; CHECK: .functype unordf (f32, f32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push8=, 0 +; CHECK-NEXT: local.get $push7=, 0 +; CHECK-NEXT: f32.ne $push4=, $pop8, $pop7 +; CHECK-NEXT: local.get $push10=, 1 +; CHECK-NEXT: local.get $push9=, 1 +; CHECK-NEXT: f32.ne $push3=, $pop10, $pop9 +; CHECK-NEXT: i32.or $push5=, $pop4, $pop3 +; CHECK-NEXT: local.get $push12=, 0 +; CHECK-NEXT: local.get $push11=, 0 +; CHECK-NEXT: f32.eq $push1=, $pop12, $pop11 +; CHECK-NEXT: local.get $push14=, 1 +; CHECK-NEXT: local.get $push13=, 1 +; CHECK-NEXT: f32.eq $push0=, $pop14, $pop13 +; CHECK-NEXT: i32.and $push2=, $pop1, $pop0 +; CHECK-NEXT: i32.xor $push6=, $pop5, $pop2 +; CHECK-NEXT: return $pop6 %a = fcmp uno float %x, %y - ; CHECK-NOT: call - ; CHECK: f32.eq %b = fcmp ord float %x, %y - ; CHECK: i32.xor %c = xor i1 %a, %b ret i1 %c } -; CHECK-LABEL: unordt: define i1 @unordt(fp128 %x, fp128 %y) { - ; CHECK: call $push[[CALL:[0-9]]]=, __unordtf2 - ; CHECK-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0 - ; CHECK-NEXT: i32.ne $push{{[0-9]}}=, $pop[[CALL]], $pop[[ZERO]] +; CHECK-LABEL: unordt: +; CHECK: .functype unordt (i64, i64, i64, i64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push6=, 0 +; CHECK-NEXT: local.get $push5=, 1 +; CHECK-NEXT: local.get $push4=, 2 +; CHECK-NEXT: local.get $push3=, 3 +; CHECK-NEXT: call $push1=, __unordtf2, $pop6, $pop5, $pop4, $pop3 +; CHECK-NEXT: i32.const $push0=, 0 +; CHECK-NEXT: i32.ne $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 %a = fcmp uno fp128 %x, %y ret i1 %a } -; CHECK-LABEL: ordt: define i1 @ordt(fp128 %x, fp128 %y) { - ; CHECK: call $push[[CALL:[0-9]]]=, __unordtf2 - ; CHECK-NEXT: i32.eqz $push{{[0-9]}}=, $pop[[CALL]] +; CHECK-LABEL: ordt: +; CHECK: .functype ordt (i64, i64, i64, i64) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: local.get $push4=, 1 +; CHECK-NEXT: local.get $push3=, 2 +; CHECK-NEXT: local.get $push2=, 3 +; CHECK-NEXT: call $push0=, __unordtf2, $pop5, $pop4, $pop3, $pop2 +; CHECK-NEXT: i32.eqz $push1=, $pop0 +; CHECK-NEXT: return $pop1 %a = fcmp ord fp128 %x, %y ret i1 %a } diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll index dccd727..51e6c28 100644 --- a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -tail-dup-placement=0 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -tail-dup-placement=0 | FileCheck %s ; Test memcpy, memmove, and memset intrinsics. @@ -10,60 +11,86 @@ declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) ; Test that return values are optimized. -; CHECK-LABEL: copy_yes: -; CHECK: call $push0=, memcpy, $0, $1, $2{{$}} -; CHECK-NEXT: return $pop0{{$}} define ptr @copy_yes(ptr %dst, ptr %src, i32 %len) { +; CHECK-LABEL: copy_yes: +; CHECK: .functype copy_yes (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $push0=, memcpy, $0, $1, $2 +; CHECK-NEXT: return $pop0 call void @llvm.memcpy.p0.p0.i32(ptr %dst, ptr %src, i32 %len, i1 false) ret ptr %dst } - -; CHECK-LABEL: copy_no: -; CHECK: call $drop=, memcpy, $0, $1, $2{{$}} -; CHECK-NEXT: return{{$}} define void @copy_no(ptr %dst, ptr %src, i32 %len) { +; CHECK-LABEL: copy_no: +; CHECK: .functype copy_no (i32, i32, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $drop=, memcpy, $0, $1, $2 +; CHECK-NEXT: return call void @llvm.memcpy.p0.p0.i32(ptr %dst, ptr %src, i32 %len, i1 false) ret void } -; CHECK-LABEL: move_yes: -; CHECK: call $push0=, memmove, $0, $1, $2{{$}} -; CHECK-NEXT: return $pop0{{$}} define ptr @move_yes(ptr %dst, ptr %src, i32 %len) { +; CHECK-LABEL: move_yes: +; CHECK: .functype move_yes (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $push0=, memmove, $0, $1, $2 +; CHECK-NEXT: return $pop0 call void @llvm.memmove.p0.p0.i32(ptr %dst, ptr %src, i32 %len, i1 false) ret ptr %dst } -; CHECK-LABEL: move_no: -; CHECK: call $drop=, memmove, $0, $1, $2{{$}} -; CHECK-NEXT: return{{$}} define void @move_no(ptr %dst, ptr %src, i32 %len) { +; CHECK-LABEL: move_no: +; CHECK: .functype move_no (i32, i32, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $drop=, memmove, $0, $1, $2 +; CHECK-NEXT: return call void @llvm.memmove.p0.p0.i32(ptr %dst, ptr %src, i32 %len, i1 false) ret void } -; CHECK-LABEL: set_yes: -; CHECK: call $push0=, memset, $0, $1, $2{{$}} -; CHECK-NEXT: return $pop0{{$}} define ptr @set_yes(ptr %dst, i8 %src, i32 %len) { +; CHECK-LABEL: set_yes: +; CHECK: .functype set_yes (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $push0=, memset, $0, $1, $2 +; CHECK-NEXT: return $pop0 call void @llvm.memset.p0.i32(ptr %dst, i8 %src, i32 %len, i1 false) ret ptr %dst } -; CHECK-LABEL: set_no: -; CHECK: call $drop=, memset, $0, $1, $2{{$}} -; CHECK-NEXT: return{{$}} define void @set_no(ptr %dst, i8 %src, i32 %len) { +; CHECK-LABEL: set_no: +; CHECK: .functype set_no (i32, i32, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $drop=, memset, $0, $1, $2 +; CHECK-NEXT: return call void @llvm.memset.p0.i32(ptr %dst, i8 %src, i32 %len, i1 false) ret void } - -; CHECK-LABEL: frame_index: -; CHECK: call $drop=, memset, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK: call $push{{[0-9]+}}=, memset, ${{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK: return{{$}} define void @frame_index() { +; CHECK-LABEL: frame_index: +; CHECK: .functype frame_index () -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get $push3=, __stack_pointer +; CHECK-NEXT: i32.const $push4=, 4096 +; CHECK-NEXT: i32.sub $push12=, $pop3, $pop4 +; CHECK-NEXT: local.tee $push11=, $0=, $pop12 +; CHECK-NEXT: global.set __stack_pointer, $pop11 +; CHECK-NEXT: i32.const $push7=, 2048 +; CHECK-NEXT: i32.add $push8=, $0, $pop7 +; CHECK-NEXT: i32.const $push1=, 0 +; CHECK-NEXT: i32.const $push0=, 1024 +; CHECK-NEXT: call $drop=, memset, $pop8, $pop1, $pop0 +; CHECK-NEXT: i32.const $push10=, 0 +; CHECK-NEXT: i32.const $push9=, 1024 +; CHECK-NEXT: call $push2=, memset, $0, $pop10, $pop9 +; CHECK-NEXT: i32.const $push5=, 4096 +; CHECK-NEXT: i32.add $push6=, $pop2, $pop5 +; CHECK-NEXT: global.set __stack_pointer, $pop6 +; CHECK-NEXT: return entry: %a = alloca [2048 x i8], align 16 %b = alloca [2048 x i8], align 16 @@ -76,11 +103,28 @@ entry: ; $drop. Note that we use a call to prevent tail dup so that we can test ; this specific functionality. -; CHECK-LABEL: drop_result: -; CHECK: call $drop=, memset, $0, $1, $2 declare ptr @def() declare void @block_tail_dup() define ptr @drop_result(ptr %arg, i8 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) { +; CHECK-LABEL: drop_result: +; CHECK: .functype drop_result (i32, i32, i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: # %bb +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: br_if 0, $3 # 0: down to label1 +; CHECK-NEXT: # %bb.1: # %bb5 +; CHECK-NEXT: br_if 1, $4 # 1: down to label0 +; CHECK-NEXT: # %bb.2: # %bb7 +; CHECK-NEXT: call $drop=, memset, $0, $1, $2 +; CHECK-NEXT: call block_tail_dup +; CHECK-NEXT: return $0 +; CHECK-NEXT: .LBB7_3: # %bb9 +; CHECK-NEXT: end_block # label1: +; CHECK-NEXT: call $0=, def +; CHECK-NEXT: .LBB7_4: # %bb11 +; CHECK-NEXT: end_block # label0: +; CHECK-NEXT: call block_tail_dup +; CHECK-NEXT: return $0 bb: %tmp = icmp eq i32 %arg3, 0 br i1 %tmp, label %bb5, label %bb9 @@ -109,9 +153,24 @@ bb11: ; This is the same as drop_result, except we let tail dup happen, so the ; result of the memset *is* stackified. -; CHECK-LABEL: tail_dup_to_reuse_result: -; CHECK: call $push{{[0-9]+}}=, memset, $0, $1, $2 define ptr @tail_dup_to_reuse_result(ptr %arg, i8 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) { +; CHECK-LABEL: tail_dup_to_reuse_result: +; CHECK: .functype tail_dup_to_reuse_result (i32, i32, i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: # %bb +; CHECK-NEXT: block +; CHECK-NEXT: block +; CHECK-NEXT: br_if 0, $3 # 0: down to label3 +; CHECK-NEXT: # %bb.1: # %bb5 +; CHECK-NEXT: br_if 1, $4 # 1: down to label2 +; CHECK-NEXT: # %bb.2: # %bb7 +; CHECK-NEXT: call $push0=, memset, $0, $1, $2 +; CHECK-NEXT: return $pop0 +; CHECK-NEXT: .LBB8_3: # %bb9 +; CHECK-NEXT: end_block # label3: +; CHECK-NEXT: call $0=, def +; CHECK-NEXT: .LBB8_4: # %bb11 +; CHECK-NEXT: end_block # label2: +; CHECK-NEXT: return $0 bb: %tmp = icmp eq i32 %arg3, 0 br i1 %tmp, label %bb5, label %bb9 diff --git a/llvm/test/CodeGen/WebAssembly/offset-fastisel.ll b/llvm/test/CodeGen/WebAssembly/offset-fastisel.ll index f94a6a8..b896f8f 100644 --- a/llvm/test/CodeGen/WebAssembly/offset-fastisel.ll +++ b/llvm/test/CodeGen/WebAssembly/offset-fastisel.ll @@ -1,83 +1,112 @@ -; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -fast-isel-abort=1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -fast-isel-abort=1 | FileCheck %s ; TODO: Merge this with offset.ll when fast-isel matches better. target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: store_i8_with_variable_gep_offset: -; CHECK: i32.add $push[[L0:[0-9]+]]=, $0, $1{{$}} -; CHECK: i32.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK: i32.store8 0($pop[[L0]]), $pop[[L1]]{{$}} define void @store_i8_with_variable_gep_offset(ptr %p, i32 %idx) { +; CHECK-LABEL: store_i8_with_variable_gep_offset: +; CHECK: .functype store_i8_with_variable_gep_offset (i32, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.add $push1=, $0, $1 +; CHECK-NEXT: i32.const $push0=, 0 +; CHECK-NEXT: i32.store8 0($pop1), $pop0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i8, ptr %p, i32 %idx store i8 0, ptr %s ret void } -; CHECK-LABEL: store_i8_with_array_alloca_gep: -; CHECK: global.get $push[[L0:[0-9]+]]=, __stack_pointer -; CHECK: i32.const $push[[L1:[0-9]+]]=, 32{{$}} -; CHECK: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; CHECK: local.copy $push[[L3:[0-9]+]]=, $pop[[L2]] -; CHECK: i32.add $push[[L4:[0-9]+]]=, $pop[[L3]], $0{{$}} -; CHECK: i32.const $push[[L5:[0-9]+]]=, 0{{$}} -; CHECK: i32.store8 0($pop[[L4]]), $pop[[L5]]{{$}} define hidden void @store_i8_with_array_alloca_gep(i32 %idx) { +; CHECK-LABEL: store_i8_with_array_alloca_gep: +; CHECK: .functype store_i8_with_array_alloca_gep (i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push3=, __stack_pointer +; CHECK-NEXT: i32.const $push4=, 32 +; CHECK-NEXT: i32.sub $push5=, $pop3, $pop4 +; CHECK-NEXT: local.copy $push1=, $pop5 +; CHECK-NEXT: i32.add $push2=, $pop1, $0 +; CHECK-NEXT: i32.const $push0=, 0 +; CHECK-NEXT: i32.store8 0($pop2), $pop0 +; CHECK-NEXT: # fallthrough-return %A = alloca [30 x i8], align 16 %s = getelementptr inbounds [30 x i8], ptr %A, i32 0, i32 %idx store i8 0, ptr %s, align 1 ret void } -; CHECK-LABEL: store_i32_with_unfolded_gep_offset: -; CHECK: i32.const $push[[L0:[0-9]+]]=, 24{{$}} -; CHECK: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK: i32.const $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK: i32.store 0($pop[[L1]]), $pop[[L2]]{{$}} define void @store_i32_with_unfolded_gep_offset(ptr %p) { +; CHECK-LABEL: store_i32_with_unfolded_gep_offset: +; CHECK: .functype store_i32_with_unfolded_gep_offset (i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push1=, 24 +; CHECK-NEXT: i32.add $push2=, $0, $pop1 +; CHECK-NEXT: i32.const $push0=, 0 +; CHECK-NEXT: i32.store 0($pop2), $pop0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr i32, ptr %p, i32 6 store i32 0, ptr %s ret void } -; CHECK-LABEL: store_i32_with_folded_gep_offset: -; CHECK: i32.store 24($0), $pop{{[0-9]+$}} define void @store_i32_with_folded_gep_offset(ptr %p) { +; CHECK-LABEL: store_i32_with_folded_gep_offset: +; CHECK: .functype store_i32_with_folded_gep_offset (i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 0 +; CHECK-NEXT: i32.store 24($0), $pop0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i32, ptr %p, i32 6 store i32 0, ptr %s ret void } -; CHECK-LABEL: load_i32_with_folded_gep_offset: -; CHECK: i32.load $push{{[0-9]+}}=, 24($0){{$}} define i32 @load_i32_with_folded_gep_offset(ptr %p) { +; CHECK-LABEL: load_i32_with_folded_gep_offset: +; CHECK: .functype load_i32_with_folded_gep_offset (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load $push0=, 24($0) +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i32, ptr %p, i32 6 %t = load i32, ptr %s ret i32 %t } -; CHECK-LABEL: store_i64_with_unfolded_gep_offset: -; CHECK: i32.const $push[[L0:[0-9]+]]=, 24{{$}} -; CHECK: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK: i64.const $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK: i64.store 0($pop[[L1]]), $pop[[L2]]{{$}} define void @store_i64_with_unfolded_gep_offset(ptr %p) { +; CHECK-LABEL: store_i64_with_unfolded_gep_offset: +; CHECK: .functype store_i64_with_unfolded_gep_offset (i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push1=, 24 +; CHECK-NEXT: i32.add $push2=, $0, $pop1 +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.store 0($pop2), $pop0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr i64, ptr %p, i32 3 store i64 0, ptr %s ret void } -; CHECK-LABEL: store_i8_with_folded_gep_offset: -; CHECK: i32.store8 24($0), $pop{{[0-9]+$}} define void @store_i8_with_folded_gep_offset(ptr %p) { +; CHECK-LABEL: store_i8_with_folded_gep_offset: +; CHECK: .functype store_i8_with_folded_gep_offset (i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 0 +; CHECK-NEXT: i32.store8 24($0), $pop0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i8, ptr %p, i32 24 store i8 0, ptr %s ret void } -; CHECK-LABEL: load_i8_u_with_folded_offset: -; CHECK: i32.load8_u $push{{[0-9]+}}=, 24($0){{$}} define i32 @load_i8_u_with_folded_offset(ptr %p) { +; CHECK-LABEL: load_i8_u_with_folded_offset: +; CHECK: .functype load_i8_u_with_folded_offset (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load8_u $push2=, 24($0) +; CHECK-NEXT: i32.const $push0=, 255 +; CHECK-NEXT: i32.and $push1=, $pop2, $pop0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to ptr @@ -87,9 +116,16 @@ define i32 @load_i8_u_with_folded_offset(ptr %p) { } ; TODO: this should be load8_s, need to fold sign-/zero-extend in fast-isel -; CHECK-LABEL: load_i8_s_with_folded_offset: -; CHECK: i32.load8_u $push{{[0-9]+}}=, 24($0){{$}} define i32 @load_i8_s_with_folded_offset(ptr %p) { +; CHECK-LABEL: load_i8_s_with_folded_offset: +; CHECK: .functype load_i8_s_with_folded_offset (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load8_u $push3=, 24($0) +; CHECK-NEXT: i32.const $push0=, 24 +; CHECK-NEXT: i32.shl $push1=, $pop3, $pop0 +; CHECK-NEXT: i32.const $push4=, 24 +; CHECK-NEXT: i32.shr_s $push2=, $pop1, $pop4 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint ptr %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to ptr diff --git a/llvm/test/CodeGen/WebAssembly/return-int32.ll b/llvm/test/CodeGen/WebAssembly/return-int32.ll index 32fbc10..e680562 100644 --- a/llvm/test/CodeGen/WebAssembly/return-int32.ll +++ b/llvm/test/CodeGen/WebAssembly/return-int32.ll @@ -1,24 +1,66 @@ -; RUN: llc < %s -asm-verbose=false -wasm-keep-registers | FileCheck %s -; RUN: llc < %s -asm-verbose=false -wasm-keep-registers -fast-isel -fast-isel-abort=1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -wasm-keep-registers -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=FAST target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: return_i32: -; CHECK-NEXT: .functype return_i32 (i32) -> (i32){{$}} -; CHECK-NEXT: local.get $push0=, 0 -; CHECK-NEXT: end_function{{$}} define i32 @return_i32(i32 %p) { +; CHECK-LABEL: return_i32: +; CHECK: .functype return_i32 (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get $push0=, 0 +; CHECK-NEXT: # fallthrough-return +; +; FAST-LABEL: return_i32: +; FAST: .functype return_i32 (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: local.get $push0=, 0 +; FAST-NEXT: # fallthrough-return ret i32 %p } -; CHECK-LABEL: return_i32_twice: -; CHECK: store -; CHECK-NEXT: i32.const $push[[L0:[^,]+]]=, 1{{$}} -; CHECK-NEXT: return $pop[[L0]]{{$}} -; CHECK: store -; CHECK-NEXT: i32.const $push{{[^,]+}}=, 3{{$}} -; CHECK-NEXT: end_function{{$}} define i32 @return_i32_twice(i32 %a) { +; CHECK-LABEL: return_i32_twice: +; CHECK: .functype return_i32_twice (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: block +; CHECK-NEXT: local.get $push6=, 0 +; CHECK-NEXT: i32.eqz $push7=, $pop6 +; CHECK-NEXT: br_if 0, $pop7 # 0: down to label0 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: i32.const $push3=, 0 +; CHECK-NEXT: i32.const $push5=, 0 +; CHECK-NEXT: i32.store 0($pop3), $pop5 +; CHECK-NEXT: i32.const $push4=, 1 +; CHECK-NEXT: return $pop4 +; CHECK-NEXT: .LBB1_2: # %false +; CHECK-NEXT: end_block # label0: +; CHECK-NEXT: i32.const $push1=, 0 +; CHECK-NEXT: i32.const $push0=, 2 +; CHECK-NEXT: i32.store 0($pop1), $pop0 +; CHECK-NEXT: i32.const $push2=, 3 +; CHECK-NEXT: # fallthrough-return +; +; FAST-LABEL: return_i32_twice: +; FAST: .functype return_i32_twice (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: block +; FAST-NEXT: local.get $push6=, 0 +; FAST-NEXT: i32.eqz $push7=, $pop6 +; FAST-NEXT: br_if 0, $pop7 # 0: down to label0 +; FAST-NEXT: # %bb.1: # %true +; FAST-NEXT: i32.const $push4=, 0 +; FAST-NEXT: i32.const $push5=, 0 +; FAST-NEXT: i32.store 0($pop4), $pop5 +; FAST-NEXT: i32.const $push3=, 1 +; FAST-NEXT: return $pop3 +; FAST-NEXT: .LBB1_2: # %false +; FAST-NEXT: end_block # label0: +; FAST-NEXT: i32.const $push1=, 0 +; FAST-NEXT: i32.const $push2=, 2 +; FAST-NEXT: i32.store 0($pop1), $pop2 +; FAST-NEXT: i32.const $push0=, 3 +; FAST-NEXT: # fallthrough-return %b = icmp ne i32 %a, 0 br i1 %b, label %true, label %false diff --git a/llvm/test/CodeGen/WebAssembly/return-void.ll b/llvm/test/CodeGen/WebAssembly/return-void.ll index 7f2ef09..bf109e3 100644 --- a/llvm/test/CodeGen/WebAssembly/return-void.ll +++ b/llvm/test/CodeGen/WebAssembly/return-void.ll @@ -1,20 +1,60 @@ -; RUN: llc < %s -asm-verbose=false | FileCheck %s -; RUN: llc < %s -asm-verbose=false -fast-isel -fast-isel-abort=1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=FAST target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: return_void: -; CHECK: end_function{{$}} define void @return_void() { +; CHECK-LABEL: return_void: +; CHECK: .functype return_void () -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: # fallthrough-return +; +; FAST-LABEL: return_void: +; FAST: .functype return_void () -> () +; FAST-NEXT: # %bb.0: +; FAST-NEXT: # fallthrough-return ret void } -; CHECK-LABEL: return_void_twice: -; CHECK: store -; CHECK-NEXT: return{{$}} -; CHECK: store -; CHECK-NEXT: end_function{{$}} define void @return_void_twice(i32 %a) { +; CHECK-LABEL: return_void_twice: +; CHECK: .functype return_void_twice (i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: block +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.eqz +; CHECK-NEXT: br_if 0 # 0: down to label0 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: return +; CHECK-NEXT: .LBB1_2: # %false +; CHECK-NEXT: end_block # label0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.const 1 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: # fallthrough-return +; +; FAST-LABEL: return_void_twice: +; FAST: .functype return_void_twice (i32) -> () +; FAST-NEXT: # %bb.0: +; FAST-NEXT: block +; FAST-NEXT: local.get 0 +; FAST-NEXT: i32.eqz +; FAST-NEXT: br_if 0 # 0: down to label0 +; FAST-NEXT: # %bb.1: # %true +; FAST-NEXT: i32.const 0 +; FAST-NEXT: i32.const 0 +; FAST-NEXT: i32.store 0 +; FAST-NEXT: return +; FAST-NEXT: .LBB1_2: # %false +; FAST-NEXT: end_block # label0: +; FAST-NEXT: i32.const 0 +; FAST-NEXT: i32.const 1 +; FAST-NEXT: i32.store 0 +; FAST-NEXT: # fallthrough-return %b = icmp ne i32 %a, 0 br i1 %b, label %true, label %false diff --git a/llvm/test/CodeGen/WebAssembly/returned.ll b/llvm/test/CodeGen/WebAssembly/returned.ll index b00a670..e767e29 100644 --- a/llvm/test/CodeGen/WebAssembly/returned.ll +++ b/llvm/test/CodeGen/WebAssembly/returned.ll @@ -1,31 +1,34 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s ; Test that the "returned" attribute is optimized effectively. target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: _Z3foov: -; CHECK-NEXT: .functype _Z3foov () -> (i32){{$}} -; CHECK-NEXT: i32.const $push0=, 1{{$}} -; CHECK-NEXT: {{^}} call $push1=, _Znwm, $pop0{{$}} -; CHECK-NEXT: {{^}} call $push2=, _ZN5AppleC1Ev, $pop1{{$}} -; CHECK-NEXT: return $pop2{{$}} %class.Apple = type { i8 } declare noalias ptr @_Znwm(i32) declare ptr @_ZN5AppleC1Ev(ptr returned) define ptr @_Z3foov() { +; CHECK-LABEL: _Z3foov: +; CHECK: .functype _Z3foov () -> (i32) +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: i32.const $push0=, 1 +; CHECK-NEXT: call $push1=, _Znwm, $pop0 +; CHECK-NEXT: call $push2=, _ZN5AppleC1Ev, $pop1 +; CHECK-NEXT: return $pop2 entry: %call = tail call noalias ptr @_Znwm(i32 1) %call1 = tail call ptr @_ZN5AppleC1Ev(ptr %call) ret ptr %call } -; CHECK-LABEL: _Z3barPvS_l: -; CHECK-NEXT: .functype _Z3barPvS_l (i32, i32, i32) -> (i32){{$}} -; CHECK-NEXT: {{^}} call $push0=, memcpy, $0, $1, $2{{$}} -; CHECK-NEXT: return $pop0{{$}} declare ptr @memcpy(ptr returned, ptr, i32) define ptr @_Z3barPvS_l(ptr %p, ptr %s, i32 %n) { +; CHECK-LABEL: _Z3barPvS_l: +; CHECK: .functype _Z3barPvS_l (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: call $push0=, memcpy, $0, $1, $2 +; CHECK-NEXT: return $pop0 entry: %call = tail call ptr @memcpy(ptr %p, ptr %s, i32 %n) ret ptr %p @@ -33,13 +36,15 @@ entry: ; Test that the optimization isn't performed on constant arguments. -; CHECK-LABEL: test_constant_arg: -; CHECK: i32.const $push0=, global{{$}} -; CHECK-NEXT: {{^}} call $drop=, returns_arg, $pop0{{$}} -; CHECK-NEXT: return{{$}} @global = external global i32 @addr = global ptr @global define void @test_constant_arg() { +; CHECK-LABEL: test_constant_arg: +; CHECK: .functype test_constant_arg () -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, global +; CHECK-NEXT: call $drop=, returns_arg, $pop0 +; CHECK-NEXT: return %call = call ptr @returns_arg(ptr @global) ret void } @@ -47,16 +52,17 @@ declare ptr @returns_arg(ptr returned) ; Test that the optimization isn't performed on arguments without the ; "returned" attribute. - -; CHECK-LABEL: test_other_skipped: -; CHECK-NEXT: .functype test_other_skipped (i32, i32, f64) -> (){{$}} -; CHECK-NEXT: {{^}} call $drop=, do_something, $0, $1, $2{{$}} -; CHECK-NEXT: {{^}} call do_something_with_i32, $1{{$}} -; CHECK-NEXT: {{^}} call do_something_with_double, $2{{$}} declare i32 @do_something(i32 returned, i32, double) declare void @do_something_with_i32(i32) declare void @do_something_with_double(double) define void @test_other_skipped(i32 %a, i32 %b, double %c) { +; CHECK-LABEL: test_other_skipped: +; CHECK: .functype test_other_skipped (i32, i32, f64) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $drop=, do_something, $0, $1, $2 +; CHECK-NEXT: call do_something_with_i32, $1 +; CHECK-NEXT: call do_something_with_double, $2 +; CHECK-NEXT: return %call = call i32 @do_something(i32 %a, i32 %b, double %c) call void @do_something_with_i32(i32 %b) call void @do_something_with_double(double %c) @@ -64,13 +70,13 @@ define void @test_other_skipped(i32 %a, i32 %b, double %c) { } ; Test that the optimization is performed on arguments other than the first. - -; CHECK-LABEL: test_second_arg: -; CHECK-NEXT: .functype test_second_arg (i32, i32) -> (i32){{$}} -; CHECK-NEXT: {{^}} call $push0=, do_something_else, $0, $1{{$}} -; CHECK-NEXT: return $pop0{{$}} declare i32 @do_something_else(i32, i32 returned) define i32 @test_second_arg(i32 %a, i32 %b) { +; CHECK-LABEL: test_second_arg: +; CHECK: .functype test_second_arg (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $push0=, do_something_else, $0, $1 +; CHECK-NEXT: return $pop0 %call = call i32 @do_something_else(i32 %a, i32 %b) ret i32 %b } diff --git a/llvm/test/CodeGen/WebAssembly/select.ll b/llvm/test/CodeGen/WebAssembly/select.ll index 53ad565..93faf2e 100644 --- a/llvm/test/CodeGen/WebAssembly/select.ll +++ b/llvm/test/CodeGen/WebAssembly/select.ll @@ -1,218 +1,383 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,SLOW -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -fast-isel-abort=1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=FAST ; Test that wasm select instruction is selected from LLVM select instruction. target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: select_i32_bool: -; CHECK-NEXT: .functype select_i32_bool (i32, i32, i32) -> (i32){{$}} -; CHECK-NEXT: i32.select $push0=, $1, $2, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define i32 @select_i32_bool(i1 zeroext %a, i32 %b, i32 %c) { +; CHECK-LABEL: select_i32_bool: +; CHECK: .functype select_i32_bool (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.select $push0=, $1, $2, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_i32_bool: +; FAST: .functype select_i32_bool (i32, i32, i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.select $push0=, $1, $2, $0 +; FAST-NEXT: return $pop0 %cond = select i1 %a, i32 %b, i32 %c ret i32 %cond } -; CHECK-LABEL: select_i32_bool_nozext: -; CHECK-NEXT: .functype select_i32_bool_nozext (i32, i32, i32) -> (i32){{$}} -; CHECK-NEXT: i32.const $push0=, 1{{$}} -; CHECK-NEXT: i32.and $push1=, $0, $pop0{{$}} -; CHECK-NEXT: i32.select $push2=, $1, $2, $pop1{{$}} -; CHECK-NEXT: return $pop2{{$}} define i32 @select_i32_bool_nozext(i1 %a, i32 %b, i32 %c) { +; CHECK-LABEL: select_i32_bool_nozext: +; CHECK: .functype select_i32_bool_nozext (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 1 +; CHECK-NEXT: i32.and $push1=, $0, $pop0 +; CHECK-NEXT: i32.select $push2=, $1, $2, $pop1 +; CHECK-NEXT: return $pop2 +; +; FAST-LABEL: select_i32_bool_nozext: +; FAST: .functype select_i32_bool_nozext (i32, i32, i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push0=, 1 +; FAST-NEXT: i32.and $push1=, $0, $pop0 +; FAST-NEXT: i32.select $push2=, $1, $2, $pop1 +; FAST-NEXT: return $pop2 %cond = select i1 %a, i32 %b, i32 %c ret i32 %cond } -; CHECK-LABEL: select_i32_eq: -; CHECK-NEXT: .functype select_i32_eq (i32, i32, i32) -> (i32){{$}} -; CHECK-NEXT: i32.select $push0=, $2, $1, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define i32 @select_i32_eq(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: select_i32_eq: +; CHECK: .functype select_i32_eq (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.select $push0=, $2, $1, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_i32_eq: +; FAST: .functype select_i32_eq (i32, i32, i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.select $push0=, $2, $1, $0 +; FAST-NEXT: return $pop0 %cmp = icmp eq i32 %a, 0 %cond = select i1 %cmp, i32 %b, i32 %c ret i32 %cond } -; CHECK-LABEL: select_i32_ne: -; CHECK-NEXT: .functype select_i32_ne (i32, i32, i32) -> (i32){{$}} -; CHECK-NEXT: i32.select $push0=, $1, $2, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define i32 @select_i32_ne(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: select_i32_ne: +; CHECK: .functype select_i32_ne (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.select $push0=, $1, $2, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_i32_ne: +; FAST: .functype select_i32_ne (i32, i32, i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.select $push0=, $1, $2, $0 +; FAST-NEXT: return $pop0 %cmp = icmp ne i32 %a, 0 %cond = select i1 %cmp, i32 %b, i32 %c ret i32 %cond } -; CHECK-LABEL: select_i64_bool: -; CHECK-NEXT: .functype select_i64_bool (i32, i64, i64) -> (i64){{$}} -; CHECK-NEXT: i64.select $push0=, $1, $2, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define i64 @select_i64_bool(i1 zeroext %a, i64 %b, i64 %c) { +; CHECK-LABEL: select_i64_bool: +; CHECK: .functype select_i64_bool (i32, i64, i64) -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.select $push0=, $1, $2, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_i64_bool: +; FAST: .functype select_i64_bool (i32, i64, i64) -> (i64) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i64.select $push0=, $1, $2, $0 +; FAST-NEXT: return $pop0 %cond = select i1 %a, i64 %b, i64 %c ret i64 %cond } -; CHECK-LABEL: select_i64_bool_nozext: -; CHECK-NEXT: .functype select_i64_bool_nozext (i32, i64, i64) -> (i64){{$}} -; CHECK-NEXT: i32.const $push0=, 1{{$}} -; CHECK-NEXT: i32.and $push1=, $0, $pop0{{$}} -; CHECK-NEXT: i64.select $push2=, $1, $2, $pop1{{$}} -; CHECK-NEXT: return $pop2{{$}} define i64 @select_i64_bool_nozext(i1 %a, i64 %b, i64 %c) { +; CHECK-LABEL: select_i64_bool_nozext: +; CHECK: .functype select_i64_bool_nozext (i32, i64, i64) -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 1 +; CHECK-NEXT: i32.and $push1=, $0, $pop0 +; CHECK-NEXT: i64.select $push2=, $1, $2, $pop1 +; CHECK-NEXT: return $pop2 +; +; FAST-LABEL: select_i64_bool_nozext: +; FAST: .functype select_i64_bool_nozext (i32, i64, i64) -> (i64) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push0=, 1 +; FAST-NEXT: i32.and $push1=, $0, $pop0 +; FAST-NEXT: i64.select $push2=, $1, $2, $pop1 +; FAST-NEXT: return $pop2 %cond = select i1 %a, i64 %b, i64 %c ret i64 %cond } -; CHECK-LABEL: select_i64_eq: -; CHECK-NEXT: .functype select_i64_eq (i32, i64, i64) -> (i64){{$}} -; CHECK-NEXT: i64.select $push0=, $2, $1, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define i64 @select_i64_eq(i32 %a, i64 %b, i64 %c) { +; CHECK-LABEL: select_i64_eq: +; CHECK: .functype select_i64_eq (i32, i64, i64) -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.select $push0=, $2, $1, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_i64_eq: +; FAST: .functype select_i64_eq (i32, i64, i64) -> (i64) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i64.select $push0=, $2, $1, $0 +; FAST-NEXT: return $pop0 %cmp = icmp eq i32 %a, 0 %cond = select i1 %cmp, i64 %b, i64 %c ret i64 %cond } -; CHECK-LABEL: select_i64_ne: -; CHECK-NEXT: .functype select_i64_ne (i32, i64, i64) -> (i64){{$}} -; CHECK-NEXT: i64.select $push0=, $1, $2, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define i64 @select_i64_ne(i32 %a, i64 %b, i64 %c) { +; CHECK-LABEL: select_i64_ne: +; CHECK: .functype select_i64_ne (i32, i64, i64) -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.select $push0=, $1, $2, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_i64_ne: +; FAST: .functype select_i64_ne (i32, i64, i64) -> (i64) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i64.select $push0=, $1, $2, $0 +; FAST-NEXT: return $pop0 %cmp = icmp ne i32 %a, 0 %cond = select i1 %cmp, i64 %b, i64 %c ret i64 %cond } -; CHECK-LABEL: select_f32_bool: -; CHECK-NEXT: .functype select_f32_bool (i32, f32, f32) -> (f32){{$}} -; CHECK-NEXT: f32.select $push0=, $1, $2, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define float @select_f32_bool(i1 zeroext %a, float %b, float %c) { +; CHECK-LABEL: select_f32_bool: +; CHECK: .functype select_f32_bool (i32, f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: f32.select $push0=, $1, $2, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_f32_bool: +; FAST: .functype select_f32_bool (i32, f32, f32) -> (f32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: f32.select $push0=, $1, $2, $0 +; FAST-NEXT: return $pop0 %cond = select i1 %a, float %b, float %c ret float %cond } -; CHECK-LABEL: select_f32_bool_nozext: -; CHECK-NEXT: .functype select_f32_bool_nozext (i32, f32, f32) -> (f32){{$}} -; CHECK-NEXT: i32.const $push0=, 1{{$}} -; CHECK-NEXT: i32.and $push1=, $0, $pop0{{$}} -; CHECK-NEXT: f32.select $push2=, $1, $2, $pop1{{$}} -; CHECK-NEXT: return $pop2{{$}} define float @select_f32_bool_nozext(i1 %a, float %b, float %c) { +; CHECK-LABEL: select_f32_bool_nozext: +; CHECK: .functype select_f32_bool_nozext (i32, f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 1 +; CHECK-NEXT: i32.and $push1=, $0, $pop0 +; CHECK-NEXT: f32.select $push2=, $1, $2, $pop1 +; CHECK-NEXT: return $pop2 +; +; FAST-LABEL: select_f32_bool_nozext: +; FAST: .functype select_f32_bool_nozext (i32, f32, f32) -> (f32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push0=, 1 +; FAST-NEXT: i32.and $push1=, $0, $pop0 +; FAST-NEXT: f32.select $push2=, $1, $2, $pop1 +; FAST-NEXT: return $pop2 %cond = select i1 %a, float %b, float %c ret float %cond } -; CHECK-LABEL: select_f32_eq: -; CHECK-NEXT: .functype select_f32_eq (i32, f32, f32) -> (f32){{$}} -; CHECK-NEXT: f32.select $push0=, $2, $1, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define float @select_f32_eq(i32 %a, float %b, float %c) { +; CHECK-LABEL: select_f32_eq: +; CHECK: .functype select_f32_eq (i32, f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: f32.select $push0=, $2, $1, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_f32_eq: +; FAST: .functype select_f32_eq (i32, f32, f32) -> (f32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: f32.select $push0=, $2, $1, $0 +; FAST-NEXT: return $pop0 %cmp = icmp eq i32 %a, 0 %cond = select i1 %cmp, float %b, float %c ret float %cond } -; CHECK-LABEL: select_f32_ne: -; CHECK-NEXT: .functype select_f32_ne (i32, f32, f32) -> (f32){{$}} -; CHECK-NEXT: f32.select $push0=, $1, $2, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define float @select_f32_ne(i32 %a, float %b, float %c) { +; CHECK-LABEL: select_f32_ne: +; CHECK: .functype select_f32_ne (i32, f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: f32.select $push0=, $1, $2, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_f32_ne: +; FAST: .functype select_f32_ne (i32, f32, f32) -> (f32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: f32.select $push0=, $1, $2, $0 +; FAST-NEXT: return $pop0 %cmp = icmp ne i32 %a, 0 %cond = select i1 %cmp, float %b, float %c ret float %cond } -; CHECK-LABEL: select_f64_bool: -; CHECK-NEXT: .functype select_f64_bool (i32, f64, f64) -> (f64){{$}} -; CHECK-NEXT: f64.select $push0=, $1, $2, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define double @select_f64_bool(i1 zeroext %a, double %b, double %c) { +; CHECK-LABEL: select_f64_bool: +; CHECK: .functype select_f64_bool (i32, f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: f64.select $push0=, $1, $2, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_f64_bool: +; FAST: .functype select_f64_bool (i32, f64, f64) -> (f64) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: f64.select $push0=, $1, $2, $0 +; FAST-NEXT: return $pop0 %cond = select i1 %a, double %b, double %c ret double %cond } -; CHECK-LABEL: select_f64_bool_nozext: -; CHECK-NEXT: .functype select_f64_bool_nozext (i32, f64, f64) -> (f64){{$}} -; CHECK-NEXT: i32.const $push0=, 1{{$}} -; CHECK-NEXT: i32.and $push1=, $0, $pop0{{$}} -; CHECK-NEXT: f64.select $push2=, $1, $2, $pop1{{$}} -; CHECK-NEXT: return $pop2{{$}} define double @select_f64_bool_nozext(i1 %a, double %b, double %c) { +; CHECK-LABEL: select_f64_bool_nozext: +; CHECK: .functype select_f64_bool_nozext (i32, f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 1 +; CHECK-NEXT: i32.and $push1=, $0, $pop0 +; CHECK-NEXT: f64.select $push2=, $1, $2, $pop1 +; CHECK-NEXT: return $pop2 +; +; FAST-LABEL: select_f64_bool_nozext: +; FAST: .functype select_f64_bool_nozext (i32, f64, f64) -> (f64) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push0=, 1 +; FAST-NEXT: i32.and $push1=, $0, $pop0 +; FAST-NEXT: f64.select $push2=, $1, $2, $pop1 +; FAST-NEXT: return $pop2 %cond = select i1 %a, double %b, double %c ret double %cond } -; CHECK-LABEL: select_f64_eq: -; CHECK-NEXT: .functype select_f64_eq (i32, f64, f64) -> (f64){{$}} -; CHECK-NEXT: f64.select $push0=, $2, $1, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define double @select_f64_eq(i32 %a, double %b, double %c) { +; CHECK-LABEL: select_f64_eq: +; CHECK: .functype select_f64_eq (i32, f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: f64.select $push0=, $2, $1, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_f64_eq: +; FAST: .functype select_f64_eq (i32, f64, f64) -> (f64) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: f64.select $push0=, $2, $1, $0 +; FAST-NEXT: return $pop0 %cmp = icmp eq i32 %a, 0 %cond = select i1 %cmp, double %b, double %c ret double %cond } -; CHECK-LABEL: select_f64_ne: -; CHECK-NEXT: .functype select_f64_ne (i32, f64, f64) -> (f64){{$}} -; CHECK-NEXT: f64.select $push0=, $1, $2, $0{{$}} -; CHECK-NEXT: return $pop0{{$}} define double @select_f64_ne(i32 %a, double %b, double %c) { +; CHECK-LABEL: select_f64_ne: +; CHECK: .functype select_f64_ne (i32, f64, f64) -> (f64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: f64.select $push0=, $1, $2, $0 +; CHECK-NEXT: return $pop0 +; +; FAST-LABEL: select_f64_ne: +; FAST: .functype select_f64_ne (i32, f64, f64) -> (f64) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: f64.select $push0=, $1, $2, $0 +; FAST-NEXT: return $pop0 %cmp = icmp ne i32 %a, 0 %cond = select i1 %cmp, double %b, double %c ret double %cond } -; CHECK-LABEL: pr40805_i32: -; CHECK-NEXT: .functype pr40805_i32 (i32, i32, i32) -> (i32){{$}} -; SLOW-NEXT: i32.const $push0=, 1{{$}} -; SLOW-NEXT: i32.and $push1=, $0, $pop0{{$}} -; SLOW-NEXT: i32.select $push2=, $1, $2, $pop1{{$}} -; SLOW-NEXT: return $pop2{{$}} define i32 @pr40805_i32(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: pr40805_i32: +; CHECK: .functype pr40805_i32 (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 1 +; CHECK-NEXT: i32.and $push1=, $0, $pop0 +; CHECK-NEXT: i32.select $push2=, $1, $2, $pop1 +; CHECK-NEXT: return $pop2 +; +; FAST-LABEL: pr40805_i32: +; FAST: .functype pr40805_i32 (i32, i32, i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push1=, 1 +; FAST-NEXT: i32.and $push2=, $0, $pop1 +; FAST-NEXT: i32.select $push0=, $1, $2, $pop2 +; FAST-NEXT: return $pop0 %a = and i32 %x, 1 %b = icmp ne i32 %a, 0 %c = select i1 %b, i32 %y, i32 %z ret i32 %c } -; CHECK-LABEL: pr40805_i64: -; CHECK-NEXT: .functype pr40805_i64 (i64, i64, i64) -> (i64){{$}} -; SLOW-NEXT: i32.wrap_i64 $push0=, $0{{$}} -; SLOW-NEXT: i32.const $push1=, 1{{$}} -; SLOW-NEXT: i32.and $push2=, $pop0, $pop1{{$}} -; SLOW-NEXT: i64.select $push3=, $1, $2, $pop2{{$}} -; SLOW-NEXT: return $pop3{{$}} define i64 @pr40805_i64(i64 %x, i64 %y, i64 %z) { +; CHECK-LABEL: pr40805_i64: +; CHECK: .functype pr40805_i64 (i64, i64, i64) -> (i64) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.wrap_i64 $push0=, $0 +; CHECK-NEXT: i32.const $push1=, 1 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop1 +; CHECK-NEXT: i64.select $push3=, $1, $2, $pop2 +; CHECK-NEXT: return $pop3 +; +; FAST-LABEL: pr40805_i64: +; FAST: .functype pr40805_i64 (i64, i64, i64) -> (i64) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i64.const $push5=, 1 +; FAST-NEXT: i64.and $push6=, $0, $pop5 +; FAST-NEXT: i64.const $push3=, 0 +; FAST-NEXT: i64.ne $push4=, $pop6, $pop3 +; FAST-NEXT: i32.const $push0=, 1 +; FAST-NEXT: i32.and $push1=, $pop4, $pop0 +; FAST-NEXT: i64.select $push2=, $1, $2, $pop1 +; FAST-NEXT: return $pop2 %a = and i64 %x, 1 %b = icmp ne i64 %a, 0 %c = select i1 %b, i64 %y, i64 %z ret i64 %c } -; CHECK-LABEL: pr44012_i32: -; CHECK-NEXT: .functype pr44012_i32 (i32, f32, f32) -> (f32){{$}} -; SLOW-NEXT: i32.const $push0=, 1{{$}} -; SLOW-NEXT: i32.and $push1=, $0, $pop0{{$}} -; SLOW-NEXT: f32.select $push2=, $1, $2, $pop1{{$}} -; SLOW-NEXT: return $pop2{{$}} define float @pr44012_i32(i32 %x, float %y, float %z) { +; CHECK-LABEL: pr44012_i32: +; CHECK: .functype pr44012_i32 (i32, f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 1 +; CHECK-NEXT: i32.and $push1=, $0, $pop0 +; CHECK-NEXT: f32.select $push2=, $1, $2, $pop1 +; CHECK-NEXT: return $pop2 +; +; FAST-LABEL: pr44012_i32: +; FAST: .functype pr44012_i32 (i32, f32, f32) -> (f32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push1=, 1 +; FAST-NEXT: i32.and $push2=, $0, $pop1 +; FAST-NEXT: f32.select $push0=, $1, $2, $pop2 +; FAST-NEXT: return $pop0 %a = and i32 %x, 1 %b = icmp ne i32 %a, 0 %c = select i1 %b, float %y, float %z ret float %c } -; CHECK-LABEL: pr44012_i64: -; CHECK-NEXT: .functype pr44012_i64 (i64, f32, f32) -> (f32){{$}} -; SLOW-NEXT: i32.wrap_i64 $push0=, $0{{$}} -; SLOW-NEXT: i32.const $push1=, 1{{$}} -; SLOW-NEXT: i32.and $push2=, $pop0, $pop1{{$}} -; SLOW-NEXT: f32.select $push3=, $1, $2, $pop2{{$}} -; SLOW-NEXT: return $pop3{{$}} define float @pr44012_i64(i64 %x, float %y, float %z) { +; CHECK-LABEL: pr44012_i64: +; CHECK: .functype pr44012_i64 (i64, f32, f32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.wrap_i64 $push0=, $0 +; CHECK-NEXT: i32.const $push1=, 1 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop1 +; CHECK-NEXT: f32.select $push3=, $1, $2, $pop2 +; CHECK-NEXT: return $pop3 +; +; FAST-LABEL: pr44012_i64: +; FAST: .functype pr44012_i64 (i64, f32, f32) -> (f32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i64.const $push5=, 1 +; FAST-NEXT: i64.and $push6=, $0, $pop5 +; FAST-NEXT: i64.const $push3=, 0 +; FAST-NEXT: i64.ne $push4=, $pop6, $pop3 +; FAST-NEXT: i32.const $push0=, 1 +; FAST-NEXT: i32.and $push1=, $pop4, $pop0 +; FAST-NEXT: f32.select $push2=, $1, $2, $pop1 +; FAST-NEXT: return $pop2 %a = and i64 %x, 1 %b = icmp ne i64 %a, 0 %c = select i1 %b, float %y, float %z diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll index 78fdccc..013482f 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -1,10 +1,8 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW - -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST - -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,NO-SIMD128 - -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=SIMD128 +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefix=SIMD128-FAST +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefix=NO-SIMD128 +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel | FileCheck %s --check-prefix=NO-SIMD128-FAST ; check that a non-test run (including explicit locals pass) at least finishes ; RUN: llc < %s -O0 -mattr=+simd128 @@ -17,87 +15,1908 @@ target triple = "wasm32-unknown-unknown" ; ============================================================================== ; 16 x i8 ; ============================================================================== -; CHECK-LABEL: add_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype add_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: add_v16i8: +; SIMD128: .functype add_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.add $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: add_v16i8: +; SIMD128-FAST: .functype add_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.add $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: add_v16i8: +; NO-SIMD128: .functype add_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.add $push0=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 +; NO-SIMD128-NEXT: i32.add $push1=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 +; NO-SIMD128-NEXT: i32.add $push2=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-NEXT: i32.add $push3=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 +; NO-SIMD128-NEXT: i32.add $push4=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push6=, 15 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.add $push5=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 +; NO-SIMD128-NEXT: i32.const $push9=, 14 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.add $push8=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 13 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.add $push11=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 12 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.add $push14=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 11 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.add $push17=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 +; NO-SIMD128-NEXT: i32.const $push21=, 10 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.add $push20=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push24=, 9 +; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-NEXT: i32.add $push23=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-NEXT: i32.const $push27=, 7 +; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-NEXT: i32.add $push26=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.const $push30=, 6 +; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-NEXT: i32.add $push29=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 +; NO-SIMD128-NEXT: i32.const $push33=, 5 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-NEXT: i32.add $push32=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-NEXT: i32.const $push36=, 3 +; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-NEXT: i32.add $push35=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: add_v16i8: +; NO-SIMD128-FAST: .functype add_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.add $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.add $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.add $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = add <16 x i8> %x, %y ret <16 x i8> %a } -; CHECK-LABEL: sub_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype sub_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: sub_v16i8: +; SIMD128: .functype sub_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.sub $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: sub_v16i8: +; SIMD128-FAST: .functype sub_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.sub $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: sub_v16i8: +; NO-SIMD128: .functype sub_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.sub $push0=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 +; NO-SIMD128-NEXT: i32.sub $push1=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 +; NO-SIMD128-NEXT: i32.sub $push2=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-NEXT: i32.sub $push3=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 +; NO-SIMD128-NEXT: i32.sub $push4=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push6=, 15 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.sub $push5=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 +; NO-SIMD128-NEXT: i32.const $push9=, 14 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.sub $push8=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 13 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.sub $push11=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 12 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.sub $push14=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 11 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.sub $push17=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 +; NO-SIMD128-NEXT: i32.const $push21=, 10 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.sub $push20=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push24=, 9 +; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-NEXT: i32.sub $push23=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-NEXT: i32.const $push27=, 7 +; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-NEXT: i32.sub $push26=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.const $push30=, 6 +; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-NEXT: i32.sub $push29=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 +; NO-SIMD128-NEXT: i32.const $push33=, 5 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-NEXT: i32.sub $push32=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-NEXT: i32.const $push36=, 3 +; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-NEXT: i32.sub $push35=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sub_v16i8: +; NO-SIMD128-FAST: .functype sub_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.sub $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.sub $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.sub $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.sub $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.sub $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.sub $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.sub $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> %x, %y ret <16 x i8> %a } -; i8x16.mul is not in spec -; CHECK-LABEL: mul_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NOT: i8x16.mul -; SIMD128: i8x16.extract_lane_u -; SIMD128: i32.mul define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: mul_v16i8: +; SIMD128: .functype mul_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.extract_lane_u $push4=, $0, 0 +; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $1, 0 +; SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 +; SIMD128-NEXT: i8x16.splat $push6=, $pop5 +; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 1 +; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 1 +; SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 +; SIMD128-NEXT: i8x16.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-NEXT: i8x16.extract_lane_u $push9=, $0, 2 +; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $1, 2 +; SIMD128-NEXT: i32.mul $push10=, $pop9, $pop8 +; SIMD128-NEXT: i8x16.replace_lane $push11=, $pop7, 2, $pop10 +; SIMD128-NEXT: i8x16.extract_lane_u $push13=, $0, 3 +; SIMD128-NEXT: i8x16.extract_lane_u $push12=, $1, 3 +; SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12 +; SIMD128-NEXT: i8x16.replace_lane $push15=, $pop11, 3, $pop14 +; SIMD128-NEXT: i8x16.extract_lane_u $push17=, $0, 4 +; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $1, 4 +; SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16 +; SIMD128-NEXT: i8x16.replace_lane $push19=, $pop15, 4, $pop18 +; SIMD128-NEXT: i8x16.extract_lane_u $push21=, $0, 5 +; SIMD128-NEXT: i8x16.extract_lane_u $push20=, $1, 5 +; SIMD128-NEXT: i32.mul $push22=, $pop21, $pop20 +; SIMD128-NEXT: i8x16.replace_lane $push23=, $pop19, 5, $pop22 +; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $0, 6 +; SIMD128-NEXT: i8x16.extract_lane_u $push24=, $1, 6 +; SIMD128-NEXT: i32.mul $push26=, $pop25, $pop24 +; SIMD128-NEXT: i8x16.replace_lane $push27=, $pop23, 6, $pop26 +; SIMD128-NEXT: i8x16.extract_lane_u $push29=, $0, 7 +; SIMD128-NEXT: i8x16.extract_lane_u $push28=, $1, 7 +; SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28 +; SIMD128-NEXT: i8x16.replace_lane $push31=, $pop27, 7, $pop30 +; SIMD128-NEXT: i8x16.extract_lane_u $push33=, $0, 8 +; SIMD128-NEXT: i8x16.extract_lane_u $push32=, $1, 8 +; SIMD128-NEXT: i32.mul $push34=, $pop33, $pop32 +; SIMD128-NEXT: i8x16.replace_lane $push35=, $pop31, 8, $pop34 +; SIMD128-NEXT: i8x16.extract_lane_u $push37=, $0, 9 +; SIMD128-NEXT: i8x16.extract_lane_u $push36=, $1, 9 +; SIMD128-NEXT: i32.mul $push38=, $pop37, $pop36 +; SIMD128-NEXT: i8x16.replace_lane $push39=, $pop35, 9, $pop38 +; SIMD128-NEXT: i8x16.extract_lane_u $push41=, $0, 10 +; SIMD128-NEXT: i8x16.extract_lane_u $push40=, $1, 10 +; SIMD128-NEXT: i32.mul $push42=, $pop41, $pop40 +; SIMD128-NEXT: i8x16.replace_lane $push43=, $pop39, 10, $pop42 +; SIMD128-NEXT: i8x16.extract_lane_u $push45=, $0, 11 +; SIMD128-NEXT: i8x16.extract_lane_u $push44=, $1, 11 +; SIMD128-NEXT: i32.mul $push46=, $pop45, $pop44 +; SIMD128-NEXT: i8x16.replace_lane $push47=, $pop43, 11, $pop46 +; SIMD128-NEXT: i8x16.extract_lane_u $push49=, $0, 12 +; SIMD128-NEXT: i8x16.extract_lane_u $push48=, $1, 12 +; SIMD128-NEXT: i32.mul $push50=, $pop49, $pop48 +; SIMD128-NEXT: i8x16.replace_lane $push51=, $pop47, 12, $pop50 +; SIMD128-NEXT: i8x16.extract_lane_u $push53=, $0, 13 +; SIMD128-NEXT: i8x16.extract_lane_u $push52=, $1, 13 +; SIMD128-NEXT: i32.mul $push54=, $pop53, $pop52 +; SIMD128-NEXT: i8x16.replace_lane $push55=, $pop51, 13, $pop54 +; SIMD128-NEXT: i8x16.extract_lane_u $push57=, $0, 14 +; SIMD128-NEXT: i8x16.extract_lane_u $push56=, $1, 14 +; SIMD128-NEXT: i32.mul $push58=, $pop57, $pop56 +; SIMD128-NEXT: i8x16.replace_lane $push59=, $pop55, 14, $pop58 +; SIMD128-NEXT: i8x16.extract_lane_u $push61=, $0, 15 +; SIMD128-NEXT: i8x16.extract_lane_u $push60=, $1, 15 +; SIMD128-NEXT: i32.mul $push62=, $pop61, $pop60 +; SIMD128-NEXT: i8x16.replace_lane $push63=, $pop59, 15, $pop62 +; SIMD128-NEXT: return $pop63 +; +; SIMD128-FAST-LABEL: mul_v16i8: +; SIMD128-FAST: .functype mul_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push5=, $0, 0 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push4=, $1, 0 +; SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: i8x16.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push2=, $0, 1 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push1=, $1, 1 +; SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push8=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push10=, $0, 2 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push9=, $1, 2 +; SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push12=, $pop8, 2, $pop11 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push14=, $0, 3 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push13=, $1, 3 +; SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push16=, $pop12, 3, $pop15 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push18=, $0, 4 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push17=, $1, 4 +; SIMD128-FAST-NEXT: i32.mul $push19=, $pop18, $pop17 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push20=, $pop16, 4, $pop19 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push22=, $0, 5 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push21=, $1, 5 +; SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push24=, $pop20, 5, $pop23 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push26=, $0, 6 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push25=, $1, 6 +; SIMD128-FAST-NEXT: i32.mul $push27=, $pop26, $pop25 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push28=, $pop24, 6, $pop27 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push30=, $0, 7 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push29=, $1, 7 +; SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push32=, $pop28, 7, $pop31 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push34=, $0, 8 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push33=, $1, 8 +; SIMD128-FAST-NEXT: i32.mul $push35=, $pop34, $pop33 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push36=, $pop32, 8, $pop35 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push38=, $0, 9 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push37=, $1, 9 +; SIMD128-FAST-NEXT: i32.mul $push39=, $pop38, $pop37 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push40=, $pop36, 9, $pop39 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push42=, $0, 10 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push41=, $1, 10 +; SIMD128-FAST-NEXT: i32.mul $push43=, $pop42, $pop41 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push44=, $pop40, 10, $pop43 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push46=, $0, 11 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push45=, $1, 11 +; SIMD128-FAST-NEXT: i32.mul $push47=, $pop46, $pop45 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push48=, $pop44, 11, $pop47 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push50=, $0, 12 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push49=, $1, 12 +; SIMD128-FAST-NEXT: i32.mul $push51=, $pop50, $pop49 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push52=, $pop48, 12, $pop51 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push54=, $0, 13 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push53=, $1, 13 +; SIMD128-FAST-NEXT: i32.mul $push55=, $pop54, $pop53 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push56=, $pop52, 13, $pop55 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push58=, $0, 14 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push57=, $1, 14 +; SIMD128-FAST-NEXT: i32.mul $push59=, $pop58, $pop57 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push60=, $pop56, 14, $pop59 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push62=, $0, 15 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push61=, $1, 15 +; SIMD128-FAST-NEXT: i32.mul $push63=, $pop62, $pop61 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push0=, $pop60, 15, $pop63 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: mul_v16i8: +; NO-SIMD128: .functype mul_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.mul $push0=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 +; NO-SIMD128-NEXT: i32.mul $push1=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 +; NO-SIMD128-NEXT: i32.mul $push2=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-NEXT: i32.mul $push3=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 +; NO-SIMD128-NEXT: i32.mul $push4=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push6=, 15 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.mul $push5=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 +; NO-SIMD128-NEXT: i32.const $push9=, 14 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.mul $push8=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 13 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.mul $push11=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 12 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.mul $push14=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 11 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.mul $push17=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 +; NO-SIMD128-NEXT: i32.const $push21=, 10 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.mul $push20=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push24=, 9 +; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-NEXT: i32.mul $push23=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-NEXT: i32.const $push27=, 7 +; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-NEXT: i32.mul $push26=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.const $push30=, 6 +; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-NEXT: i32.mul $push29=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 +; NO-SIMD128-NEXT: i32.const $push33=, 5 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-NEXT: i32.mul $push32=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-NEXT: i32.const $push36=, 3 +; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-NEXT: i32.mul $push35=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: mul_v16i8: +; NO-SIMD128-FAST: .functype mul_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.mul $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.mul $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.mul $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.mul $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.mul $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.mul $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.mul $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = mul <16 x i8> %x, %y ret <16 x i8> %a } -; CHECK-LABEL: min_s_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype min_s_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.min_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: min_s_v16i8: +; SIMD128: .functype min_s_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.min_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: min_s_v16i8: +; SIMD128-FAST: .functype min_s_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.min_s $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_s_v16i8: +; NO-SIMD128: .functype min_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push4=, 15 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16 +; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32 +; NO-SIMD128-NEXT: i32.lt_s $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i32.select $push3=, $16, $32, $pop2 +; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.const $push10=, 14 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $15 +; NO-SIMD128-NEXT: i32.extend8_s $push6=, $31 +; NO-SIMD128-NEXT: i32.lt_s $push8=, $pop7, $pop6 +; NO-SIMD128-NEXT: i32.select $push9=, $15, $31, $pop8 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-NEXT: i32.const $push16=, 13 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $14 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $30 +; NO-SIMD128-NEXT: i32.lt_s $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.select $push15=, $14, $30, $pop14 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.const $push22=, 12 +; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $13 +; NO-SIMD128-NEXT: i32.extend8_s $push18=, $29 +; NO-SIMD128-NEXT: i32.lt_s $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.select $push21=, $13, $29, $pop20 +; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 +; NO-SIMD128-NEXT: i32.const $push28=, 11 +; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $12 +; NO-SIMD128-NEXT: i32.extend8_s $push24=, $28 +; NO-SIMD128-NEXT: i32.lt_s $push26=, $pop25, $pop24 +; NO-SIMD128-NEXT: i32.select $push27=, $12, $28, $pop26 +; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 +; NO-SIMD128-NEXT: i32.const $push34=, 10 +; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11 +; NO-SIMD128-NEXT: i32.extend8_s $push30=, $27 +; NO-SIMD128-NEXT: i32.lt_s $push32=, $pop31, $pop30 +; NO-SIMD128-NEXT: i32.select $push33=, $11, $27, $pop32 +; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-NEXT: i32.const $push40=, 9 +; NO-SIMD128-NEXT: i32.add $push41=, $0, $pop40 +; NO-SIMD128-NEXT: i32.extend8_s $push37=, $10 +; NO-SIMD128-NEXT: i32.extend8_s $push36=, $26 +; NO-SIMD128-NEXT: i32.lt_s $push38=, $pop37, $pop36 +; NO-SIMD128-NEXT: i32.select $push39=, $10, $26, $pop38 +; NO-SIMD128-NEXT: i32.store8 0($pop41), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push43=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push42=, $25 +; NO-SIMD128-NEXT: i32.lt_s $push44=, $pop43, $pop42 +; NO-SIMD128-NEXT: i32.select $push45=, $9, $25, $pop44 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop45 +; NO-SIMD128-NEXT: i32.const $push50=, 7 +; NO-SIMD128-NEXT: i32.add $push51=, $0, $pop50 +; NO-SIMD128-NEXT: i32.extend8_s $push47=, $8 +; NO-SIMD128-NEXT: i32.extend8_s $push46=, $24 +; NO-SIMD128-NEXT: i32.lt_s $push48=, $pop47, $pop46 +; NO-SIMD128-NEXT: i32.select $push49=, $8, $24, $pop48 +; NO-SIMD128-NEXT: i32.store8 0($pop51), $pop49 +; NO-SIMD128-NEXT: i32.const $push56=, 6 +; NO-SIMD128-NEXT: i32.add $push57=, $0, $pop56 +; NO-SIMD128-NEXT: i32.extend8_s $push53=, $7 +; NO-SIMD128-NEXT: i32.extend8_s $push52=, $23 +; NO-SIMD128-NEXT: i32.lt_s $push54=, $pop53, $pop52 +; NO-SIMD128-NEXT: i32.select $push55=, $7, $23, $pop54 +; NO-SIMD128-NEXT: i32.store8 0($pop57), $pop55 +; NO-SIMD128-NEXT: i32.const $push62=, 5 +; NO-SIMD128-NEXT: i32.add $push63=, $0, $pop62 +; NO-SIMD128-NEXT: i32.extend8_s $push59=, $6 +; NO-SIMD128-NEXT: i32.extend8_s $push58=, $22 +; NO-SIMD128-NEXT: i32.lt_s $push60=, $pop59, $pop58 +; NO-SIMD128-NEXT: i32.select $push61=, $6, $22, $pop60 +; NO-SIMD128-NEXT: i32.store8 0($pop63), $pop61 +; NO-SIMD128-NEXT: i32.extend8_s $push65=, $5 +; NO-SIMD128-NEXT: i32.extend8_s $push64=, $21 +; NO-SIMD128-NEXT: i32.lt_s $push66=, $pop65, $pop64 +; NO-SIMD128-NEXT: i32.select $push67=, $5, $21, $pop66 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop67 +; NO-SIMD128-NEXT: i32.const $push72=, 3 +; NO-SIMD128-NEXT: i32.add $push73=, $0, $pop72 +; NO-SIMD128-NEXT: i32.extend8_s $push69=, $4 +; NO-SIMD128-NEXT: i32.extend8_s $push68=, $20 +; NO-SIMD128-NEXT: i32.lt_s $push70=, $pop69, $pop68 +; NO-SIMD128-NEXT: i32.select $push71=, $4, $20, $pop70 +; NO-SIMD128-NEXT: i32.store8 0($pop73), $pop71 +; NO-SIMD128-NEXT: i32.extend8_s $push75=, $3 +; NO-SIMD128-NEXT: i32.extend8_s $push74=, $19 +; NO-SIMD128-NEXT: i32.lt_s $push76=, $pop75, $pop74 +; NO-SIMD128-NEXT: i32.select $push77=, $3, $19, $pop76 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop77 +; NO-SIMD128-NEXT: i32.extend8_s $push79=, $2 +; NO-SIMD128-NEXT: i32.extend8_s $push78=, $18 +; NO-SIMD128-NEXT: i32.lt_s $push80=, $pop79, $pop78 +; NO-SIMD128-NEXT: i32.select $push81=, $2, $18, $pop80 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop81 +; NO-SIMD128-NEXT: i32.extend8_s $push83=, $1 +; NO-SIMD128-NEXT: i32.extend8_s $push82=, $17 +; NO-SIMD128-NEXT: i32.lt_s $push84=, $pop83, $pop82 +; NO-SIMD128-NEXT: i32.select $push85=, $1, $17, $pop84 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop85 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_s_v16i8: +; NO-SIMD128-FAST: .functype min_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push1=, $1 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push0=, $17 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.select $push3=, $1, $17, $pop2 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $2 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push4=, $18 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.select $push7=, $2, $18, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $3 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push8=, $19 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push10=, $pop9, $pop8 +; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $19, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push16=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $20 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $20, $pop14 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $21 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $22 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $22, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $23 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $23, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $24 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push36=, $pop35, $pop34 +; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $24, $pop36 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $9 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $25 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push42=, $pop41, $pop40 +; NO-SIMD128-FAST-NEXT: i32.select $push43=, $9, $25, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop43 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $26 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push46=, $pop45, $pop44 +; NO-SIMD128-FAST-NEXT: i32.select $push47=, $10, $26, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push51=, $11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $27 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push52=, $pop51, $pop50 +; NO-SIMD128-FAST-NEXT: i32.select $push53=, $11, $27, $pop52 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $28 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push58=, $pop57, $pop56 +; NO-SIMD128-FAST-NEXT: i32.select $push59=, $12, $28, $pop58 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop59 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push63=, $13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push62=, $29 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push64=, $pop63, $pop62 +; NO-SIMD128-FAST-NEXT: i32.select $push65=, $13, $29, $pop64 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop65 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push73=, $0, $pop72 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push68=, $30 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push70=, $pop69, $pop68 +; NO-SIMD128-FAST-NEXT: i32.select $push71=, $14, $30, $pop70 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop73), $pop71 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push79=, $0, $pop78 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push75=, $15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push74=, $31 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push76=, $pop75, $pop74 +; NO-SIMD128-FAST-NEXT: i32.select $push77=, $15, $31, $pop76 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop79), $pop77 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push85=, $0, $pop84 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push81=, $16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push80=, $32 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push82=, $pop81, $pop80 +; NO-SIMD128-FAST-NEXT: i32.select $push83=, $16, $32, $pop82 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop85), $pop83 +; NO-SIMD128-FAST-NEXT: return %c = icmp slt <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %a } -; CHECK-LABEL: min_u_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype min_u_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.min_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: min_u_v16i8: +; SIMD128: .functype min_u_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.min_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: min_u_v16i8: +; SIMD128-FAST: .functype min_u_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.min_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_u_v16i8: +; NO-SIMD128: .functype min_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push5=, 15 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 +; NO-SIMD128-NEXT: i32.const $push117=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop117 +; NO-SIMD128-NEXT: i32.lt_u $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.select $push4=, $16, $32, $pop3 +; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push11=, 14 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.const $push116=, 255 +; NO-SIMD128-NEXT: i32.and $push8=, $15, $pop116 +; NO-SIMD128-NEXT: i32.const $push115=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $31, $pop115 +; NO-SIMD128-NEXT: i32.lt_u $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.select $push10=, $15, $31, $pop9 +; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push17=, 13 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.const $push114=, 255 +; NO-SIMD128-NEXT: i32.and $push14=, $14, $pop114 +; NO-SIMD128-NEXT: i32.const $push113=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $30, $pop113 +; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $14, $30, $pop15 +; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.const $push23=, 12 +; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-NEXT: i32.const $push112=, 255 +; NO-SIMD128-NEXT: i32.and $push20=, $13, $pop112 +; NO-SIMD128-NEXT: i32.const $push111=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $29, $pop111 +; NO-SIMD128-NEXT: i32.lt_u $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.select $push22=, $13, $29, $pop21 +; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.const $push29=, 11 +; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-NEXT: i32.const $push110=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $12, $pop110 +; NO-SIMD128-NEXT: i32.const $push109=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $28, $pop109 +; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.select $push28=, $12, $28, $pop27 +; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-NEXT: i32.const $push35=, 10 +; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-NEXT: i32.const $push108=, 255 +; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop108 +; NO-SIMD128-NEXT: i32.const $push107=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop107 +; NO-SIMD128-NEXT: i32.lt_u $push33=, $pop32, $pop31 +; NO-SIMD128-NEXT: i32.select $push34=, $11, $27, $pop33 +; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 +; NO-SIMD128-NEXT: i32.const $push41=, 9 +; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-NEXT: i32.const $push106=, 255 +; NO-SIMD128-NEXT: i32.and $push38=, $10, $pop106 +; NO-SIMD128-NEXT: i32.const $push105=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $26, $pop105 +; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.select $push40=, $10, $26, $pop39 +; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-NEXT: i32.const $push104=, 255 +; NO-SIMD128-NEXT: i32.and $push44=, $9, $pop104 +; NO-SIMD128-NEXT: i32.const $push103=, 255 +; NO-SIMD128-NEXT: i32.and $push43=, $25, $pop103 +; NO-SIMD128-NEXT: i32.lt_u $push45=, $pop44, $pop43 +; NO-SIMD128-NEXT: i32.select $push46=, $9, $25, $pop45 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 +; NO-SIMD128-NEXT: i32.const $push51=, 7 +; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-NEXT: i32.const $push102=, 255 +; NO-SIMD128-NEXT: i32.and $push48=, $8, $pop102 +; NO-SIMD128-NEXT: i32.const $push101=, 255 +; NO-SIMD128-NEXT: i32.and $push47=, $24, $pop101 +; NO-SIMD128-NEXT: i32.lt_u $push49=, $pop48, $pop47 +; NO-SIMD128-NEXT: i32.select $push50=, $8, $24, $pop49 +; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 +; NO-SIMD128-NEXT: i32.const $push57=, 6 +; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 +; NO-SIMD128-NEXT: i32.const $push100=, 255 +; NO-SIMD128-NEXT: i32.and $push54=, $7, $pop100 +; NO-SIMD128-NEXT: i32.const $push99=, 255 +; NO-SIMD128-NEXT: i32.and $push53=, $23, $pop99 +; NO-SIMD128-NEXT: i32.lt_u $push55=, $pop54, $pop53 +; NO-SIMD128-NEXT: i32.select $push56=, $7, $23, $pop55 +; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 +; NO-SIMD128-NEXT: i32.const $push63=, 5 +; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 +; NO-SIMD128-NEXT: i32.const $push98=, 255 +; NO-SIMD128-NEXT: i32.and $push60=, $6, $pop98 +; NO-SIMD128-NEXT: i32.const $push97=, 255 +; NO-SIMD128-NEXT: i32.and $push59=, $22, $pop97 +; NO-SIMD128-NEXT: i32.lt_u $push61=, $pop60, $pop59 +; NO-SIMD128-NEXT: i32.select $push62=, $6, $22, $pop61 +; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 +; NO-SIMD128-NEXT: i32.const $push96=, 255 +; NO-SIMD128-NEXT: i32.and $push66=, $5, $pop96 +; NO-SIMD128-NEXT: i32.const $push95=, 255 +; NO-SIMD128-NEXT: i32.and $push65=, $21, $pop95 +; NO-SIMD128-NEXT: i32.lt_u $push67=, $pop66, $pop65 +; NO-SIMD128-NEXT: i32.select $push68=, $5, $21, $pop67 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 +; NO-SIMD128-NEXT: i32.const $push73=, 3 +; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-NEXT: i32.const $push94=, 255 +; NO-SIMD128-NEXT: i32.and $push70=, $4, $pop94 +; NO-SIMD128-NEXT: i32.const $push93=, 255 +; NO-SIMD128-NEXT: i32.and $push69=, $20, $pop93 +; NO-SIMD128-NEXT: i32.lt_u $push71=, $pop70, $pop69 +; NO-SIMD128-NEXT: i32.select $push72=, $4, $20, $pop71 +; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-NEXT: i32.const $push92=, 255 +; NO-SIMD128-NEXT: i32.and $push76=, $3, $pop92 +; NO-SIMD128-NEXT: i32.const $push91=, 255 +; NO-SIMD128-NEXT: i32.and $push75=, $19, $pop91 +; NO-SIMD128-NEXT: i32.lt_u $push77=, $pop76, $pop75 +; NO-SIMD128-NEXT: i32.select $push78=, $3, $19, $pop77 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 +; NO-SIMD128-NEXT: i32.const $push90=, 255 +; NO-SIMD128-NEXT: i32.and $push80=, $2, $pop90 +; NO-SIMD128-NEXT: i32.const $push89=, 255 +; NO-SIMD128-NEXT: i32.and $push79=, $18, $pop89 +; NO-SIMD128-NEXT: i32.lt_u $push81=, $pop80, $pop79 +; NO-SIMD128-NEXT: i32.select $push82=, $2, $18, $pop81 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 +; NO-SIMD128-NEXT: i32.const $push88=, 255 +; NO-SIMD128-NEXT: i32.and $push84=, $1, $pop88 +; NO-SIMD128-NEXT: i32.const $push87=, 255 +; NO-SIMD128-NEXT: i32.and $push83=, $17, $pop87 +; NO-SIMD128-NEXT: i32.lt_u $push85=, $pop84, $pop83 +; NO-SIMD128-NEXT: i32.select $push86=, $1, $17, $pop85 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_u_v16i8: +; NO-SIMD128-FAST: .functype min_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop117 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $17, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop116 +; NO-SIMD128-FAST-NEXT: i32.const $push115=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop115 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push7=, $pop6, $pop5 +; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $18, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push114=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop114 +; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop113 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $19, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop112 +; NO-SIMD128-FAST-NEXT: i32.const $push111=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop111 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $20, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push110=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop110 +; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop109 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $21, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop108 +; NO-SIMD128-FAST-NEXT: i32.const $push107=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop107 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $22, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push106=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop106 +; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $23, $pop105 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push31=, $pop30, $pop29 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $23, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop104 +; NO-SIMD128-FAST-NEXT: i32.const $push103=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $24, $pop103 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push37=, $pop36, $pop35 +; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $24, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push102=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $9, $pop102 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $25, $pop101 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push43=, $pop42, $pop41 +; NO-SIMD128-FAST-NEXT: i32.select $push44=, $9, $25, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $10, $pop100 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $26, $pop99 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push47=, $pop46, $pop45 +; NO-SIMD128-FAST-NEXT: i32.select $push48=, $10, $26, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push52=, $11, $pop98 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $27, $pop97 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push53=, $pop52, $pop51 +; NO-SIMD128-FAST-NEXT: i32.select $push54=, $11, $27, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $12, $pop96 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $28, $pop95 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push59=, $pop58, $pop57 +; NO-SIMD128-FAST-NEXT: i32.select $push60=, $12, $28, $pop59 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push64=, $13, $pop94 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $29, $pop93 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push65=, $pop64, $pop63 +; NO-SIMD128-FAST-NEXT: i32.select $push66=, $13, $29, $pop65 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push70=, $14, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push69=, $30, $pop91 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push71=, $pop70, $pop69 +; NO-SIMD128-FAST-NEXT: i32.select $push72=, $14, $30, $pop71 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push76=, $15, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push75=, $31, $pop89 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push77=, $pop76, $pop75 +; NO-SIMD128-FAST-NEXT: i32.select $push78=, $15, $31, $pop77 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push82=, $16, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push81=, $32, $pop87 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push83=, $pop82, $pop81 +; NO-SIMD128-FAST-NEXT: i32.select $push84=, $16, $32, $pop83 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: return %c = icmp ult <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %a } -; CHECK-LABEL: max_s_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype max_s_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.max_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: max_s_v16i8: +; SIMD128: .functype max_s_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.max_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: max_s_v16i8: +; SIMD128-FAST: .functype max_s_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.max_s $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_s_v16i8: +; NO-SIMD128: .functype max_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push4=, 15 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16 +; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32 +; NO-SIMD128-NEXT: i32.gt_s $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i32.select $push3=, $16, $32, $pop2 +; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.const $push10=, 14 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $15 +; NO-SIMD128-NEXT: i32.extend8_s $push6=, $31 +; NO-SIMD128-NEXT: i32.gt_s $push8=, $pop7, $pop6 +; NO-SIMD128-NEXT: i32.select $push9=, $15, $31, $pop8 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-NEXT: i32.const $push16=, 13 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $14 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $30 +; NO-SIMD128-NEXT: i32.gt_s $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.select $push15=, $14, $30, $pop14 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.const $push22=, 12 +; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $13 +; NO-SIMD128-NEXT: i32.extend8_s $push18=, $29 +; NO-SIMD128-NEXT: i32.gt_s $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.select $push21=, $13, $29, $pop20 +; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 +; NO-SIMD128-NEXT: i32.const $push28=, 11 +; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $12 +; NO-SIMD128-NEXT: i32.extend8_s $push24=, $28 +; NO-SIMD128-NEXT: i32.gt_s $push26=, $pop25, $pop24 +; NO-SIMD128-NEXT: i32.select $push27=, $12, $28, $pop26 +; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 +; NO-SIMD128-NEXT: i32.const $push34=, 10 +; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11 +; NO-SIMD128-NEXT: i32.extend8_s $push30=, $27 +; NO-SIMD128-NEXT: i32.gt_s $push32=, $pop31, $pop30 +; NO-SIMD128-NEXT: i32.select $push33=, $11, $27, $pop32 +; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-NEXT: i32.const $push40=, 9 +; NO-SIMD128-NEXT: i32.add $push41=, $0, $pop40 +; NO-SIMD128-NEXT: i32.extend8_s $push37=, $10 +; NO-SIMD128-NEXT: i32.extend8_s $push36=, $26 +; NO-SIMD128-NEXT: i32.gt_s $push38=, $pop37, $pop36 +; NO-SIMD128-NEXT: i32.select $push39=, $10, $26, $pop38 +; NO-SIMD128-NEXT: i32.store8 0($pop41), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push43=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push42=, $25 +; NO-SIMD128-NEXT: i32.gt_s $push44=, $pop43, $pop42 +; NO-SIMD128-NEXT: i32.select $push45=, $9, $25, $pop44 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop45 +; NO-SIMD128-NEXT: i32.const $push50=, 7 +; NO-SIMD128-NEXT: i32.add $push51=, $0, $pop50 +; NO-SIMD128-NEXT: i32.extend8_s $push47=, $8 +; NO-SIMD128-NEXT: i32.extend8_s $push46=, $24 +; NO-SIMD128-NEXT: i32.gt_s $push48=, $pop47, $pop46 +; NO-SIMD128-NEXT: i32.select $push49=, $8, $24, $pop48 +; NO-SIMD128-NEXT: i32.store8 0($pop51), $pop49 +; NO-SIMD128-NEXT: i32.const $push56=, 6 +; NO-SIMD128-NEXT: i32.add $push57=, $0, $pop56 +; NO-SIMD128-NEXT: i32.extend8_s $push53=, $7 +; NO-SIMD128-NEXT: i32.extend8_s $push52=, $23 +; NO-SIMD128-NEXT: i32.gt_s $push54=, $pop53, $pop52 +; NO-SIMD128-NEXT: i32.select $push55=, $7, $23, $pop54 +; NO-SIMD128-NEXT: i32.store8 0($pop57), $pop55 +; NO-SIMD128-NEXT: i32.const $push62=, 5 +; NO-SIMD128-NEXT: i32.add $push63=, $0, $pop62 +; NO-SIMD128-NEXT: i32.extend8_s $push59=, $6 +; NO-SIMD128-NEXT: i32.extend8_s $push58=, $22 +; NO-SIMD128-NEXT: i32.gt_s $push60=, $pop59, $pop58 +; NO-SIMD128-NEXT: i32.select $push61=, $6, $22, $pop60 +; NO-SIMD128-NEXT: i32.store8 0($pop63), $pop61 +; NO-SIMD128-NEXT: i32.extend8_s $push65=, $5 +; NO-SIMD128-NEXT: i32.extend8_s $push64=, $21 +; NO-SIMD128-NEXT: i32.gt_s $push66=, $pop65, $pop64 +; NO-SIMD128-NEXT: i32.select $push67=, $5, $21, $pop66 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop67 +; NO-SIMD128-NEXT: i32.const $push72=, 3 +; NO-SIMD128-NEXT: i32.add $push73=, $0, $pop72 +; NO-SIMD128-NEXT: i32.extend8_s $push69=, $4 +; NO-SIMD128-NEXT: i32.extend8_s $push68=, $20 +; NO-SIMD128-NEXT: i32.gt_s $push70=, $pop69, $pop68 +; NO-SIMD128-NEXT: i32.select $push71=, $4, $20, $pop70 +; NO-SIMD128-NEXT: i32.store8 0($pop73), $pop71 +; NO-SIMD128-NEXT: i32.extend8_s $push75=, $3 +; NO-SIMD128-NEXT: i32.extend8_s $push74=, $19 +; NO-SIMD128-NEXT: i32.gt_s $push76=, $pop75, $pop74 +; NO-SIMD128-NEXT: i32.select $push77=, $3, $19, $pop76 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop77 +; NO-SIMD128-NEXT: i32.extend8_s $push79=, $2 +; NO-SIMD128-NEXT: i32.extend8_s $push78=, $18 +; NO-SIMD128-NEXT: i32.gt_s $push80=, $pop79, $pop78 +; NO-SIMD128-NEXT: i32.select $push81=, $2, $18, $pop80 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop81 +; NO-SIMD128-NEXT: i32.extend8_s $push83=, $1 +; NO-SIMD128-NEXT: i32.extend8_s $push82=, $17 +; NO-SIMD128-NEXT: i32.gt_s $push84=, $pop83, $pop82 +; NO-SIMD128-NEXT: i32.select $push85=, $1, $17, $pop84 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop85 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_s_v16i8: +; NO-SIMD128-FAST: .functype max_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push1=, $1 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push0=, $17 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.select $push3=, $1, $17, $pop2 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $2 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push4=, $18 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.select $push7=, $2, $18, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $3 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push8=, $19 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push10=, $pop9, $pop8 +; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $19, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push16=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $20 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $20, $pop14 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $21 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $22 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $22, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $23 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $23, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $24 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push36=, $pop35, $pop34 +; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $24, $pop36 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $9 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $25 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push42=, $pop41, $pop40 +; NO-SIMD128-FAST-NEXT: i32.select $push43=, $9, $25, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop43 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $26 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push46=, $pop45, $pop44 +; NO-SIMD128-FAST-NEXT: i32.select $push47=, $10, $26, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push51=, $11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $27 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push52=, $pop51, $pop50 +; NO-SIMD128-FAST-NEXT: i32.select $push53=, $11, $27, $pop52 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $28 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push58=, $pop57, $pop56 +; NO-SIMD128-FAST-NEXT: i32.select $push59=, $12, $28, $pop58 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop59 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push63=, $13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push62=, $29 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push64=, $pop63, $pop62 +; NO-SIMD128-FAST-NEXT: i32.select $push65=, $13, $29, $pop64 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop65 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push73=, $0, $pop72 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push68=, $30 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push70=, $pop69, $pop68 +; NO-SIMD128-FAST-NEXT: i32.select $push71=, $14, $30, $pop70 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop73), $pop71 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push79=, $0, $pop78 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push75=, $15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push74=, $31 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push76=, $pop75, $pop74 +; NO-SIMD128-FAST-NEXT: i32.select $push77=, $15, $31, $pop76 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop79), $pop77 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push85=, $0, $pop84 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push81=, $16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push80=, $32 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push82=, $pop81, $pop80 +; NO-SIMD128-FAST-NEXT: i32.select $push83=, $16, $32, $pop82 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop85), $pop83 +; NO-SIMD128-FAST-NEXT: return %c = icmp sgt <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %a } -; CHECK-LABEL: max_u_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype max_u_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.max_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: max_u_v16i8: +; SIMD128: .functype max_u_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.max_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: max_u_v16i8: +; SIMD128-FAST: .functype max_u_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.max_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_u_v16i8: +; NO-SIMD128: .functype max_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push5=, 15 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 +; NO-SIMD128-NEXT: i32.const $push117=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop117 +; NO-SIMD128-NEXT: i32.gt_u $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.select $push4=, $16, $32, $pop3 +; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push11=, 14 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.const $push116=, 255 +; NO-SIMD128-NEXT: i32.and $push8=, $15, $pop116 +; NO-SIMD128-NEXT: i32.const $push115=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $31, $pop115 +; NO-SIMD128-NEXT: i32.gt_u $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.select $push10=, $15, $31, $pop9 +; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push17=, 13 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.const $push114=, 255 +; NO-SIMD128-NEXT: i32.and $push14=, $14, $pop114 +; NO-SIMD128-NEXT: i32.const $push113=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $30, $pop113 +; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $14, $30, $pop15 +; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.const $push23=, 12 +; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-NEXT: i32.const $push112=, 255 +; NO-SIMD128-NEXT: i32.and $push20=, $13, $pop112 +; NO-SIMD128-NEXT: i32.const $push111=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $29, $pop111 +; NO-SIMD128-NEXT: i32.gt_u $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.select $push22=, $13, $29, $pop21 +; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.const $push29=, 11 +; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-NEXT: i32.const $push110=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $12, $pop110 +; NO-SIMD128-NEXT: i32.const $push109=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $28, $pop109 +; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.select $push28=, $12, $28, $pop27 +; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-NEXT: i32.const $push35=, 10 +; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-NEXT: i32.const $push108=, 255 +; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop108 +; NO-SIMD128-NEXT: i32.const $push107=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop107 +; NO-SIMD128-NEXT: i32.gt_u $push33=, $pop32, $pop31 +; NO-SIMD128-NEXT: i32.select $push34=, $11, $27, $pop33 +; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 +; NO-SIMD128-NEXT: i32.const $push41=, 9 +; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-NEXT: i32.const $push106=, 255 +; NO-SIMD128-NEXT: i32.and $push38=, $10, $pop106 +; NO-SIMD128-NEXT: i32.const $push105=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $26, $pop105 +; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.select $push40=, $10, $26, $pop39 +; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-NEXT: i32.const $push104=, 255 +; NO-SIMD128-NEXT: i32.and $push44=, $9, $pop104 +; NO-SIMD128-NEXT: i32.const $push103=, 255 +; NO-SIMD128-NEXT: i32.and $push43=, $25, $pop103 +; NO-SIMD128-NEXT: i32.gt_u $push45=, $pop44, $pop43 +; NO-SIMD128-NEXT: i32.select $push46=, $9, $25, $pop45 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 +; NO-SIMD128-NEXT: i32.const $push51=, 7 +; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-NEXT: i32.const $push102=, 255 +; NO-SIMD128-NEXT: i32.and $push48=, $8, $pop102 +; NO-SIMD128-NEXT: i32.const $push101=, 255 +; NO-SIMD128-NEXT: i32.and $push47=, $24, $pop101 +; NO-SIMD128-NEXT: i32.gt_u $push49=, $pop48, $pop47 +; NO-SIMD128-NEXT: i32.select $push50=, $8, $24, $pop49 +; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 +; NO-SIMD128-NEXT: i32.const $push57=, 6 +; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 +; NO-SIMD128-NEXT: i32.const $push100=, 255 +; NO-SIMD128-NEXT: i32.and $push54=, $7, $pop100 +; NO-SIMD128-NEXT: i32.const $push99=, 255 +; NO-SIMD128-NEXT: i32.and $push53=, $23, $pop99 +; NO-SIMD128-NEXT: i32.gt_u $push55=, $pop54, $pop53 +; NO-SIMD128-NEXT: i32.select $push56=, $7, $23, $pop55 +; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 +; NO-SIMD128-NEXT: i32.const $push63=, 5 +; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 +; NO-SIMD128-NEXT: i32.const $push98=, 255 +; NO-SIMD128-NEXT: i32.and $push60=, $6, $pop98 +; NO-SIMD128-NEXT: i32.const $push97=, 255 +; NO-SIMD128-NEXT: i32.and $push59=, $22, $pop97 +; NO-SIMD128-NEXT: i32.gt_u $push61=, $pop60, $pop59 +; NO-SIMD128-NEXT: i32.select $push62=, $6, $22, $pop61 +; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 +; NO-SIMD128-NEXT: i32.const $push96=, 255 +; NO-SIMD128-NEXT: i32.and $push66=, $5, $pop96 +; NO-SIMD128-NEXT: i32.const $push95=, 255 +; NO-SIMD128-NEXT: i32.and $push65=, $21, $pop95 +; NO-SIMD128-NEXT: i32.gt_u $push67=, $pop66, $pop65 +; NO-SIMD128-NEXT: i32.select $push68=, $5, $21, $pop67 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 +; NO-SIMD128-NEXT: i32.const $push73=, 3 +; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-NEXT: i32.const $push94=, 255 +; NO-SIMD128-NEXT: i32.and $push70=, $4, $pop94 +; NO-SIMD128-NEXT: i32.const $push93=, 255 +; NO-SIMD128-NEXT: i32.and $push69=, $20, $pop93 +; NO-SIMD128-NEXT: i32.gt_u $push71=, $pop70, $pop69 +; NO-SIMD128-NEXT: i32.select $push72=, $4, $20, $pop71 +; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-NEXT: i32.const $push92=, 255 +; NO-SIMD128-NEXT: i32.and $push76=, $3, $pop92 +; NO-SIMD128-NEXT: i32.const $push91=, 255 +; NO-SIMD128-NEXT: i32.and $push75=, $19, $pop91 +; NO-SIMD128-NEXT: i32.gt_u $push77=, $pop76, $pop75 +; NO-SIMD128-NEXT: i32.select $push78=, $3, $19, $pop77 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 +; NO-SIMD128-NEXT: i32.const $push90=, 255 +; NO-SIMD128-NEXT: i32.and $push80=, $2, $pop90 +; NO-SIMD128-NEXT: i32.const $push89=, 255 +; NO-SIMD128-NEXT: i32.and $push79=, $18, $pop89 +; NO-SIMD128-NEXT: i32.gt_u $push81=, $pop80, $pop79 +; NO-SIMD128-NEXT: i32.select $push82=, $2, $18, $pop81 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 +; NO-SIMD128-NEXT: i32.const $push88=, 255 +; NO-SIMD128-NEXT: i32.and $push84=, $1, $pop88 +; NO-SIMD128-NEXT: i32.const $push87=, 255 +; NO-SIMD128-NEXT: i32.and $push83=, $17, $pop87 +; NO-SIMD128-NEXT: i32.gt_u $push85=, $pop84, $pop83 +; NO-SIMD128-NEXT: i32.select $push86=, $1, $17, $pop85 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_u_v16i8: +; NO-SIMD128-FAST: .functype max_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop117 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $17, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop116 +; NO-SIMD128-FAST-NEXT: i32.const $push115=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop115 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push7=, $pop6, $pop5 +; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $18, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push114=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop114 +; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop113 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $19, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop112 +; NO-SIMD128-FAST-NEXT: i32.const $push111=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop111 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $20, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push110=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop110 +; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop109 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $21, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop108 +; NO-SIMD128-FAST-NEXT: i32.const $push107=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop107 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $22, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push106=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop106 +; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $23, $pop105 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push31=, $pop30, $pop29 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $23, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop104 +; NO-SIMD128-FAST-NEXT: i32.const $push103=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $24, $pop103 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push37=, $pop36, $pop35 +; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $24, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push102=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $9, $pop102 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $25, $pop101 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push43=, $pop42, $pop41 +; NO-SIMD128-FAST-NEXT: i32.select $push44=, $9, $25, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $10, $pop100 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $26, $pop99 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push47=, $pop46, $pop45 +; NO-SIMD128-FAST-NEXT: i32.select $push48=, $10, $26, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push52=, $11, $pop98 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $27, $pop97 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push53=, $pop52, $pop51 +; NO-SIMD128-FAST-NEXT: i32.select $push54=, $11, $27, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $12, $pop96 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $28, $pop95 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push59=, $pop58, $pop57 +; NO-SIMD128-FAST-NEXT: i32.select $push60=, $12, $28, $pop59 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push64=, $13, $pop94 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $29, $pop93 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push65=, $pop64, $pop63 +; NO-SIMD128-FAST-NEXT: i32.select $push66=, $13, $29, $pop65 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push70=, $14, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push69=, $30, $pop91 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push71=, $pop70, $pop69 +; NO-SIMD128-FAST-NEXT: i32.select $push72=, $14, $30, $pop71 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push76=, $15, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push75=, $31, $pop89 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push77=, $pop76, $pop75 +; NO-SIMD128-FAST-NEXT: i32.select $push78=, $15, $31, $pop77 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push82=, $16, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push81=, $32, $pop87 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push83=, $pop82, $pop81 +; NO-SIMD128-FAST-NEXT: i32.select $push84=, $16, $32, $pop83 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: return %c = icmp ugt <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %a } -; CHECK-LABEL: avgr_u_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype avgr_u_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: avgr_u_v16i8: +; SIMD128: .functype avgr_u_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.avgr_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: avgr_u_v16i8: +; SIMD128-FAST: .functype avgr_u_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.avgr_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: avgr_u_v16i8: +; NO-SIMD128: .functype avgr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.add $push2=, $16, $32 +; NO-SIMD128-NEXT: i32.const $push3=, 1 +; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 254 +; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 +; NO-SIMD128-NEXT: i32.const $push133=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop133 +; NO-SIMD128-NEXT: i32.store8 0($pop1), $pop7 +; NO-SIMD128-NEXT: i32.const $push8=, 14 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.add $push10=, $15, $31 +; NO-SIMD128-NEXT: i32.const $push132=, 1 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop132 +; NO-SIMD128-NEXT: i32.const $push131=, 254 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop131 +; NO-SIMD128-NEXT: i32.const $push130=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop130 +; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop13 +; NO-SIMD128-NEXT: i32.const $push14=, 13 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.add $push16=, $14, $30 +; NO-SIMD128-NEXT: i32.const $push129=, 1 +; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop129 +; NO-SIMD128-NEXT: i32.const $push128=, 254 +; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop128 +; NO-SIMD128-NEXT: i32.const $push127=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop127 +; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop19 +; NO-SIMD128-NEXT: i32.const $push20=, 12 +; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-NEXT: i32.add $push22=, $13, $29 +; NO-SIMD128-NEXT: i32.const $push126=, 1 +; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop126 +; NO-SIMD128-NEXT: i32.const $push125=, 254 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop125 +; NO-SIMD128-NEXT: i32.const $push124=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop124 +; NO-SIMD128-NEXT: i32.store8 0($pop21), $pop25 +; NO-SIMD128-NEXT: i32.const $push26=, 11 +; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.add $push28=, $12, $28 +; NO-SIMD128-NEXT: i32.const $push123=, 1 +; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop123 +; NO-SIMD128-NEXT: i32.const $push122=, 254 +; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $pop122 +; NO-SIMD128-NEXT: i32.const $push121=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop121 +; NO-SIMD128-NEXT: i32.store8 0($pop27), $pop31 +; NO-SIMD128-NEXT: i32.const $push32=, 10 +; NO-SIMD128-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-NEXT: i32.add $push34=, $11, $27 +; NO-SIMD128-NEXT: i32.const $push120=, 1 +; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop120 +; NO-SIMD128-NEXT: i32.const $push119=, 254 +; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop119 +; NO-SIMD128-NEXT: i32.const $push118=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop118 +; NO-SIMD128-NEXT: i32.store8 0($pop33), $pop37 +; NO-SIMD128-NEXT: i32.const $push38=, 9 +; NO-SIMD128-NEXT: i32.add $push39=, $0, $pop38 +; NO-SIMD128-NEXT: i32.add $push40=, $10, $26 +; NO-SIMD128-NEXT: i32.const $push117=, 1 +; NO-SIMD128-NEXT: i32.add $push41=, $pop40, $pop117 +; NO-SIMD128-NEXT: i32.const $push116=, 254 +; NO-SIMD128-NEXT: i32.and $push42=, $pop41, $pop116 +; NO-SIMD128-NEXT: i32.const $push115=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop115 +; NO-SIMD128-NEXT: i32.store8 0($pop39), $pop43 +; NO-SIMD128-NEXT: i32.add $push44=, $9, $25 +; NO-SIMD128-NEXT: i32.const $push114=, 1 +; NO-SIMD128-NEXT: i32.add $push45=, $pop44, $pop114 +; NO-SIMD128-NEXT: i32.const $push113=, 254 +; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $pop113 +; NO-SIMD128-NEXT: i32.const $push112=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push47=, $pop46, $pop112 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop47 +; NO-SIMD128-NEXT: i32.const $push48=, 7 +; NO-SIMD128-NEXT: i32.add $push49=, $0, $pop48 +; NO-SIMD128-NEXT: i32.add $push50=, $8, $24 +; NO-SIMD128-NEXT: i32.const $push111=, 1 +; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop111 +; NO-SIMD128-NEXT: i32.const $push110=, 254 +; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop110 +; NO-SIMD128-NEXT: i32.const $push109=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop109 +; NO-SIMD128-NEXT: i32.store8 0($pop49), $pop53 +; NO-SIMD128-NEXT: i32.const $push54=, 6 +; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 +; NO-SIMD128-NEXT: i32.add $push56=, $7, $23 +; NO-SIMD128-NEXT: i32.const $push108=, 1 +; NO-SIMD128-NEXT: i32.add $push57=, $pop56, $pop108 +; NO-SIMD128-NEXT: i32.const $push107=, 254 +; NO-SIMD128-NEXT: i32.and $push58=, $pop57, $pop107 +; NO-SIMD128-NEXT: i32.const $push106=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push59=, $pop58, $pop106 +; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop59 +; NO-SIMD128-NEXT: i32.const $push60=, 5 +; NO-SIMD128-NEXT: i32.add $push61=, $0, $pop60 +; NO-SIMD128-NEXT: i32.add $push62=, $6, $22 +; NO-SIMD128-NEXT: i32.const $push105=, 1 +; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop105 +; NO-SIMD128-NEXT: i32.const $push104=, 254 +; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop104 +; NO-SIMD128-NEXT: i32.const $push103=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop103 +; NO-SIMD128-NEXT: i32.store8 0($pop61), $pop65 +; NO-SIMD128-NEXT: i32.add $push66=, $5, $21 +; NO-SIMD128-NEXT: i32.const $push102=, 1 +; NO-SIMD128-NEXT: i32.add $push67=, $pop66, $pop102 +; NO-SIMD128-NEXT: i32.const $push101=, 254 +; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $pop101 +; NO-SIMD128-NEXT: i32.const $push100=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push69=, $pop68, $pop100 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop69 +; NO-SIMD128-NEXT: i32.const $push70=, 3 +; NO-SIMD128-NEXT: i32.add $push71=, $0, $pop70 +; NO-SIMD128-NEXT: i32.add $push72=, $4, $20 +; NO-SIMD128-NEXT: i32.const $push99=, 1 +; NO-SIMD128-NEXT: i32.add $push73=, $pop72, $pop99 +; NO-SIMD128-NEXT: i32.const $push98=, 254 +; NO-SIMD128-NEXT: i32.and $push74=, $pop73, $pop98 +; NO-SIMD128-NEXT: i32.const $push97=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push75=, $pop74, $pop97 +; NO-SIMD128-NEXT: i32.store8 0($pop71), $pop75 +; NO-SIMD128-NEXT: i32.add $push76=, $3, $19 +; NO-SIMD128-NEXT: i32.const $push96=, 1 +; NO-SIMD128-NEXT: i32.add $push77=, $pop76, $pop96 +; NO-SIMD128-NEXT: i32.const $push95=, 254 +; NO-SIMD128-NEXT: i32.and $push78=, $pop77, $pop95 +; NO-SIMD128-NEXT: i32.const $push94=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push79=, $pop78, $pop94 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop79 +; NO-SIMD128-NEXT: i32.add $push80=, $2, $18 +; NO-SIMD128-NEXT: i32.const $push93=, 1 +; NO-SIMD128-NEXT: i32.add $push81=, $pop80, $pop93 +; NO-SIMD128-NEXT: i32.const $push92=, 254 +; NO-SIMD128-NEXT: i32.and $push82=, $pop81, $pop92 +; NO-SIMD128-NEXT: i32.const $push91=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push83=, $pop82, $pop91 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop83 +; NO-SIMD128-NEXT: i32.add $push84=, $1, $17 +; NO-SIMD128-NEXT: i32.const $push90=, 1 +; NO-SIMD128-NEXT: i32.add $push85=, $pop84, $pop90 +; NO-SIMD128-NEXT: i32.const $push89=, 254 +; NO-SIMD128-NEXT: i32.and $push86=, $pop85, $pop89 +; NO-SIMD128-NEXT: i32.const $push88=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push87=, $pop86, $pop88 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop87 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: avgr_u_v16i8: +; NO-SIMD128-FAST: .functype avgr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.add $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.const $push1=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push133=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop133 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.const $push132=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop132 +; NO-SIMD128-FAST-NEXT: i32.const $push131=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop131 +; NO-SIMD128-FAST-NEXT: i32.const $push130=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop130 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.const $push129=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop129 +; NO-SIMD128-FAST-NEXT: i32.const $push128=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop128 +; NO-SIMD128-FAST-NEXT: i32.const $push127=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop127 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.const $push126=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop126 +; NO-SIMD128-FAST-NEXT: i32.const $push125=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop125 +; NO-SIMD128-FAST-NEXT: i32.const $push124=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop124 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop19 +; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push123=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop123 +; NO-SIMD128-FAST-NEXT: i32.const $push122=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop122 +; NO-SIMD128-FAST-NEXT: i32.const $push121=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop121 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push120=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop120 +; NO-SIMD128-FAST-NEXT: i32.const $push119=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop119 +; NO-SIMD128-FAST-NEXT: i32.const $push118=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop118 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push117=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop117 +; NO-SIMD128-FAST-NEXT: i32.const $push116=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop116 +; NO-SIMD128-FAST-NEXT: i32.const $push115=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop115 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push114=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop114 +; NO-SIMD128-FAST-NEXT: i32.const $push113=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop113 +; NO-SIMD128-FAST-NEXT: i32.const $push112=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop112 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop41 +; NO-SIMD128-FAST-NEXT: i32.add $push42=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop111 +; NO-SIMD128-FAST-NEXT: i32.const $push110=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop110 +; NO-SIMD128-FAST-NEXT: i32.const $push109=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop109 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46 +; NO-SIMD128-FAST-NEXT: i32.add $push48=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push49=, $pop48, $pop108 +; NO-SIMD128-FAST-NEXT: i32.const $push107=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push50=, $pop49, $pop107 +; NO-SIMD128-FAST-NEXT: i32.const $push106=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push51=, $pop50, $pop106 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push53=, $0, $pop52 +; NO-SIMD128-FAST-NEXT: i32.add $push54=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push105=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop105 +; NO-SIMD128-FAST-NEXT: i32.const $push104=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop104 +; NO-SIMD128-FAST-NEXT: i32.const $push103=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop103 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop53), $pop57 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58 +; NO-SIMD128-FAST-NEXT: i32.add $push60=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.const $push102=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push61=, $pop60, $pop102 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push62=, $pop61, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop100 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop63 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-FAST-NEXT: i32.add $push66=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push67=, $pop66, $pop99 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $pop98 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push69=, $pop68, $pop97 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push71=, $0, $pop70 +; NO-SIMD128-FAST-NEXT: i32.add $push72=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push73=, $pop72, $pop96 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push74=, $pop73, $pop95 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push75=, $pop74, $pop94 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop71), $pop75 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push77=, $0, $pop76 +; NO-SIMD128-FAST-NEXT: i32.add $push78=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push79=, $pop78, $pop93 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push80=, $pop79, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push81=, $pop80, $pop91 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop77), $pop81 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push83=, $0, $pop82 +; NO-SIMD128-FAST-NEXT: i32.add $push84=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push85=, $pop84, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push86=, $pop85, $pop89 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push87=, $pop86, $pop88 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop83), $pop87 +; NO-SIMD128-FAST-NEXT: return %a = add nuw <16 x i8> %x, %y %b = add nuw <16 x i8> %a, @@ -106,11 +1925,336 @@ define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ret <16 x i8> %c } -; CHECK-LABEL: avgr_u_v16i8_wrap: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype avgr_u_v16i8_wrap (v128, v128) -> (v128){{$}} -; SIMD128-NOT: i8x16.avgr_u define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: avgr_u_v16i8_wrap: +; SIMD128: .functype avgr_u_v16i8_wrap (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.add $push0=, $0, $1 +; SIMD128-NEXT: v128.const $push1=, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +; SIMD128-NEXT: i8x16.add $push2=, $pop0, $pop1 +; SIMD128-NEXT: i32.const $push3=, 1 +; SIMD128-NEXT: i8x16.shr_u $push4=, $pop2, $pop3 +; SIMD128-NEXT: return $pop4 +; +; SIMD128-FAST-LABEL: avgr_u_v16i8_wrap: +; SIMD128-FAST: .functype avgr_u_v16i8_wrap (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.add $push2=, $0, $1 +; SIMD128-FAST-NEXT: v128.const $push3=, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +; SIMD128-FAST-NEXT: i8x16.add $push1=, $pop2, $pop3 +; SIMD128-FAST-NEXT: i32.const $push4=, 1 +; SIMD128-FAST-NEXT: i8x16.shr_u $push0=, $pop1, $pop4 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: avgr_u_v16i8_wrap: +; NO-SIMD128: .functype avgr_u_v16i8_wrap (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.add $push2=, $16, $32 +; NO-SIMD128-NEXT: i32.const $push3=, 1 +; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 254 +; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 +; NO-SIMD128-NEXT: i32.const $push133=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop133 +; NO-SIMD128-NEXT: i32.store8 0($pop1), $pop7 +; NO-SIMD128-NEXT: i32.const $push8=, 14 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.add $push10=, $15, $31 +; NO-SIMD128-NEXT: i32.const $push132=, 1 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop132 +; NO-SIMD128-NEXT: i32.const $push131=, 254 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop131 +; NO-SIMD128-NEXT: i32.const $push130=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop130 +; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop13 +; NO-SIMD128-NEXT: i32.const $push14=, 13 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.add $push16=, $14, $30 +; NO-SIMD128-NEXT: i32.const $push129=, 1 +; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop129 +; NO-SIMD128-NEXT: i32.const $push128=, 254 +; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop128 +; NO-SIMD128-NEXT: i32.const $push127=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop127 +; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop19 +; NO-SIMD128-NEXT: i32.const $push20=, 12 +; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-NEXT: i32.add $push22=, $13, $29 +; NO-SIMD128-NEXT: i32.const $push126=, 1 +; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop126 +; NO-SIMD128-NEXT: i32.const $push125=, 254 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop125 +; NO-SIMD128-NEXT: i32.const $push124=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop124 +; NO-SIMD128-NEXT: i32.store8 0($pop21), $pop25 +; NO-SIMD128-NEXT: i32.const $push26=, 11 +; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.add $push28=, $12, $28 +; NO-SIMD128-NEXT: i32.const $push123=, 1 +; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop123 +; NO-SIMD128-NEXT: i32.const $push122=, 254 +; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $pop122 +; NO-SIMD128-NEXT: i32.const $push121=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop121 +; NO-SIMD128-NEXT: i32.store8 0($pop27), $pop31 +; NO-SIMD128-NEXT: i32.const $push32=, 10 +; NO-SIMD128-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-NEXT: i32.add $push34=, $11, $27 +; NO-SIMD128-NEXT: i32.const $push120=, 1 +; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop120 +; NO-SIMD128-NEXT: i32.const $push119=, 254 +; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop119 +; NO-SIMD128-NEXT: i32.const $push118=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop118 +; NO-SIMD128-NEXT: i32.store8 0($pop33), $pop37 +; NO-SIMD128-NEXT: i32.const $push38=, 9 +; NO-SIMD128-NEXT: i32.add $push39=, $0, $pop38 +; NO-SIMD128-NEXT: i32.add $push40=, $10, $26 +; NO-SIMD128-NEXT: i32.const $push117=, 1 +; NO-SIMD128-NEXT: i32.add $push41=, $pop40, $pop117 +; NO-SIMD128-NEXT: i32.const $push116=, 254 +; NO-SIMD128-NEXT: i32.and $push42=, $pop41, $pop116 +; NO-SIMD128-NEXT: i32.const $push115=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop115 +; NO-SIMD128-NEXT: i32.store8 0($pop39), $pop43 +; NO-SIMD128-NEXT: i32.add $push44=, $9, $25 +; NO-SIMD128-NEXT: i32.const $push114=, 1 +; NO-SIMD128-NEXT: i32.add $push45=, $pop44, $pop114 +; NO-SIMD128-NEXT: i32.const $push113=, 254 +; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $pop113 +; NO-SIMD128-NEXT: i32.const $push112=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push47=, $pop46, $pop112 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop47 +; NO-SIMD128-NEXT: i32.const $push48=, 7 +; NO-SIMD128-NEXT: i32.add $push49=, $0, $pop48 +; NO-SIMD128-NEXT: i32.add $push50=, $8, $24 +; NO-SIMD128-NEXT: i32.const $push111=, 1 +; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop111 +; NO-SIMD128-NEXT: i32.const $push110=, 254 +; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop110 +; NO-SIMD128-NEXT: i32.const $push109=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop109 +; NO-SIMD128-NEXT: i32.store8 0($pop49), $pop53 +; NO-SIMD128-NEXT: i32.const $push54=, 6 +; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 +; NO-SIMD128-NEXT: i32.add $push56=, $7, $23 +; NO-SIMD128-NEXT: i32.const $push108=, 1 +; NO-SIMD128-NEXT: i32.add $push57=, $pop56, $pop108 +; NO-SIMD128-NEXT: i32.const $push107=, 254 +; NO-SIMD128-NEXT: i32.and $push58=, $pop57, $pop107 +; NO-SIMD128-NEXT: i32.const $push106=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push59=, $pop58, $pop106 +; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop59 +; NO-SIMD128-NEXT: i32.const $push60=, 5 +; NO-SIMD128-NEXT: i32.add $push61=, $0, $pop60 +; NO-SIMD128-NEXT: i32.add $push62=, $6, $22 +; NO-SIMD128-NEXT: i32.const $push105=, 1 +; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop105 +; NO-SIMD128-NEXT: i32.const $push104=, 254 +; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop104 +; NO-SIMD128-NEXT: i32.const $push103=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop103 +; NO-SIMD128-NEXT: i32.store8 0($pop61), $pop65 +; NO-SIMD128-NEXT: i32.add $push66=, $5, $21 +; NO-SIMD128-NEXT: i32.const $push102=, 1 +; NO-SIMD128-NEXT: i32.add $push67=, $pop66, $pop102 +; NO-SIMD128-NEXT: i32.const $push101=, 254 +; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $pop101 +; NO-SIMD128-NEXT: i32.const $push100=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push69=, $pop68, $pop100 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop69 +; NO-SIMD128-NEXT: i32.const $push70=, 3 +; NO-SIMD128-NEXT: i32.add $push71=, $0, $pop70 +; NO-SIMD128-NEXT: i32.add $push72=, $4, $20 +; NO-SIMD128-NEXT: i32.const $push99=, 1 +; NO-SIMD128-NEXT: i32.add $push73=, $pop72, $pop99 +; NO-SIMD128-NEXT: i32.const $push98=, 254 +; NO-SIMD128-NEXT: i32.and $push74=, $pop73, $pop98 +; NO-SIMD128-NEXT: i32.const $push97=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push75=, $pop74, $pop97 +; NO-SIMD128-NEXT: i32.store8 0($pop71), $pop75 +; NO-SIMD128-NEXT: i32.add $push76=, $3, $19 +; NO-SIMD128-NEXT: i32.const $push96=, 1 +; NO-SIMD128-NEXT: i32.add $push77=, $pop76, $pop96 +; NO-SIMD128-NEXT: i32.const $push95=, 254 +; NO-SIMD128-NEXT: i32.and $push78=, $pop77, $pop95 +; NO-SIMD128-NEXT: i32.const $push94=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push79=, $pop78, $pop94 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop79 +; NO-SIMD128-NEXT: i32.add $push80=, $2, $18 +; NO-SIMD128-NEXT: i32.const $push93=, 1 +; NO-SIMD128-NEXT: i32.add $push81=, $pop80, $pop93 +; NO-SIMD128-NEXT: i32.const $push92=, 254 +; NO-SIMD128-NEXT: i32.and $push82=, $pop81, $pop92 +; NO-SIMD128-NEXT: i32.const $push91=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push83=, $pop82, $pop91 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop83 +; NO-SIMD128-NEXT: i32.add $push84=, $1, $17 +; NO-SIMD128-NEXT: i32.const $push90=, 1 +; NO-SIMD128-NEXT: i32.add $push85=, $pop84, $pop90 +; NO-SIMD128-NEXT: i32.const $push89=, 254 +; NO-SIMD128-NEXT: i32.and $push86=, $pop85, $pop89 +; NO-SIMD128-NEXT: i32.const $push88=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push87=, $pop86, $pop88 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop87 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: avgr_u_v16i8_wrap: +; NO-SIMD128-FAST: .functype avgr_u_v16i8_wrap (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.add $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.const $push1=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push133=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop133 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.const $push132=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop132 +; NO-SIMD128-FAST-NEXT: i32.const $push131=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop131 +; NO-SIMD128-FAST-NEXT: i32.const $push130=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop130 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.const $push129=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop129 +; NO-SIMD128-FAST-NEXT: i32.const $push128=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop128 +; NO-SIMD128-FAST-NEXT: i32.const $push127=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop127 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.const $push126=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop126 +; NO-SIMD128-FAST-NEXT: i32.const $push125=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop125 +; NO-SIMD128-FAST-NEXT: i32.const $push124=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop124 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop19 +; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push123=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop123 +; NO-SIMD128-FAST-NEXT: i32.const $push122=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop122 +; NO-SIMD128-FAST-NEXT: i32.const $push121=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop121 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push120=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop120 +; NO-SIMD128-FAST-NEXT: i32.const $push119=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop119 +; NO-SIMD128-FAST-NEXT: i32.const $push118=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop118 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push117=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop117 +; NO-SIMD128-FAST-NEXT: i32.const $push116=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop116 +; NO-SIMD128-FAST-NEXT: i32.const $push115=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop115 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push114=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop114 +; NO-SIMD128-FAST-NEXT: i32.const $push113=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop113 +; NO-SIMD128-FAST-NEXT: i32.const $push112=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop112 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop41 +; NO-SIMD128-FAST-NEXT: i32.add $push42=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop111 +; NO-SIMD128-FAST-NEXT: i32.const $push110=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop110 +; NO-SIMD128-FAST-NEXT: i32.const $push109=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop109 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46 +; NO-SIMD128-FAST-NEXT: i32.add $push48=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push49=, $pop48, $pop108 +; NO-SIMD128-FAST-NEXT: i32.const $push107=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push50=, $pop49, $pop107 +; NO-SIMD128-FAST-NEXT: i32.const $push106=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push51=, $pop50, $pop106 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push53=, $0, $pop52 +; NO-SIMD128-FAST-NEXT: i32.add $push54=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push105=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop105 +; NO-SIMD128-FAST-NEXT: i32.const $push104=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop104 +; NO-SIMD128-FAST-NEXT: i32.const $push103=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop103 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop53), $pop57 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58 +; NO-SIMD128-FAST-NEXT: i32.add $push60=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.const $push102=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push61=, $pop60, $pop102 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push62=, $pop61, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop100 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop63 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-FAST-NEXT: i32.add $push66=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push67=, $pop66, $pop99 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $pop98 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push69=, $pop68, $pop97 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push71=, $0, $pop70 +; NO-SIMD128-FAST-NEXT: i32.add $push72=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push73=, $pop72, $pop96 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push74=, $pop73, $pop95 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push75=, $pop74, $pop94 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop71), $pop75 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push77=, $0, $pop76 +; NO-SIMD128-FAST-NEXT: i32.add $push78=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push79=, $pop78, $pop93 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push80=, $pop79, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push81=, $pop80, $pop91 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop77), $pop81 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push83=, $0, $pop82 +; NO-SIMD128-FAST-NEXT: i32.add $push84=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push85=, $pop84, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push86=, $pop85, $pop89 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push87=, $pop86, $pop88 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop83), $pop87 +; NO-SIMD128-FAST-NEXT: return %a = add <16 x i8> %x, %y %b = add <16 x i8> %a, @@ -119,36 +2263,606 @@ define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) { ret <16 x i8> %c } -; CHECK-LABEL: abs_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype abs_v16i8 (v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.abs $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @abs_v16i8(<16 x i8> %x) { +; SIMD128-LABEL: abs_v16i8: +; SIMD128: .functype abs_v16i8 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.abs $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: abs_v16i8: +; SIMD128-FAST: .functype abs_v16i8 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.abs $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: abs_v16i8: +; NO-SIMD128: .functype abs_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push4=, 15 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.extend8_s $push0=, $16 +; NO-SIMD128-NEXT: i32.const $push1=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push117=, $pop0, $pop1 +; NO-SIMD128-NEXT: local.tee $push116=, $17=, $pop117 +; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop116 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop2, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.const $push9=, 14 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.extend8_s $push6=, $15 +; NO-SIMD128-NEXT: i32.const $push115=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push114=, $pop6, $pop115 +; NO-SIMD128-NEXT: local.tee $push113=, $16=, $pop114 +; NO-SIMD128-NEXT: i32.xor $push7=, $15, $pop113 +; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $16 +; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push14=, 13 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.extend8_s $push11=, $14 +; NO-SIMD128-NEXT: i32.const $push112=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push111=, $pop11, $pop112 +; NO-SIMD128-NEXT: local.tee $push110=, $16=, $pop111 +; NO-SIMD128-NEXT: i32.xor $push12=, $14, $pop110 +; NO-SIMD128-NEXT: i32.sub $push13=, $pop12, $16 +; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.const $push19=, 12 +; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $13 +; NO-SIMD128-NEXT: i32.const $push109=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push108=, $pop16, $pop109 +; NO-SIMD128-NEXT: local.tee $push107=, $16=, $pop108 +; NO-SIMD128-NEXT: i32.xor $push17=, $13, $pop107 +; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $16 +; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-NEXT: i32.const $push24=, 11 +; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $12 +; NO-SIMD128-NEXT: i32.const $push106=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push105=, $pop21, $pop106 +; NO-SIMD128-NEXT: local.tee $push104=, $16=, $pop105 +; NO-SIMD128-NEXT: i32.xor $push22=, $12, $pop104 +; NO-SIMD128-NEXT: i32.sub $push23=, $pop22, $16 +; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-NEXT: i32.const $push29=, 10 +; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-NEXT: i32.extend8_s $push26=, $11 +; NO-SIMD128-NEXT: i32.const $push103=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push102=, $pop26, $pop103 +; NO-SIMD128-NEXT: local.tee $push101=, $16=, $pop102 +; NO-SIMD128-NEXT: i32.xor $push27=, $11, $pop101 +; NO-SIMD128-NEXT: i32.sub $push28=, $pop27, $16 +; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 9 +; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-NEXT: i32.extend8_s $push31=, $10 +; NO-SIMD128-NEXT: i32.const $push100=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push99=, $pop31, $pop100 +; NO-SIMD128-NEXT: local.tee $push98=, $16=, $pop99 +; NO-SIMD128-NEXT: i32.xor $push32=, $10, $pop98 +; NO-SIMD128-NEXT: i32.sub $push33=, $pop32, $16 +; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-NEXT: i32.extend8_s $push36=, $9 +; NO-SIMD128-NEXT: i32.const $push97=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push96=, $pop36, $pop97 +; NO-SIMD128-NEXT: local.tee $push95=, $16=, $pop96 +; NO-SIMD128-NEXT: i32.xor $push37=, $9, $pop95 +; NO-SIMD128-NEXT: i32.sub $push38=, $pop37, $16 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop38 +; NO-SIMD128-NEXT: i32.const $push94=, 7 +; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop94 +; NO-SIMD128-NEXT: i32.extend8_s $push39=, $8 +; NO-SIMD128-NEXT: i32.const $push93=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push92=, $pop39, $pop93 +; NO-SIMD128-NEXT: local.tee $push91=, $16=, $pop92 +; NO-SIMD128-NEXT: i32.xor $push40=, $8, $pop91 +; NO-SIMD128-NEXT: i32.sub $push41=, $pop40, $16 +; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop41 +; NO-SIMD128-NEXT: i32.const $push46=, 6 +; NO-SIMD128-NEXT: i32.add $push47=, $0, $pop46 +; NO-SIMD128-NEXT: i32.extend8_s $push43=, $7 +; NO-SIMD128-NEXT: i32.const $push90=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push89=, $pop43, $pop90 +; NO-SIMD128-NEXT: local.tee $push88=, $16=, $pop89 +; NO-SIMD128-NEXT: i32.xor $push44=, $7, $pop88 +; NO-SIMD128-NEXT: i32.sub $push45=, $pop44, $16 +; NO-SIMD128-NEXT: i32.store8 0($pop47), $pop45 +; NO-SIMD128-NEXT: i32.const $push51=, 5 +; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-NEXT: i32.extend8_s $push48=, $6 +; NO-SIMD128-NEXT: i32.const $push87=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push86=, $pop48, $pop87 +; NO-SIMD128-NEXT: local.tee $push85=, $16=, $pop86 +; NO-SIMD128-NEXT: i32.xor $push49=, $6, $pop85 +; NO-SIMD128-NEXT: i32.sub $push50=, $pop49, $16 +; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 +; NO-SIMD128-NEXT: i32.extend8_s $push53=, $5 +; NO-SIMD128-NEXT: i32.const $push84=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push83=, $pop53, $pop84 +; NO-SIMD128-NEXT: local.tee $push82=, $16=, $pop83 +; NO-SIMD128-NEXT: i32.xor $push54=, $5, $pop82 +; NO-SIMD128-NEXT: i32.sub $push55=, $pop54, $16 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop55 +; NO-SIMD128-NEXT: i32.const $push59=, 3 +; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-NEXT: i32.extend8_s $push56=, $4 +; NO-SIMD128-NEXT: i32.const $push81=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push80=, $pop56, $pop81 +; NO-SIMD128-NEXT: local.tee $push79=, $16=, $pop80 +; NO-SIMD128-NEXT: i32.xor $push57=, $4, $pop79 +; NO-SIMD128-NEXT: i32.sub $push58=, $pop57, $16 +; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 +; NO-SIMD128-NEXT: i32.extend8_s $push61=, $3 +; NO-SIMD128-NEXT: i32.const $push78=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push77=, $pop61, $pop78 +; NO-SIMD128-NEXT: local.tee $push76=, $16=, $pop77 +; NO-SIMD128-NEXT: i32.xor $push62=, $3, $pop76 +; NO-SIMD128-NEXT: i32.sub $push63=, $pop62, $16 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop63 +; NO-SIMD128-NEXT: i32.extend8_s $push64=, $2 +; NO-SIMD128-NEXT: i32.const $push75=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push74=, $pop64, $pop75 +; NO-SIMD128-NEXT: local.tee $push73=, $16=, $pop74 +; NO-SIMD128-NEXT: i32.xor $push65=, $2, $pop73 +; NO-SIMD128-NEXT: i32.sub $push66=, $pop65, $16 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop66 +; NO-SIMD128-NEXT: i32.extend8_s $push67=, $1 +; NO-SIMD128-NEXT: i32.const $push72=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push71=, $pop67, $pop72 +; NO-SIMD128-NEXT: local.tee $push70=, $16=, $pop71 +; NO-SIMD128-NEXT: i32.xor $push68=, $1, $pop70 +; NO-SIMD128-NEXT: i32.sub $push69=, $pop68, $16 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop69 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: abs_v16i8: +; NO-SIMD128-FAST: .functype abs_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push0=, $1 +; NO-SIMD128-FAST-NEXT: i32.const $push1=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push117=, $pop0, $pop1 +; NO-SIMD128-FAST-NEXT: local.tee $push116=, $17=, $pop117 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop116 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop2, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push4=, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push115=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push114=, $pop4, $pop115 +; NO-SIMD128-FAST-NEXT: local.tee $push113=, $1=, $pop114 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop113 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push7=, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push112=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push111=, $pop7, $pop112 +; NO-SIMD128-FAST-NEXT: local.tee $push110=, $2=, $pop111 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop110 +; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop8, $2 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $4 +; NO-SIMD128-FAST-NEXT: i32.const $push109=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push108=, $pop10, $pop109 +; NO-SIMD128-FAST-NEXT: local.tee $push107=, $3=, $pop108 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop107 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push106=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push105=, $pop15, $pop106 +; NO-SIMD128-FAST-NEXT: local.tee $push104=, $4=, $pop105 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $5, $pop104 +; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop16, $4 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push103=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push102=, $pop18, $pop103 +; NO-SIMD128-FAST-NEXT: local.tee $push101=, $5=, $pop102 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $6, $pop101 +; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop19, $5 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push99=, $pop23, $pop100 +; NO-SIMD128-FAST-NEXT: local.tee $push98=, $6=, $pop99 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $7, $pop98 +; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $pop24, $6 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop97 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push95=, $pop28, $pop96 +; NO-SIMD128-FAST-NEXT: local.tee $push94=, $7=, $pop95 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop94 +; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $7 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop30 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $9 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push92=, $pop32, $pop93 +; NO-SIMD128-FAST-NEXT: local.tee $push91=, $8=, $pop92 +; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $9, $pop91 +; NO-SIMD128-FAST-NEXT: i32.sub $push34=, $pop33, $8 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $10 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push89=, $pop35, $pop90 +; NO-SIMD128-FAST-NEXT: local.tee $push88=, $9=, $pop89 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $10, $pop88 +; NO-SIMD128-FAST-NEXT: i32.sub $push37=, $pop36, $9 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $11 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push86=, $pop40, $pop87 +; NO-SIMD128-FAST-NEXT: local.tee $push85=, $10=, $pop86 +; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $11, $pop85 +; NO-SIMD128-FAST-NEXT: i32.sub $push42=, $pop41, $10 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push83=, $pop45, $pop84 +; NO-SIMD128-FAST-NEXT: local.tee $push82=, $11=, $pop83 +; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $12, $pop82 +; NO-SIMD128-FAST-NEXT: i32.sub $push47=, $pop46, $11 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push54=, $0, $pop53 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $13 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push80=, $pop50, $pop81 +; NO-SIMD128-FAST-NEXT: local.tee $push79=, $12=, $pop80 +; NO-SIMD128-FAST-NEXT: i32.xor $push51=, $13, $pop79 +; NO-SIMD128-FAST-NEXT: i32.sub $push52=, $pop51, $12 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push55=, $14 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push77=, $pop55, $pop78 +; NO-SIMD128-FAST-NEXT: local.tee $push76=, $13=, $pop77 +; NO-SIMD128-FAST-NEXT: i32.xor $push56=, $14, $pop76 +; NO-SIMD128-FAST-NEXT: i32.sub $push57=, $pop56, $13 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop57 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push64=, $0, $pop63 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push74=, $pop60, $pop75 +; NO-SIMD128-FAST-NEXT: local.tee $push73=, $14=, $pop74 +; NO-SIMD128-FAST-NEXT: i32.xor $push61=, $15, $pop73 +; NO-SIMD128-FAST-NEXT: i32.sub $push62=, $pop61, $14 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop64), $pop62 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push69=, $0, $pop68 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push65=, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push71=, $pop65, $pop72 +; NO-SIMD128-FAST-NEXT: local.tee $push70=, $0=, $pop71 +; NO-SIMD128-FAST-NEXT: i32.xor $push66=, $16, $pop70 +; NO-SIMD128-FAST-NEXT: i32.sub $push67=, $pop66, $0 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop69), $pop67 +; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> zeroinitializer, %x %b = icmp slt <16 x i8> %x, zeroinitializer %c = select <16 x i1> %b, <16 x i8> %a, <16 x i8> %x ret <16 x i8> %c } -; CHECK-LABEL: neg_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype neg_v16i8 (v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @neg_v16i8(<16 x i8> %x) { +; SIMD128-LABEL: neg_v16i8: +; SIMD128: .functype neg_v16i8 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.neg $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: neg_v16i8: +; SIMD128-FAST: .functype neg_v16i8 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.neg $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: neg_v16i8: +; NO-SIMD128: .functype neg_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 0 +; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $9 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push53=, 0 +; NO-SIMD128-NEXT: i32.sub $push2=, $pop53, $5 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push52=, 0 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop52, $3 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push51=, 0 +; NO-SIMD128-NEXT: i32.sub $push4=, $pop51, $2 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push50=, 0 +; NO-SIMD128-NEXT: i32.sub $push5=, $pop50, $1 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push7=, 15 +; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-NEXT: i32.const $push49=, 0 +; NO-SIMD128-NEXT: i32.sub $push6=, $pop49, $16 +; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 +; NO-SIMD128-NEXT: i32.const $push10=, 14 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.const $push48=, 0 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop48, $15 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-NEXT: i32.const $push13=, 13 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.const $push47=, 0 +; NO-SIMD128-NEXT: i32.sub $push12=, $pop47, $14 +; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.const $push16=, 12 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.const $push46=, 0 +; NO-SIMD128-NEXT: i32.sub $push15=, $pop46, $13 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.const $push19=, 11 +; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-NEXT: i32.const $push45=, 0 +; NO-SIMD128-NEXT: i32.sub $push18=, $pop45, $12 +; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-NEXT: i32.const $push22=, 10 +; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-NEXT: i32.const $push44=, 0 +; NO-SIMD128-NEXT: i32.sub $push21=, $pop44, $11 +; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 +; NO-SIMD128-NEXT: i32.const $push25=, 9 +; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-NEXT: i32.const $push43=, 0 +; NO-SIMD128-NEXT: i32.sub $push24=, $pop43, $10 +; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 +; NO-SIMD128-NEXT: i32.const $push28=, 7 +; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-NEXT: i32.const $push42=, 0 +; NO-SIMD128-NEXT: i32.sub $push27=, $pop42, $8 +; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 +; NO-SIMD128-NEXT: i32.const $push31=, 6 +; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.const $push41=, 0 +; NO-SIMD128-NEXT: i32.sub $push30=, $pop41, $7 +; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.const $push34=, 5 +; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-NEXT: i32.const $push40=, 0 +; NO-SIMD128-NEXT: i32.sub $push33=, $pop40, $6 +; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-NEXT: i32.const $push37=, 3 +; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 +; NO-SIMD128-NEXT: i32.const $push39=, 0 +; NO-SIMD128-NEXT: i32.sub $push36=, $pop39, $4 +; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: neg_v16i8: +; NO-SIMD128-FAST: .functype neg_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop53, $2 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop52, $3 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop51, $4 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop50, $5 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop49, $6 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop48, $7 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop47, $8 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop46, $9 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop45, $10 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push23=, $pop44, $11 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push26=, $pop43, $12 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push29=, $pop42, $13 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push32=, $pop41, $14 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push35=, $pop40, $15 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push38=, $pop39, $16 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> , %x ret <16 x i8> %a } -; CHECK-LABEL: shl_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shl_v16i8 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) { +; SIMD128-LABEL: shl_v16i8: +; SIMD128: .functype shl_v16i8 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shl $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shl_v16i8: +; SIMD128-FAST: .functype shl_v16i8 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.shl $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_v16i8: +; NO-SIMD128: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push40=, $17, $pop0 +; NO-SIMD128-NEXT: local.tee $push39=, $17=, $pop40 +; NO-SIMD128-NEXT: i32.shl $push1=, $9, $pop39 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $5, $17 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shl $push3=, $3, $17 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 +; NO-SIMD128-NEXT: i32.shl $push4=, $2, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 +; NO-SIMD128-NEXT: i32.shl $push5=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push7=, 15 +; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-NEXT: i32.shl $push6=, $16, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 +; NO-SIMD128-NEXT: i32.const $push10=, 14 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.shl $push9=, $15, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-NEXT: i32.const $push13=, 13 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.shl $push12=, $14, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.const $push16=, 12 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.shl $push15=, $13, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.const $push19=, 11 +; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-NEXT: i32.shl $push18=, $12, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-NEXT: i32.const $push22=, 10 +; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-NEXT: i32.shl $push21=, $11, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 +; NO-SIMD128-NEXT: i32.const $push25=, 9 +; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-NEXT: i32.shl $push24=, $10, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 +; NO-SIMD128-NEXT: i32.const $push28=, 7 +; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-NEXT: i32.shl $push27=, $8, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 +; NO-SIMD128-NEXT: i32.const $push31=, 6 +; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.shl $push30=, $7, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.const $push34=, 5 +; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-NEXT: i32.shl $push33=, $6, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-NEXT: i32.const $push37=, 3 +; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 +; NO-SIMD128-NEXT: i32.shl $push36=, $4, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_v16i8: +; NO-SIMD128-FAST: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push39=, $17=, $pop40 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.shl $push17=, $9, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $10, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.shl $push23=, $11, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $12, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.shl $push29=, $13, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-FAST-NEXT: i32.shl $push32=, $14, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shl $push35=, $15, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $16, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, <16 x i32> @shl_v16i8(<16 x i8> %v, i8 %x) { ret <16 x i8> %a } -; CHECK-LABEL: shl_const_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shl_const_v16i8 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5 -; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shl_const_v16i8(<16 x i8> %v) { +; SIMD128-LABEL: shl_const_v16i8: +; SIMD128: .functype shl_const_v16i8 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32.const $push0=, 5 +; SIMD128-NEXT: i8x16.shl $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: shl_const_v16i8: +; SIMD128-FAST: .functype shl_const_v16i8 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-FAST-NEXT: i8x16.shl $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_const_v16i8: +; NO-SIMD128: .functype shl_const_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 5 +; NO-SIMD128-NEXT: i32.shl $push1=, $9, $pop0 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push53=, 5 +; NO-SIMD128-NEXT: i32.shl $push2=, $5, $pop53 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push52=, 5 +; NO-SIMD128-NEXT: i32.shl $push3=, $3, $pop52 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push51=, 5 +; NO-SIMD128-NEXT: i32.shl $push4=, $2, $pop51 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push50=, 5 +; NO-SIMD128-NEXT: i32.shl $push5=, $1, $pop50 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push7=, 15 +; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-NEXT: i32.const $push49=, 5 +; NO-SIMD128-NEXT: i32.shl $push6=, $16, $pop49 +; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 +; NO-SIMD128-NEXT: i32.const $push10=, 14 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.const $push48=, 5 +; NO-SIMD128-NEXT: i32.shl $push9=, $15, $pop48 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-NEXT: i32.const $push13=, 13 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.const $push47=, 5 +; NO-SIMD128-NEXT: i32.shl $push12=, $14, $pop47 +; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.const $push16=, 12 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.const $push46=, 5 +; NO-SIMD128-NEXT: i32.shl $push15=, $13, $pop46 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.const $push19=, 11 +; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-NEXT: i32.const $push45=, 5 +; NO-SIMD128-NEXT: i32.shl $push18=, $12, $pop45 +; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-NEXT: i32.const $push22=, 10 +; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-NEXT: i32.const $push44=, 5 +; NO-SIMD128-NEXT: i32.shl $push21=, $11, $pop44 +; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 +; NO-SIMD128-NEXT: i32.const $push25=, 9 +; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-NEXT: i32.const $push43=, 5 +; NO-SIMD128-NEXT: i32.shl $push24=, $10, $pop43 +; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 +; NO-SIMD128-NEXT: i32.const $push28=, 7 +; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-NEXT: i32.const $push42=, 5 +; NO-SIMD128-NEXT: i32.shl $push27=, $8, $pop42 +; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 +; NO-SIMD128-NEXT: i32.const $push31=, 6 +; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.const $push41=, 5 +; NO-SIMD128-NEXT: i32.shl $push30=, $7, $pop41 +; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.const $push40=, 5 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop40 +; NO-SIMD128-NEXT: i32.const $push39=, 5 +; NO-SIMD128-NEXT: i32.shl $push33=, $6, $pop39 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop33 +; NO-SIMD128-NEXT: i32.const $push36=, 3 +; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-NEXT: i32.const $push38=, 5 +; NO-SIMD128-NEXT: i32.shl $push35=, $4, $pop38 +; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_const_v16i8: +; NO-SIMD128-FAST: .functype shl_const_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $pop50 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $6, $pop48 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $7, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $8, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $9, $pop45 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push19=, $10, $pop44 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push22=, $11, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push25=, $12, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push28=, $13, $pop41 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push31=, $14, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $15, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push37=, $16, $pop38 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = shl <16 x i8> %v, ret <16 x i8> %a } -; CHECK-LABEL: shl_vec_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} -; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]] -; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]] -; Skip 14 lanes -; SIMD128: i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}} -; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} -; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { +; SIMD128-LABEL: shl_vec_v16i8: +; SIMD128: .functype shl_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.extract_lane_u $push7=, $0, 0 +; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $1, 0 +; SIMD128-NEXT: i32.const $push1=, 7 +; SIMD128-NEXT: i32.and $push6=, $pop5, $pop1 +; SIMD128-NEXT: i32.shl $push8=, $pop7, $pop6 +; SIMD128-NEXT: i8x16.splat $push9=, $pop8 +; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $0, 1 +; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 1 +; SIMD128-NEXT: i32.const $push95=, 7 +; SIMD128-NEXT: i32.and $push2=, $pop0, $pop95 +; SIMD128-NEXT: i32.shl $push4=, $pop3, $pop2 +; SIMD128-NEXT: i8x16.replace_lane $push10=, $pop9, 1, $pop4 +; SIMD128-NEXT: i8x16.extract_lane_u $push13=, $0, 2 +; SIMD128-NEXT: i8x16.extract_lane_u $push11=, $1, 2 +; SIMD128-NEXT: i32.const $push94=, 7 +; SIMD128-NEXT: i32.and $push12=, $pop11, $pop94 +; SIMD128-NEXT: i32.shl $push14=, $pop13, $pop12 +; SIMD128-NEXT: i8x16.replace_lane $push15=, $pop10, 2, $pop14 +; SIMD128-NEXT: i8x16.extract_lane_u $push18=, $0, 3 +; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $1, 3 +; SIMD128-NEXT: i32.const $push93=, 7 +; SIMD128-NEXT: i32.and $push17=, $pop16, $pop93 +; SIMD128-NEXT: i32.shl $push19=, $pop18, $pop17 +; SIMD128-NEXT: i8x16.replace_lane $push20=, $pop15, 3, $pop19 +; SIMD128-NEXT: i8x16.extract_lane_u $push23=, $0, 4 +; SIMD128-NEXT: i8x16.extract_lane_u $push21=, $1, 4 +; SIMD128-NEXT: i32.const $push92=, 7 +; SIMD128-NEXT: i32.and $push22=, $pop21, $pop92 +; SIMD128-NEXT: i32.shl $push24=, $pop23, $pop22 +; SIMD128-NEXT: i8x16.replace_lane $push25=, $pop20, 4, $pop24 +; SIMD128-NEXT: i8x16.extract_lane_u $push28=, $0, 5 +; SIMD128-NEXT: i8x16.extract_lane_u $push26=, $1, 5 +; SIMD128-NEXT: i32.const $push91=, 7 +; SIMD128-NEXT: i32.and $push27=, $pop26, $pop91 +; SIMD128-NEXT: i32.shl $push29=, $pop28, $pop27 +; SIMD128-NEXT: i8x16.replace_lane $push30=, $pop25, 5, $pop29 +; SIMD128-NEXT: i8x16.extract_lane_u $push33=, $0, 6 +; SIMD128-NEXT: i8x16.extract_lane_u $push31=, $1, 6 +; SIMD128-NEXT: i32.const $push90=, 7 +; SIMD128-NEXT: i32.and $push32=, $pop31, $pop90 +; SIMD128-NEXT: i32.shl $push34=, $pop33, $pop32 +; SIMD128-NEXT: i8x16.replace_lane $push35=, $pop30, 6, $pop34 +; SIMD128-NEXT: i8x16.extract_lane_u $push38=, $0, 7 +; SIMD128-NEXT: i8x16.extract_lane_u $push36=, $1, 7 +; SIMD128-NEXT: i32.const $push89=, 7 +; SIMD128-NEXT: i32.and $push37=, $pop36, $pop89 +; SIMD128-NEXT: i32.shl $push39=, $pop38, $pop37 +; SIMD128-NEXT: i8x16.replace_lane $push40=, $pop35, 7, $pop39 +; SIMD128-NEXT: i8x16.extract_lane_u $push43=, $0, 8 +; SIMD128-NEXT: i8x16.extract_lane_u $push41=, $1, 8 +; SIMD128-NEXT: i32.const $push88=, 7 +; SIMD128-NEXT: i32.and $push42=, $pop41, $pop88 +; SIMD128-NEXT: i32.shl $push44=, $pop43, $pop42 +; SIMD128-NEXT: i8x16.replace_lane $push45=, $pop40, 8, $pop44 +; SIMD128-NEXT: i8x16.extract_lane_u $push48=, $0, 9 +; SIMD128-NEXT: i8x16.extract_lane_u $push46=, $1, 9 +; SIMD128-NEXT: i32.const $push87=, 7 +; SIMD128-NEXT: i32.and $push47=, $pop46, $pop87 +; SIMD128-NEXT: i32.shl $push49=, $pop48, $pop47 +; SIMD128-NEXT: i8x16.replace_lane $push50=, $pop45, 9, $pop49 +; SIMD128-NEXT: i8x16.extract_lane_u $push53=, $0, 10 +; SIMD128-NEXT: i8x16.extract_lane_u $push51=, $1, 10 +; SIMD128-NEXT: i32.const $push86=, 7 +; SIMD128-NEXT: i32.and $push52=, $pop51, $pop86 +; SIMD128-NEXT: i32.shl $push54=, $pop53, $pop52 +; SIMD128-NEXT: i8x16.replace_lane $push55=, $pop50, 10, $pop54 +; SIMD128-NEXT: i8x16.extract_lane_u $push58=, $0, 11 +; SIMD128-NEXT: i8x16.extract_lane_u $push56=, $1, 11 +; SIMD128-NEXT: i32.const $push85=, 7 +; SIMD128-NEXT: i32.and $push57=, $pop56, $pop85 +; SIMD128-NEXT: i32.shl $push59=, $pop58, $pop57 +; SIMD128-NEXT: i8x16.replace_lane $push60=, $pop55, 11, $pop59 +; SIMD128-NEXT: i8x16.extract_lane_u $push63=, $0, 12 +; SIMD128-NEXT: i8x16.extract_lane_u $push61=, $1, 12 +; SIMD128-NEXT: i32.const $push84=, 7 +; SIMD128-NEXT: i32.and $push62=, $pop61, $pop84 +; SIMD128-NEXT: i32.shl $push64=, $pop63, $pop62 +; SIMD128-NEXT: i8x16.replace_lane $push65=, $pop60, 12, $pop64 +; SIMD128-NEXT: i8x16.extract_lane_u $push68=, $0, 13 +; SIMD128-NEXT: i8x16.extract_lane_u $push66=, $1, 13 +; SIMD128-NEXT: i32.const $push83=, 7 +; SIMD128-NEXT: i32.and $push67=, $pop66, $pop83 +; SIMD128-NEXT: i32.shl $push69=, $pop68, $pop67 +; SIMD128-NEXT: i8x16.replace_lane $push70=, $pop65, 13, $pop69 +; SIMD128-NEXT: i8x16.extract_lane_u $push73=, $0, 14 +; SIMD128-NEXT: i8x16.extract_lane_u $push71=, $1, 14 +; SIMD128-NEXT: i32.const $push82=, 7 +; SIMD128-NEXT: i32.and $push72=, $pop71, $pop82 +; SIMD128-NEXT: i32.shl $push74=, $pop73, $pop72 +; SIMD128-NEXT: i8x16.replace_lane $push75=, $pop70, 14, $pop74 +; SIMD128-NEXT: i8x16.extract_lane_u $push78=, $0, 15 +; SIMD128-NEXT: i8x16.extract_lane_u $push76=, $1, 15 +; SIMD128-NEXT: i32.const $push81=, 7 +; SIMD128-NEXT: i32.and $push77=, $pop76, $pop81 +; SIMD128-NEXT: i32.shl $push79=, $pop78, $pop77 +; SIMD128-NEXT: i8x16.replace_lane $push80=, $pop75, 15, $pop79 +; SIMD128-NEXT: return $pop80 +; +; SIMD128-FAST-LABEL: shl_vec_v16i8: +; SIMD128-FAST: .functype shl_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push8=, $0, 0 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push6=, $1, 0 +; SIMD128-FAST-NEXT: i32.const $push2=, 7 +; SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop2 +; SIMD128-FAST-NEXT: i32.shl $push9=, $pop8, $pop7 +; SIMD128-FAST-NEXT: i8x16.splat $push10=, $pop9 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push4=, $0, 1 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push1=, $1, 1 +; SIMD128-FAST-NEXT: i32.const $push95=, 7 +; SIMD128-FAST-NEXT: i32.and $push3=, $pop1, $pop95 +; SIMD128-FAST-NEXT: i32.shl $push5=, $pop4, $pop3 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push11=, $pop10, 1, $pop5 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push14=, $0, 2 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push12=, $1, 2 +; SIMD128-FAST-NEXT: i32.const $push94=, 7 +; SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $pop94 +; SIMD128-FAST-NEXT: i32.shl $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push16=, $pop11, 2, $pop15 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push19=, $0, 3 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push17=, $1, 3 +; SIMD128-FAST-NEXT: i32.const $push93=, 7 +; SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop93 +; SIMD128-FAST-NEXT: i32.shl $push20=, $pop19, $pop18 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push21=, $pop16, 3, $pop20 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push24=, $0, 4 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push22=, $1, 4 +; SIMD128-FAST-NEXT: i32.const $push92=, 7 +; SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop92 +; SIMD128-FAST-NEXT: i32.shl $push25=, $pop24, $pop23 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push26=, $pop21, 4, $pop25 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push29=, $0, 5 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push27=, $1, 5 +; SIMD128-FAST-NEXT: i32.const $push91=, 7 +; SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop91 +; SIMD128-FAST-NEXT: i32.shl $push30=, $pop29, $pop28 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push31=, $pop26, 5, $pop30 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push34=, $0, 6 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push32=, $1, 6 +; SIMD128-FAST-NEXT: i32.const $push90=, 7 +; SIMD128-FAST-NEXT: i32.and $push33=, $pop32, $pop90 +; SIMD128-FAST-NEXT: i32.shl $push35=, $pop34, $pop33 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push36=, $pop31, 6, $pop35 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push39=, $0, 7 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push37=, $1, 7 +; SIMD128-FAST-NEXT: i32.const $push89=, 7 +; SIMD128-FAST-NEXT: i32.and $push38=, $pop37, $pop89 +; SIMD128-FAST-NEXT: i32.shl $push40=, $pop39, $pop38 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push41=, $pop36, 7, $pop40 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push44=, $0, 8 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push42=, $1, 8 +; SIMD128-FAST-NEXT: i32.const $push88=, 7 +; SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $pop88 +; SIMD128-FAST-NEXT: i32.shl $push45=, $pop44, $pop43 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push46=, $pop41, 8, $pop45 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push49=, $0, 9 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push47=, $1, 9 +; SIMD128-FAST-NEXT: i32.const $push87=, 7 +; SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $pop87 +; SIMD128-FAST-NEXT: i32.shl $push50=, $pop49, $pop48 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push51=, $pop46, 9, $pop50 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push54=, $0, 10 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push52=, $1, 10 +; SIMD128-FAST-NEXT: i32.const $push86=, 7 +; SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $pop86 +; SIMD128-FAST-NEXT: i32.shl $push55=, $pop54, $pop53 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push56=, $pop51, 10, $pop55 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push59=, $0, 11 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push57=, $1, 11 +; SIMD128-FAST-NEXT: i32.const $push85=, 7 +; SIMD128-FAST-NEXT: i32.and $push58=, $pop57, $pop85 +; SIMD128-FAST-NEXT: i32.shl $push60=, $pop59, $pop58 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push61=, $pop56, 11, $pop60 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push64=, $0, 12 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push62=, $1, 12 +; SIMD128-FAST-NEXT: i32.const $push84=, 7 +; SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $pop84 +; SIMD128-FAST-NEXT: i32.shl $push65=, $pop64, $pop63 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push66=, $pop61, 12, $pop65 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push69=, $0, 13 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push67=, $1, 13 +; SIMD128-FAST-NEXT: i32.const $push83=, 7 +; SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $pop83 +; SIMD128-FAST-NEXT: i32.shl $push70=, $pop69, $pop68 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push71=, $pop66, 13, $pop70 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push74=, $0, 14 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push72=, $1, 14 +; SIMD128-FAST-NEXT: i32.const $push82=, 7 +; SIMD128-FAST-NEXT: i32.and $push73=, $pop72, $pop82 +; SIMD128-FAST-NEXT: i32.shl $push75=, $pop74, $pop73 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push76=, $pop71, 14, $pop75 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push79=, $0, 15 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push77=, $1, 15 +; SIMD128-FAST-NEXT: i32.const $push81=, 7 +; SIMD128-FAST-NEXT: i32.and $push78=, $pop77, $pop81 +; SIMD128-FAST-NEXT: i32.shl $push80=, $pop79, $pop78 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push0=, $pop76, 15, $pop80 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_vec_v16i8: +; NO-SIMD128: .functype shl_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop0 +; NO-SIMD128-NEXT: i32.shl $push2=, $9, $pop1 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push69=, 255 +; NO-SIMD128-NEXT: i32.and $push3=, $21, $pop69 +; NO-SIMD128-NEXT: i32.shl $push4=, $5, $pop3 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push68=, 255 +; NO-SIMD128-NEXT: i32.and $push5=, $19, $pop68 +; NO-SIMD128-NEXT: i32.shl $push6=, $3, $pop5 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push67=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $18, $pop67 +; NO-SIMD128-NEXT: i32.shl $push8=, $2, $pop7 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push66=, 255 +; NO-SIMD128-NEXT: i32.and $push9=, $17, $pop66 +; NO-SIMD128-NEXT: i32.shl $push10=, $1, $pop9 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push13=, 15 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.const $push65=, 255 +; NO-SIMD128-NEXT: i32.and $push11=, $32, $pop65 +; NO-SIMD128-NEXT: i32.shl $push12=, $16, $pop11 +; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.const $push17=, 14 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.const $push64=, 255 +; NO-SIMD128-NEXT: i32.and $push15=, $31, $pop64 +; NO-SIMD128-NEXT: i32.shl $push16=, $15, $pop15 +; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.const $push21=, 13 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.const $push63=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $30, $pop63 +; NO-SIMD128-NEXT: i32.shl $push20=, $14, $pop19 +; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push25=, 12 +; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-NEXT: i32.const $push62=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $29, $pop62 +; NO-SIMD128-NEXT: i32.shl $push24=, $13, $pop23 +; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 +; NO-SIMD128-NEXT: i32.const $push29=, 11 +; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-NEXT: i32.const $push61=, 255 +; NO-SIMD128-NEXT: i32.and $push27=, $28, $pop61 +; NO-SIMD128-NEXT: i32.shl $push28=, $12, $pop27 +; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-NEXT: i32.const $push33=, 10 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-NEXT: i32.const $push60=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop60 +; NO-SIMD128-NEXT: i32.shl $push32=, $11, $pop31 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-NEXT: i32.const $push37=, 9 +; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 +; NO-SIMD128-NEXT: i32.const $push59=, 255 +; NO-SIMD128-NEXT: i32.and $push35=, $26, $pop59 +; NO-SIMD128-NEXT: i32.shl $push36=, $10, $pop35 +; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.const $push41=, 7 +; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-NEXT: i32.const $push58=, 255 +; NO-SIMD128-NEXT: i32.and $push39=, $24, $pop58 +; NO-SIMD128-NEXT: i32.shl $push40=, $8, $pop39 +; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-NEXT: i32.const $push45=, 6 +; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 +; NO-SIMD128-NEXT: i32.const $push57=, 255 +; NO-SIMD128-NEXT: i32.and $push43=, $23, $pop57 +; NO-SIMD128-NEXT: i32.shl $push44=, $7, $pop43 +; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 +; NO-SIMD128-NEXT: i32.const $push49=, 5 +; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-NEXT: i32.const $push56=, 255 +; NO-SIMD128-NEXT: i32.and $push47=, $22, $pop56 +; NO-SIMD128-NEXT: i32.shl $push48=, $6, $pop47 +; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-NEXT: i32.const $push53=, 3 +; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 +; NO-SIMD128-NEXT: i32.const $push55=, 255 +; NO-SIMD128-NEXT: i32.and $push51=, $20, $pop55 +; NO-SIMD128-NEXT: i32.shl $push52=, $4, $pop51 +; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_vec_v16i8: +; NO-SIMD128-FAST: .functype shl_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $18, $pop69 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $19, $pop68 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $20, $pop67 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $21, $pop66 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $5, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $22, $pop65 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop64 +; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $24, $pop63 +; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop62 +; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $9, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $26, $pop61 +; NO-SIMD128-FAST-NEXT: i32.shl $push30=, $10, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $27, $pop60 +; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $11, $pop33 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $28, $pop59 +; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $12, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $29, $pop58 +; NO-SIMD128-FAST-NEXT: i32.shl $push42=, $13, $pop41 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $30, $pop57 +; NO-SIMD128-FAST-NEXT: i32.shl $push46=, $14, $pop45 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $31, $pop56 +; NO-SIMD128-FAST-NEXT: i32.shl $push50=, $15, $pop49 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $32, $pop55 +; NO-SIMD128-FAST-NEXT: i32.shl $push54=, $16, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: return %a = shl <16 x i8> %v, %x ret <16 x i8> %a } -; CHECK-LABEL: shr_s_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shr_s_v16i8 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) { +; SIMD128-LABEL: shr_s_v16i8: +; SIMD128: .functype shr_s_v16i8 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shr_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shr_s_v16i8: +; SIMD128-FAST: .functype shr_s_v16i8 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.shr_s $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_s_v16i8: +; NO-SIMD128: .functype shr_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend8_s $push1=, $9 +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push56=, $17, $pop0 +; NO-SIMD128-NEXT: local.tee $push55=, $17=, $pop56 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop55 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 +; NO-SIMD128-NEXT: i32.extend8_s $push3=, $5 +; NO-SIMD128-NEXT: i32.shr_s $push4=, $pop3, $17 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $3 +; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $17 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $2 +; NO-SIMD128-NEXT: i32.shr_s $push8=, $pop7, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $1 +; NO-SIMD128-NEXT: i32.shr_s $push10=, $pop9, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push13=, 15 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.extend8_s $push11=, $16 +; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.const $push17=, 14 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.extend8_s $push15=, $15 +; NO-SIMD128-NEXT: i32.shr_s $push16=, $pop15, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.const $push21=, 13 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $14 +; NO-SIMD128-NEXT: i32.shr_s $push20=, $pop19, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push25=, 12 +; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-NEXT: i32.extend8_s $push23=, $13 +; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 +; NO-SIMD128-NEXT: i32.const $push29=, 11 +; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-NEXT: i32.extend8_s $push27=, $12 +; NO-SIMD128-NEXT: i32.shr_s $push28=, $pop27, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-NEXT: i32.const $push33=, 10 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11 +; NO-SIMD128-NEXT: i32.shr_s $push32=, $pop31, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-NEXT: i32.const $push37=, 9 +; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 +; NO-SIMD128-NEXT: i32.extend8_s $push35=, $10 +; NO-SIMD128-NEXT: i32.shr_s $push36=, $pop35, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.const $push41=, 7 +; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-NEXT: i32.extend8_s $push39=, $8 +; NO-SIMD128-NEXT: i32.shr_s $push40=, $pop39, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-NEXT: i32.const $push45=, 6 +; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 +; NO-SIMD128-NEXT: i32.extend8_s $push43=, $7 +; NO-SIMD128-NEXT: i32.shr_s $push44=, $pop43, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 +; NO-SIMD128-NEXT: i32.const $push49=, 5 +; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-NEXT: i32.extend8_s $push47=, $6 +; NO-SIMD128-NEXT: i32.shr_s $push48=, $pop47, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-NEXT: i32.const $push53=, 3 +; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 +; NO-SIMD128-NEXT: i32.extend8_s $push51=, $4 +; NO-SIMD128-NEXT: i32.shr_s $push52=, $pop51, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_v16i8: +; NO-SIMD128-FAST: .functype shr_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push1=, $1 +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push56=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push55=, $1=, $pop56 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop55 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push3=, $2 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $3 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $4 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $5 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $6 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $8 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $9 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop25, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $10 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $11 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push34=, $pop33, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $12 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop37, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $13 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push42=, $pop41, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $14 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push46=, $pop45, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop49, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push54=, $pop53, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, <16 x i32> @shr_s_v16i8(<16 x i8> %v, i8 %x) { ret <16 x i8> %a } -; CHECK-LABEL: shr_s_vec_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} -; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]] -; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]] -; Skip 14 lanes -; SIMD128: i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}} -; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} -; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { +; SIMD128-LABEL: shr_s_vec_v16i8: +; SIMD128: .functype shr_s_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.extract_lane_s $push7=, $0, 0 +; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $1, 0 +; SIMD128-NEXT: i32.const $push1=, 7 +; SIMD128-NEXT: i32.and $push6=, $pop5, $pop1 +; SIMD128-NEXT: i32.shr_s $push8=, $pop7, $pop6 +; SIMD128-NEXT: i8x16.splat $push9=, $pop8 +; SIMD128-NEXT: i8x16.extract_lane_s $push3=, $0, 1 +; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 1 +; SIMD128-NEXT: i32.const $push95=, 7 +; SIMD128-NEXT: i32.and $push2=, $pop0, $pop95 +; SIMD128-NEXT: i32.shr_s $push4=, $pop3, $pop2 +; SIMD128-NEXT: i8x16.replace_lane $push10=, $pop9, 1, $pop4 +; SIMD128-NEXT: i8x16.extract_lane_s $push13=, $0, 2 +; SIMD128-NEXT: i8x16.extract_lane_u $push11=, $1, 2 +; SIMD128-NEXT: i32.const $push94=, 7 +; SIMD128-NEXT: i32.and $push12=, $pop11, $pop94 +; SIMD128-NEXT: i32.shr_s $push14=, $pop13, $pop12 +; SIMD128-NEXT: i8x16.replace_lane $push15=, $pop10, 2, $pop14 +; SIMD128-NEXT: i8x16.extract_lane_s $push18=, $0, 3 +; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $1, 3 +; SIMD128-NEXT: i32.const $push93=, 7 +; SIMD128-NEXT: i32.and $push17=, $pop16, $pop93 +; SIMD128-NEXT: i32.shr_s $push19=, $pop18, $pop17 +; SIMD128-NEXT: i8x16.replace_lane $push20=, $pop15, 3, $pop19 +; SIMD128-NEXT: i8x16.extract_lane_s $push23=, $0, 4 +; SIMD128-NEXT: i8x16.extract_lane_u $push21=, $1, 4 +; SIMD128-NEXT: i32.const $push92=, 7 +; SIMD128-NEXT: i32.and $push22=, $pop21, $pop92 +; SIMD128-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; SIMD128-NEXT: i8x16.replace_lane $push25=, $pop20, 4, $pop24 +; SIMD128-NEXT: i8x16.extract_lane_s $push28=, $0, 5 +; SIMD128-NEXT: i8x16.extract_lane_u $push26=, $1, 5 +; SIMD128-NEXT: i32.const $push91=, 7 +; SIMD128-NEXT: i32.and $push27=, $pop26, $pop91 +; SIMD128-NEXT: i32.shr_s $push29=, $pop28, $pop27 +; SIMD128-NEXT: i8x16.replace_lane $push30=, $pop25, 5, $pop29 +; SIMD128-NEXT: i8x16.extract_lane_s $push33=, $0, 6 +; SIMD128-NEXT: i8x16.extract_lane_u $push31=, $1, 6 +; SIMD128-NEXT: i32.const $push90=, 7 +; SIMD128-NEXT: i32.and $push32=, $pop31, $pop90 +; SIMD128-NEXT: i32.shr_s $push34=, $pop33, $pop32 +; SIMD128-NEXT: i8x16.replace_lane $push35=, $pop30, 6, $pop34 +; SIMD128-NEXT: i8x16.extract_lane_s $push38=, $0, 7 +; SIMD128-NEXT: i8x16.extract_lane_u $push36=, $1, 7 +; SIMD128-NEXT: i32.const $push89=, 7 +; SIMD128-NEXT: i32.and $push37=, $pop36, $pop89 +; SIMD128-NEXT: i32.shr_s $push39=, $pop38, $pop37 +; SIMD128-NEXT: i8x16.replace_lane $push40=, $pop35, 7, $pop39 +; SIMD128-NEXT: i8x16.extract_lane_s $push43=, $0, 8 +; SIMD128-NEXT: i8x16.extract_lane_u $push41=, $1, 8 +; SIMD128-NEXT: i32.const $push88=, 7 +; SIMD128-NEXT: i32.and $push42=, $pop41, $pop88 +; SIMD128-NEXT: i32.shr_s $push44=, $pop43, $pop42 +; SIMD128-NEXT: i8x16.replace_lane $push45=, $pop40, 8, $pop44 +; SIMD128-NEXT: i8x16.extract_lane_s $push48=, $0, 9 +; SIMD128-NEXT: i8x16.extract_lane_u $push46=, $1, 9 +; SIMD128-NEXT: i32.const $push87=, 7 +; SIMD128-NEXT: i32.and $push47=, $pop46, $pop87 +; SIMD128-NEXT: i32.shr_s $push49=, $pop48, $pop47 +; SIMD128-NEXT: i8x16.replace_lane $push50=, $pop45, 9, $pop49 +; SIMD128-NEXT: i8x16.extract_lane_s $push53=, $0, 10 +; SIMD128-NEXT: i8x16.extract_lane_u $push51=, $1, 10 +; SIMD128-NEXT: i32.const $push86=, 7 +; SIMD128-NEXT: i32.and $push52=, $pop51, $pop86 +; SIMD128-NEXT: i32.shr_s $push54=, $pop53, $pop52 +; SIMD128-NEXT: i8x16.replace_lane $push55=, $pop50, 10, $pop54 +; SIMD128-NEXT: i8x16.extract_lane_s $push58=, $0, 11 +; SIMD128-NEXT: i8x16.extract_lane_u $push56=, $1, 11 +; SIMD128-NEXT: i32.const $push85=, 7 +; SIMD128-NEXT: i32.and $push57=, $pop56, $pop85 +; SIMD128-NEXT: i32.shr_s $push59=, $pop58, $pop57 +; SIMD128-NEXT: i8x16.replace_lane $push60=, $pop55, 11, $pop59 +; SIMD128-NEXT: i8x16.extract_lane_s $push63=, $0, 12 +; SIMD128-NEXT: i8x16.extract_lane_u $push61=, $1, 12 +; SIMD128-NEXT: i32.const $push84=, 7 +; SIMD128-NEXT: i32.and $push62=, $pop61, $pop84 +; SIMD128-NEXT: i32.shr_s $push64=, $pop63, $pop62 +; SIMD128-NEXT: i8x16.replace_lane $push65=, $pop60, 12, $pop64 +; SIMD128-NEXT: i8x16.extract_lane_s $push68=, $0, 13 +; SIMD128-NEXT: i8x16.extract_lane_u $push66=, $1, 13 +; SIMD128-NEXT: i32.const $push83=, 7 +; SIMD128-NEXT: i32.and $push67=, $pop66, $pop83 +; SIMD128-NEXT: i32.shr_s $push69=, $pop68, $pop67 +; SIMD128-NEXT: i8x16.replace_lane $push70=, $pop65, 13, $pop69 +; SIMD128-NEXT: i8x16.extract_lane_s $push73=, $0, 14 +; SIMD128-NEXT: i8x16.extract_lane_u $push71=, $1, 14 +; SIMD128-NEXT: i32.const $push82=, 7 +; SIMD128-NEXT: i32.and $push72=, $pop71, $pop82 +; SIMD128-NEXT: i32.shr_s $push74=, $pop73, $pop72 +; SIMD128-NEXT: i8x16.replace_lane $push75=, $pop70, 14, $pop74 +; SIMD128-NEXT: i8x16.extract_lane_s $push78=, $0, 15 +; SIMD128-NEXT: i8x16.extract_lane_u $push76=, $1, 15 +; SIMD128-NEXT: i32.const $push81=, 7 +; SIMD128-NEXT: i32.and $push77=, $pop76, $pop81 +; SIMD128-NEXT: i32.shr_s $push79=, $pop78, $pop77 +; SIMD128-NEXT: i8x16.replace_lane $push80=, $pop75, 15, $pop79 +; SIMD128-NEXT: return $pop80 +; +; SIMD128-FAST-LABEL: shr_s_vec_v16i8: +; SIMD128-FAST: .functype shr_s_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push8=, $0, 0 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push6=, $1, 0 +; SIMD128-FAST-NEXT: i32.const $push2=, 7 +; SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop2 +; SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7 +; SIMD128-FAST-NEXT: i8x16.splat $push10=, $pop9 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push4=, $0, 1 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push1=, $1, 1 +; SIMD128-FAST-NEXT: i32.const $push95=, 7 +; SIMD128-FAST-NEXT: i32.and $push3=, $pop1, $pop95 +; SIMD128-FAST-NEXT: i32.shr_s $push5=, $pop4, $pop3 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push11=, $pop10, 1, $pop5 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push14=, $0, 2 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push12=, $1, 2 +; SIMD128-FAST-NEXT: i32.const $push94=, 7 +; SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $pop94 +; SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push16=, $pop11, 2, $pop15 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push19=, $0, 3 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push17=, $1, 3 +; SIMD128-FAST-NEXT: i32.const $push93=, 7 +; SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop93 +; SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $pop18 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push21=, $pop16, 3, $pop20 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push24=, $0, 4 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push22=, $1, 4 +; SIMD128-FAST-NEXT: i32.const $push92=, 7 +; SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop92 +; SIMD128-FAST-NEXT: i32.shr_s $push25=, $pop24, $pop23 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push26=, $pop21, 4, $pop25 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push29=, $0, 5 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push27=, $1, 5 +; SIMD128-FAST-NEXT: i32.const $push91=, 7 +; SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop91 +; SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $pop28 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push31=, $pop26, 5, $pop30 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push34=, $0, 6 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push32=, $1, 6 +; SIMD128-FAST-NEXT: i32.const $push90=, 7 +; SIMD128-FAST-NEXT: i32.and $push33=, $pop32, $pop90 +; SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop34, $pop33 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push36=, $pop31, 6, $pop35 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push39=, $0, 7 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push37=, $1, 7 +; SIMD128-FAST-NEXT: i32.const $push89=, 7 +; SIMD128-FAST-NEXT: i32.and $push38=, $pop37, $pop89 +; SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop39, $pop38 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push41=, $pop36, 7, $pop40 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push44=, $0, 8 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push42=, $1, 8 +; SIMD128-FAST-NEXT: i32.const $push88=, 7 +; SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $pop88 +; SIMD128-FAST-NEXT: i32.shr_s $push45=, $pop44, $pop43 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push46=, $pop41, 8, $pop45 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push49=, $0, 9 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push47=, $1, 9 +; SIMD128-FAST-NEXT: i32.const $push87=, 7 +; SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $pop87 +; SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop49, $pop48 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push51=, $pop46, 9, $pop50 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push54=, $0, 10 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push52=, $1, 10 +; SIMD128-FAST-NEXT: i32.const $push86=, 7 +; SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $pop86 +; SIMD128-FAST-NEXT: i32.shr_s $push55=, $pop54, $pop53 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push56=, $pop51, 10, $pop55 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push59=, $0, 11 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push57=, $1, 11 +; SIMD128-FAST-NEXT: i32.const $push85=, 7 +; SIMD128-FAST-NEXT: i32.and $push58=, $pop57, $pop85 +; SIMD128-FAST-NEXT: i32.shr_s $push60=, $pop59, $pop58 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push61=, $pop56, 11, $pop60 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push64=, $0, 12 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push62=, $1, 12 +; SIMD128-FAST-NEXT: i32.const $push84=, 7 +; SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $pop84 +; SIMD128-FAST-NEXT: i32.shr_s $push65=, $pop64, $pop63 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push66=, $pop61, 12, $pop65 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push69=, $0, 13 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push67=, $1, 13 +; SIMD128-FAST-NEXT: i32.const $push83=, 7 +; SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $pop83 +; SIMD128-FAST-NEXT: i32.shr_s $push70=, $pop69, $pop68 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push71=, $pop66, 13, $pop70 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push74=, $0, 14 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push72=, $1, 14 +; SIMD128-FAST-NEXT: i32.const $push82=, 7 +; SIMD128-FAST-NEXT: i32.and $push73=, $pop72, $pop82 +; SIMD128-FAST-NEXT: i32.shr_s $push75=, $pop74, $pop73 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push76=, $pop71, 14, $pop75 +; SIMD128-FAST-NEXT: i8x16.extract_lane_s $push79=, $0, 15 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push77=, $1, 15 +; SIMD128-FAST-NEXT: i32.const $push81=, 7 +; SIMD128-FAST-NEXT: i32.and $push78=, $pop77, $pop81 +; SIMD128-FAST-NEXT: i32.shr_s $push80=, $pop79, $pop78 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push0=, $pop76, 15, $pop80 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_s_vec_v16i8: +; NO-SIMD128: .functype shr_s_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend8_s $push2=, $9 +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop0 +; NO-SIMD128-NEXT: i32.shr_s $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop3 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $5 +; NO-SIMD128-NEXT: i32.const $push85=, 255 +; NO-SIMD128-NEXT: i32.and $push4=, $21, $pop85 +; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-NEXT: i32.extend8_s $push8=, $3 +; NO-SIMD128-NEXT: i32.const $push84=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $19, $pop84 +; NO-SIMD128-NEXT: i32.shr_s $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop9 +; NO-SIMD128-NEXT: i32.extend8_s $push11=, $2 +; NO-SIMD128-NEXT: i32.const $push83=, 255 +; NO-SIMD128-NEXT: i32.and $push10=, $18, $pop83 +; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop12 +; NO-SIMD128-NEXT: i32.extend8_s $push14=, $1 +; NO-SIMD128-NEXT: i32.const $push82=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $17, $pop82 +; NO-SIMD128-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push19=, 15 +; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $16 +; NO-SIMD128-NEXT: i32.const $push81=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $32, $pop81 +; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-NEXT: i32.const $push24=, 14 +; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-NEXT: i32.extend8_s $push22=, $15 +; NO-SIMD128-NEXT: i32.const $push80=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $31, $pop80 +; NO-SIMD128-NEXT: i32.shr_s $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-NEXT: i32.const $push29=, 13 +; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-NEXT: i32.extend8_s $push27=, $14 +; NO-SIMD128-NEXT: i32.const $push79=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $30, $pop79 +; NO-SIMD128-NEXT: i32.shr_s $push28=, $pop27, $pop26 +; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 12 +; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-NEXT: i32.extend8_s $push32=, $13 +; NO-SIMD128-NEXT: i32.const $push78=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $29, $pop78 +; NO-SIMD128-NEXT: i32.shr_s $push33=, $pop32, $pop31 +; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-NEXT: i32.const $push39=, 11 +; NO-SIMD128-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push37=, $12 +; NO-SIMD128-NEXT: i32.const $push77=, 255 +; NO-SIMD128-NEXT: i32.and $push36=, $28, $pop77 +; NO-SIMD128-NEXT: i32.shr_s $push38=, $pop37, $pop36 +; NO-SIMD128-NEXT: i32.store8 0($pop40), $pop38 +; NO-SIMD128-NEXT: i32.const $push44=, 10 +; NO-SIMD128-NEXT: i32.add $push45=, $0, $pop44 +; NO-SIMD128-NEXT: i32.extend8_s $push42=, $11 +; NO-SIMD128-NEXT: i32.const $push76=, 255 +; NO-SIMD128-NEXT: i32.and $push41=, $27, $pop76 +; NO-SIMD128-NEXT: i32.shr_s $push43=, $pop42, $pop41 +; NO-SIMD128-NEXT: i32.store8 0($pop45), $pop43 +; NO-SIMD128-NEXT: i32.const $push49=, 9 +; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-NEXT: i32.extend8_s $push47=, $10 +; NO-SIMD128-NEXT: i32.const $push75=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $26, $pop75 +; NO-SIMD128-NEXT: i32.shr_s $push48=, $pop47, $pop46 +; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-NEXT: i32.const $push54=, 7 +; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 +; NO-SIMD128-NEXT: i32.extend8_s $push52=, $8 +; NO-SIMD128-NEXT: i32.const $push74=, 255 +; NO-SIMD128-NEXT: i32.and $push51=, $24, $pop74 +; NO-SIMD128-NEXT: i32.shr_s $push53=, $pop52, $pop51 +; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop53 +; NO-SIMD128-NEXT: i32.const $push59=, 6 +; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-NEXT: i32.extend8_s $push57=, $7 +; NO-SIMD128-NEXT: i32.const $push73=, 255 +; NO-SIMD128-NEXT: i32.and $push56=, $23, $pop73 +; NO-SIMD128-NEXT: i32.shr_s $push58=, $pop57, $pop56 +; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 +; NO-SIMD128-NEXT: i32.const $push64=, 5 +; NO-SIMD128-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-NEXT: i32.extend8_s $push62=, $6 +; NO-SIMD128-NEXT: i32.const $push72=, 255 +; NO-SIMD128-NEXT: i32.and $push61=, $22, $pop72 +; NO-SIMD128-NEXT: i32.shr_s $push63=, $pop62, $pop61 +; NO-SIMD128-NEXT: i32.store8 0($pop65), $pop63 +; NO-SIMD128-NEXT: i32.const $push69=, 3 +; NO-SIMD128-NEXT: i32.add $push70=, $0, $pop69 +; NO-SIMD128-NEXT: i32.extend8_s $push67=, $4 +; NO-SIMD128-NEXT: i32.const $push71=, 255 +; NO-SIMD128-NEXT: i32.and $push66=, $20, $pop71 +; NO-SIMD128-NEXT: i32.shr_s $push68=, $pop67, $pop66 +; NO-SIMD128-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_vec_v16i8: +; NO-SIMD128-FAST: .functype shr_s_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push2=, $1 +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop85 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push8=, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop84 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $20, $pop83 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop82 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $22, $pop81 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push26=, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop80 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $24, $pop79 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop29), $pop32 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $9 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop78 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop34, $pop33 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push39=, $10 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $26, $pop77 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop39, $pop38 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $11 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $27, $pop76 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push45=, $pop44, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop42), $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $12 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push48=, $28, $pop75 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop49, $pop48 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push54=, $13 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $29, $pop74 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push55=, $pop54, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push57=, $0, $pop56 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push59=, $14 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $30, $pop73 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push60=, $pop59, $pop58 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop57), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push64=, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $31, $pop72 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push65=, $pop64, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop65 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push68=, $32, $pop71 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push70=, $pop69, $pop68 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop70 +; NO-SIMD128-FAST-NEXT: return %a = ashr <16 x i8> %v, %x ret <16 x i8> %a } -; CHECK-LABEL: shr_u_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shr_u_v16i8 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) { +; SIMD128-LABEL: shr_u_v16i8: +; SIMD128: .functype shr_u_v16i8 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shr_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shr_u_v16i8: +; SIMD128-FAST: .functype shr_u_v16i8 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.shr_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_u_v16i8: +; NO-SIMD128: .functype shr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $9, $pop0 +; NO-SIMD128-NEXT: i32.const $push72=, 255 +; NO-SIMD128-NEXT: i32.and $push71=, $17, $pop72 +; NO-SIMD128-NEXT: local.tee $push70=, $17=, $pop71 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop70 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push69=, 255 +; NO-SIMD128-NEXT: i32.and $push3=, $5, $pop69 +; NO-SIMD128-NEXT: i32.shr_u $push4=, $pop3, $17 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push68=, 255 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop68 +; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $17 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push67=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $2, $pop67 +; NO-SIMD128-NEXT: i32.shr_u $push8=, $pop7, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push66=, 255 +; NO-SIMD128-NEXT: i32.and $push9=, $1, $pop66 +; NO-SIMD128-NEXT: i32.shr_u $push10=, $pop9, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push13=, 15 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.const $push65=, 255 +; NO-SIMD128-NEXT: i32.and $push11=, $16, $pop65 +; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.const $push17=, 14 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.const $push64=, 255 +; NO-SIMD128-NEXT: i32.and $push15=, $15, $pop64 +; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.const $push21=, 13 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.const $push63=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $14, $pop63 +; NO-SIMD128-NEXT: i32.shr_u $push20=, $pop19, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push25=, 12 +; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-NEXT: i32.const $push62=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $13, $pop62 +; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 +; NO-SIMD128-NEXT: i32.const $push29=, 11 +; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-NEXT: i32.const $push61=, 255 +; NO-SIMD128-NEXT: i32.and $push27=, $12, $pop61 +; NO-SIMD128-NEXT: i32.shr_u $push28=, $pop27, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-NEXT: i32.const $push33=, 10 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-NEXT: i32.const $push60=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $11, $pop60 +; NO-SIMD128-NEXT: i32.shr_u $push32=, $pop31, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-NEXT: i32.const $push37=, 9 +; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 +; NO-SIMD128-NEXT: i32.const $push59=, 255 +; NO-SIMD128-NEXT: i32.and $push35=, $10, $pop59 +; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.const $push41=, 7 +; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-NEXT: i32.const $push58=, 255 +; NO-SIMD128-NEXT: i32.and $push39=, $8, $pop58 +; NO-SIMD128-NEXT: i32.shr_u $push40=, $pop39, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-NEXT: i32.const $push45=, 6 +; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 +; NO-SIMD128-NEXT: i32.const $push57=, 255 +; NO-SIMD128-NEXT: i32.and $push43=, $7, $pop57 +; NO-SIMD128-NEXT: i32.shr_u $push44=, $pop43, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 +; NO-SIMD128-NEXT: i32.const $push49=, 5 +; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-NEXT: i32.const $push56=, 255 +; NO-SIMD128-NEXT: i32.and $push47=, $6, $pop56 +; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-NEXT: i32.const $push53=, 3 +; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 +; NO-SIMD128-NEXT: i32.const $push55=, 255 +; NO-SIMD128-NEXT: i32.and $push51=, $4, $pop55 +; NO-SIMD128-NEXT: i32.shr_u $push52=, $pop51, $17 +; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_v16i8: +; NO-SIMD128-FAST: .functype shr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push71=, $17, $pop72 +; NO-SIMD128-FAST-NEXT: local.tee $push70=, $1=, $pop71 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop70 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop69 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop68 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop67 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop66 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop65 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop16), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop64 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop63 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $9, $pop62 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push26=, $pop25, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $10, $pop61 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push28=, $pop27, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $11, $pop60 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push32=, $pop31, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push38=, $0, $pop37 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $12, $pop59 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push39=, $13, $pop58 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push40=, $pop39, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $14, $pop57 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push44=, $pop43, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $15, $pop56 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push54=, $0, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $16, $pop55 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push52=, $pop51, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, <16 x i32> @shr_u_v16i8(<16 x i8> %v, i8 %x) { ret <16 x i8> %a } -; CHECK-LABEL: shr_u_vec_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} -; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]] -; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]] -; Skip 14 lanes -; SIMD128: i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}} -; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} -; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 15, $pop[[M6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { +; SIMD128-LABEL: shr_u_vec_v16i8: +; SIMD128: .functype shr_u_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.extract_lane_u $push7=, $0, 0 +; SIMD128-NEXT: i8x16.extract_lane_u $push5=, $1, 0 +; SIMD128-NEXT: i32.const $push1=, 7 +; SIMD128-NEXT: i32.and $push6=, $pop5, $pop1 +; SIMD128-NEXT: i32.shr_u $push8=, $pop7, $pop6 +; SIMD128-NEXT: i8x16.splat $push9=, $pop8 +; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $0, 1 +; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 1 +; SIMD128-NEXT: i32.const $push95=, 7 +; SIMD128-NEXT: i32.and $push2=, $pop0, $pop95 +; SIMD128-NEXT: i32.shr_u $push4=, $pop3, $pop2 +; SIMD128-NEXT: i8x16.replace_lane $push10=, $pop9, 1, $pop4 +; SIMD128-NEXT: i8x16.extract_lane_u $push13=, $0, 2 +; SIMD128-NEXT: i8x16.extract_lane_u $push11=, $1, 2 +; SIMD128-NEXT: i32.const $push94=, 7 +; SIMD128-NEXT: i32.and $push12=, $pop11, $pop94 +; SIMD128-NEXT: i32.shr_u $push14=, $pop13, $pop12 +; SIMD128-NEXT: i8x16.replace_lane $push15=, $pop10, 2, $pop14 +; SIMD128-NEXT: i8x16.extract_lane_u $push18=, $0, 3 +; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $1, 3 +; SIMD128-NEXT: i32.const $push93=, 7 +; SIMD128-NEXT: i32.and $push17=, $pop16, $pop93 +; SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop17 +; SIMD128-NEXT: i8x16.replace_lane $push20=, $pop15, 3, $pop19 +; SIMD128-NEXT: i8x16.extract_lane_u $push23=, $0, 4 +; SIMD128-NEXT: i8x16.extract_lane_u $push21=, $1, 4 +; SIMD128-NEXT: i32.const $push92=, 7 +; SIMD128-NEXT: i32.and $push22=, $pop21, $pop92 +; SIMD128-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; SIMD128-NEXT: i8x16.replace_lane $push25=, $pop20, 4, $pop24 +; SIMD128-NEXT: i8x16.extract_lane_u $push28=, $0, 5 +; SIMD128-NEXT: i8x16.extract_lane_u $push26=, $1, 5 +; SIMD128-NEXT: i32.const $push91=, 7 +; SIMD128-NEXT: i32.and $push27=, $pop26, $pop91 +; SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop27 +; SIMD128-NEXT: i8x16.replace_lane $push30=, $pop25, 5, $pop29 +; SIMD128-NEXT: i8x16.extract_lane_u $push33=, $0, 6 +; SIMD128-NEXT: i8x16.extract_lane_u $push31=, $1, 6 +; SIMD128-NEXT: i32.const $push90=, 7 +; SIMD128-NEXT: i32.and $push32=, $pop31, $pop90 +; SIMD128-NEXT: i32.shr_u $push34=, $pop33, $pop32 +; SIMD128-NEXT: i8x16.replace_lane $push35=, $pop30, 6, $pop34 +; SIMD128-NEXT: i8x16.extract_lane_u $push38=, $0, 7 +; SIMD128-NEXT: i8x16.extract_lane_u $push36=, $1, 7 +; SIMD128-NEXT: i32.const $push89=, 7 +; SIMD128-NEXT: i32.and $push37=, $pop36, $pop89 +; SIMD128-NEXT: i32.shr_u $push39=, $pop38, $pop37 +; SIMD128-NEXT: i8x16.replace_lane $push40=, $pop35, 7, $pop39 +; SIMD128-NEXT: i8x16.extract_lane_u $push43=, $0, 8 +; SIMD128-NEXT: i8x16.extract_lane_u $push41=, $1, 8 +; SIMD128-NEXT: i32.const $push88=, 7 +; SIMD128-NEXT: i32.and $push42=, $pop41, $pop88 +; SIMD128-NEXT: i32.shr_u $push44=, $pop43, $pop42 +; SIMD128-NEXT: i8x16.replace_lane $push45=, $pop40, 8, $pop44 +; SIMD128-NEXT: i8x16.extract_lane_u $push48=, $0, 9 +; SIMD128-NEXT: i8x16.extract_lane_u $push46=, $1, 9 +; SIMD128-NEXT: i32.const $push87=, 7 +; SIMD128-NEXT: i32.and $push47=, $pop46, $pop87 +; SIMD128-NEXT: i32.shr_u $push49=, $pop48, $pop47 +; SIMD128-NEXT: i8x16.replace_lane $push50=, $pop45, 9, $pop49 +; SIMD128-NEXT: i8x16.extract_lane_u $push53=, $0, 10 +; SIMD128-NEXT: i8x16.extract_lane_u $push51=, $1, 10 +; SIMD128-NEXT: i32.const $push86=, 7 +; SIMD128-NEXT: i32.and $push52=, $pop51, $pop86 +; SIMD128-NEXT: i32.shr_u $push54=, $pop53, $pop52 +; SIMD128-NEXT: i8x16.replace_lane $push55=, $pop50, 10, $pop54 +; SIMD128-NEXT: i8x16.extract_lane_u $push58=, $0, 11 +; SIMD128-NEXT: i8x16.extract_lane_u $push56=, $1, 11 +; SIMD128-NEXT: i32.const $push85=, 7 +; SIMD128-NEXT: i32.and $push57=, $pop56, $pop85 +; SIMD128-NEXT: i32.shr_u $push59=, $pop58, $pop57 +; SIMD128-NEXT: i8x16.replace_lane $push60=, $pop55, 11, $pop59 +; SIMD128-NEXT: i8x16.extract_lane_u $push63=, $0, 12 +; SIMD128-NEXT: i8x16.extract_lane_u $push61=, $1, 12 +; SIMD128-NEXT: i32.const $push84=, 7 +; SIMD128-NEXT: i32.and $push62=, $pop61, $pop84 +; SIMD128-NEXT: i32.shr_u $push64=, $pop63, $pop62 +; SIMD128-NEXT: i8x16.replace_lane $push65=, $pop60, 12, $pop64 +; SIMD128-NEXT: i8x16.extract_lane_u $push68=, $0, 13 +; SIMD128-NEXT: i8x16.extract_lane_u $push66=, $1, 13 +; SIMD128-NEXT: i32.const $push83=, 7 +; SIMD128-NEXT: i32.and $push67=, $pop66, $pop83 +; SIMD128-NEXT: i32.shr_u $push69=, $pop68, $pop67 +; SIMD128-NEXT: i8x16.replace_lane $push70=, $pop65, 13, $pop69 +; SIMD128-NEXT: i8x16.extract_lane_u $push73=, $0, 14 +; SIMD128-NEXT: i8x16.extract_lane_u $push71=, $1, 14 +; SIMD128-NEXT: i32.const $push82=, 7 +; SIMD128-NEXT: i32.and $push72=, $pop71, $pop82 +; SIMD128-NEXT: i32.shr_u $push74=, $pop73, $pop72 +; SIMD128-NEXT: i8x16.replace_lane $push75=, $pop70, 14, $pop74 +; SIMD128-NEXT: i8x16.extract_lane_u $push78=, $0, 15 +; SIMD128-NEXT: i8x16.extract_lane_u $push76=, $1, 15 +; SIMD128-NEXT: i32.const $push81=, 7 +; SIMD128-NEXT: i32.and $push77=, $pop76, $pop81 +; SIMD128-NEXT: i32.shr_u $push79=, $pop78, $pop77 +; SIMD128-NEXT: i8x16.replace_lane $push80=, $pop75, 15, $pop79 +; SIMD128-NEXT: return $pop80 +; +; SIMD128-FAST-LABEL: shr_u_vec_v16i8: +; SIMD128-FAST: .functype shr_u_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push8=, $0, 0 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push6=, $1, 0 +; SIMD128-FAST-NEXT: i32.const $push2=, 7 +; SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop2 +; SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; SIMD128-FAST-NEXT: i8x16.splat $push10=, $pop9 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push4=, $0, 1 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push1=, $1, 1 +; SIMD128-FAST-NEXT: i32.const $push95=, 7 +; SIMD128-FAST-NEXT: i32.and $push3=, $pop1, $pop95 +; SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop3 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push11=, $pop10, 1, $pop5 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push14=, $0, 2 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push12=, $1, 2 +; SIMD128-FAST-NEXT: i32.const $push94=, 7 +; SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $pop94 +; SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push16=, $pop11, 2, $pop15 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push19=, $0, 3 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push17=, $1, 3 +; SIMD128-FAST-NEXT: i32.const $push93=, 7 +; SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop93 +; SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push21=, $pop16, 3, $pop20 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push24=, $0, 4 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push22=, $1, 4 +; SIMD128-FAST-NEXT: i32.const $push92=, 7 +; SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop92 +; SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push26=, $pop21, 4, $pop25 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push29=, $0, 5 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push27=, $1, 5 +; SIMD128-FAST-NEXT: i32.const $push91=, 7 +; SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop91 +; SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push31=, $pop26, 5, $pop30 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push34=, $0, 6 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push32=, $1, 6 +; SIMD128-FAST-NEXT: i32.const $push90=, 7 +; SIMD128-FAST-NEXT: i32.and $push33=, $pop32, $pop90 +; SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop33 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push36=, $pop31, 6, $pop35 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push39=, $0, 7 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push37=, $1, 7 +; SIMD128-FAST-NEXT: i32.const $push89=, 7 +; SIMD128-FAST-NEXT: i32.and $push38=, $pop37, $pop89 +; SIMD128-FAST-NEXT: i32.shr_u $push40=, $pop39, $pop38 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push41=, $pop36, 7, $pop40 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push44=, $0, 8 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push42=, $1, 8 +; SIMD128-FAST-NEXT: i32.const $push88=, 7 +; SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $pop88 +; SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop43 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push46=, $pop41, 8, $pop45 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push49=, $0, 9 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push47=, $1, 9 +; SIMD128-FAST-NEXT: i32.const $push87=, 7 +; SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $pop87 +; SIMD128-FAST-NEXT: i32.shr_u $push50=, $pop49, $pop48 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push51=, $pop46, 9, $pop50 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push54=, $0, 10 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push52=, $1, 10 +; SIMD128-FAST-NEXT: i32.const $push86=, 7 +; SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $pop86 +; SIMD128-FAST-NEXT: i32.shr_u $push55=, $pop54, $pop53 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push56=, $pop51, 10, $pop55 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push59=, $0, 11 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push57=, $1, 11 +; SIMD128-FAST-NEXT: i32.const $push85=, 7 +; SIMD128-FAST-NEXT: i32.and $push58=, $pop57, $pop85 +; SIMD128-FAST-NEXT: i32.shr_u $push60=, $pop59, $pop58 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push61=, $pop56, 11, $pop60 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push64=, $0, 12 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push62=, $1, 12 +; SIMD128-FAST-NEXT: i32.const $push84=, 7 +; SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $pop84 +; SIMD128-FAST-NEXT: i32.shr_u $push65=, $pop64, $pop63 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push66=, $pop61, 12, $pop65 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push69=, $0, 13 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push67=, $1, 13 +; SIMD128-FAST-NEXT: i32.const $push83=, 7 +; SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $pop83 +; SIMD128-FAST-NEXT: i32.shr_u $push70=, $pop69, $pop68 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push71=, $pop66, 13, $pop70 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push74=, $0, 14 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push72=, $1, 14 +; SIMD128-FAST-NEXT: i32.const $push82=, 7 +; SIMD128-FAST-NEXT: i32.and $push73=, $pop72, $pop82 +; SIMD128-FAST-NEXT: i32.shr_u $push75=, $pop74, $pop73 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push76=, $pop71, 14, $pop75 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push79=, $0, 15 +; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push77=, $1, 15 +; SIMD128-FAST-NEXT: i32.const $push81=, 7 +; SIMD128-FAST-NEXT: i32.and $push78=, $pop77, $pop81 +; SIMD128-FAST-NEXT: i32.shr_u $push80=, $pop79, $pop78 +; SIMD128-FAST-NEXT: i8x16.replace_lane $push0=, $pop76, 15, $pop80 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_u_vec_v16i8: +; NO-SIMD128: .functype shr_u_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push2=, $9, $pop0 +; NO-SIMD128-NEXT: i32.const $push101=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop101 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push100=, 255 +; NO-SIMD128-NEXT: i32.and $push5=, $5, $pop100 +; NO-SIMD128-NEXT: i32.const $push99=, 255 +; NO-SIMD128-NEXT: i32.and $push4=, $21, $pop99 +; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push98=, 255 +; NO-SIMD128-NEXT: i32.and $push8=, $3, $pop98 +; NO-SIMD128-NEXT: i32.const $push97=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $19, $pop97 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push96=, 255 +; NO-SIMD128-NEXT: i32.and $push11=, $2, $pop96 +; NO-SIMD128-NEXT: i32.const $push95=, 255 +; NO-SIMD128-NEXT: i32.and $push10=, $18, $pop95 +; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push94=, 255 +; NO-SIMD128-NEXT: i32.and $push14=, $1, $pop94 +; NO-SIMD128-NEXT: i32.const $push93=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $17, $pop93 +; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push19=, 15 +; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-NEXT: i32.const $push92=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $16, $pop92 +; NO-SIMD128-NEXT: i32.const $push91=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $32, $pop91 +; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-NEXT: i32.const $push24=, 14 +; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-NEXT: i32.const $push90=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $15, $pop90 +; NO-SIMD128-NEXT: i32.const $push89=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $31, $pop89 +; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-NEXT: i32.const $push29=, 13 +; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-NEXT: i32.const $push88=, 255 +; NO-SIMD128-NEXT: i32.and $push27=, $14, $pop88 +; NO-SIMD128-NEXT: i32.const $push87=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $30, $pop87 +; NO-SIMD128-NEXT: i32.shr_u $push28=, $pop27, $pop26 +; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 12 +; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-NEXT: i32.const $push86=, 255 +; NO-SIMD128-NEXT: i32.and $push32=, $13, $pop86 +; NO-SIMD128-NEXT: i32.const $push85=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $29, $pop85 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop31 +; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-NEXT: i32.const $push39=, 11 +; NO-SIMD128-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-NEXT: i32.const $push84=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $12, $pop84 +; NO-SIMD128-NEXT: i32.const $push83=, 255 +; NO-SIMD128-NEXT: i32.and $push36=, $28, $pop83 +; NO-SIMD128-NEXT: i32.shr_u $push38=, $pop37, $pop36 +; NO-SIMD128-NEXT: i32.store8 0($pop40), $pop38 +; NO-SIMD128-NEXT: i32.const $push44=, 10 +; NO-SIMD128-NEXT: i32.add $push45=, $0, $pop44 +; NO-SIMD128-NEXT: i32.const $push82=, 255 +; NO-SIMD128-NEXT: i32.and $push42=, $11, $pop82 +; NO-SIMD128-NEXT: i32.const $push81=, 255 +; NO-SIMD128-NEXT: i32.and $push41=, $27, $pop81 +; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop41 +; NO-SIMD128-NEXT: i32.store8 0($pop45), $pop43 +; NO-SIMD128-NEXT: i32.const $push49=, 9 +; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-NEXT: i32.const $push80=, 255 +; NO-SIMD128-NEXT: i32.and $push47=, $10, $pop80 +; NO-SIMD128-NEXT: i32.const $push79=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $26, $pop79 +; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $pop46 +; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-NEXT: i32.const $push54=, 7 +; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 +; NO-SIMD128-NEXT: i32.const $push78=, 255 +; NO-SIMD128-NEXT: i32.and $push52=, $8, $pop78 +; NO-SIMD128-NEXT: i32.const $push77=, 255 +; NO-SIMD128-NEXT: i32.and $push51=, $24, $pop77 +; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop51 +; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop53 +; NO-SIMD128-NEXT: i32.const $push59=, 6 +; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-NEXT: i32.const $push76=, 255 +; NO-SIMD128-NEXT: i32.and $push57=, $7, $pop76 +; NO-SIMD128-NEXT: i32.const $push75=, 255 +; NO-SIMD128-NEXT: i32.and $push56=, $23, $pop75 +; NO-SIMD128-NEXT: i32.shr_u $push58=, $pop57, $pop56 +; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 +; NO-SIMD128-NEXT: i32.const $push64=, 5 +; NO-SIMD128-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-NEXT: i32.const $push74=, 255 +; NO-SIMD128-NEXT: i32.and $push62=, $6, $pop74 +; NO-SIMD128-NEXT: i32.const $push73=, 255 +; NO-SIMD128-NEXT: i32.and $push61=, $22, $pop73 +; NO-SIMD128-NEXT: i32.shr_u $push63=, $pop62, $pop61 +; NO-SIMD128-NEXT: i32.store8 0($pop65), $pop63 +; NO-SIMD128-NEXT: i32.const $push69=, 3 +; NO-SIMD128-NEXT: i32.add $push70=, $0, $pop69 +; NO-SIMD128-NEXT: i32.const $push72=, 255 +; NO-SIMD128-NEXT: i32.and $push67=, $4, $pop72 +; NO-SIMD128-NEXT: i32.const $push71=, 255 +; NO-SIMD128-NEXT: i32.and $push66=, $20, $pop71 +; NO-SIMD128-NEXT: i32.shr_u $push68=, $pop67, $pop66 +; NO-SIMD128-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_vec_v16i8: +; NO-SIMD128-FAST: .functype shr_u_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop101 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop100 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop99 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop98 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop97 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop96 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop95 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop94 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop93 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $22, $pop91 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $23, $pop89 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $24, $pop87 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop85 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop33 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $10, $pop84 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $26, $pop83 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push38=, $pop37, $pop36 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push45=, $0, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop82 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop81 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push43=, $pop42, $pop41 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop45), $pop43 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $12, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $28, $pop79 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push52=, $13, $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $29, $pop77 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $14, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push56=, $30, $pop75 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push58=, $pop57, $pop56 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop60), $pop58 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push62=, $15, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push61=, $31, $pop73 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop61 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop63 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push70=, $0, $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push67=, $16, $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push66=, $32, $pop71 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push68=, $pop67, $pop66 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-FAST-NEXT: return %a = lshr <16 x i8> %v, %x ret <16 x i8> %a } -; CHECK-LABEL: and_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype and_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: and_v16i8: +; SIMD128: .functype and_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.and $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: and_v16i8: +; SIMD128-FAST: .functype and_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.and $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: and_v16i8: +; NO-SIMD128: .functype and_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.and $push0=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-NEXT: i32.and $push3=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 +; NO-SIMD128-NEXT: i32.and $push4=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push6=, 15 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.and $push5=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 +; NO-SIMD128-NEXT: i32.const $push9=, 14 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.and $push8=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 13 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.and $push11=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 12 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.and $push14=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 11 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 +; NO-SIMD128-NEXT: i32.const $push21=, 10 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.and $push20=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push24=, 9 +; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-NEXT: i32.and $push23=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-NEXT: i32.const $push27=, 7 +; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-NEXT: i32.and $push26=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.const $push30=, 6 +; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-NEXT: i32.and $push29=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 +; NO-SIMD128-NEXT: i32.const $push33=, 5 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-NEXT: i32.and $push32=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-NEXT: i32.const $push36=, 3 +; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-NEXT: i32.and $push35=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: and_v16i8: +; NO-SIMD128-FAST: .functype and_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = and <16 x i8> %x, %y ret <16 x i8> %a } -; CHECK-LABEL: or_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype or_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: or_v16i8: +; SIMD128: .functype or_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.or $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: or_v16i8: +; SIMD128-FAST: .functype or_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.or $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: or_v16i8: +; NO-SIMD128: .functype or_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.or $push0=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-NEXT: i32.or $push3=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 +; NO-SIMD128-NEXT: i32.or $push4=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push6=, 15 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.or $push5=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 +; NO-SIMD128-NEXT: i32.const $push9=, 14 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.or $push8=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 13 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.or $push11=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 12 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.or $push14=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 11 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.or $push17=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 +; NO-SIMD128-NEXT: i32.const $push21=, 10 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.or $push20=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push24=, 9 +; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-NEXT: i32.or $push23=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-NEXT: i32.const $push27=, 7 +; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-NEXT: i32.or $push26=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.const $push30=, 6 +; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-NEXT: i32.or $push29=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 +; NO-SIMD128-NEXT: i32.const $push33=, 5 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-NEXT: i32.or $push32=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-NEXT: i32.const $push36=, 3 +; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-NEXT: i32.or $push35=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: or_v16i8: +; NO-SIMD128-FAST: .functype or_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.or $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.or $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.or $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.or $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.or $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.or $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.or $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.or $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.or $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.or $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = or <16 x i8> %x, %y ret <16 x i8> %a } -; CHECK-LABEL: xor_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype xor_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: xor_v16i8: +; SIMD128: .functype xor_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: xor_v16i8: +; SIMD128-FAST: .functype xor_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: xor_v16i8: +; NO-SIMD128: .functype xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.xor $push0=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 +; NO-SIMD128-NEXT: i32.xor $push4=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push6=, 15 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.xor $push5=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 +; NO-SIMD128-NEXT: i32.const $push9=, 14 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.xor $push8=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 13 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.xor $push11=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 12 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.xor $push14=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 11 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.xor $push17=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 +; NO-SIMD128-NEXT: i32.const $push21=, 10 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.xor $push20=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push24=, 9 +; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-NEXT: i32.xor $push23=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-NEXT: i32.const $push27=, 7 +; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-NEXT: i32.xor $push26=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.const $push30=, 6 +; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-NEXT: i32.xor $push29=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 +; NO-SIMD128-NEXT: i32.const $push33=, 5 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-NEXT: i32.xor $push32=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-NEXT: i32.const $push36=, 3 +; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-NEXT: i32.xor $push35=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: xor_v16i8: +; NO-SIMD128-FAST: .functype xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = xor <16 x i8> %x, %y ret <16 x i8> %a } -; CHECK-LABEL: not_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype not_v16i8 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @not_v16i8(<16 x i8> %x) { +; SIMD128-LABEL: not_v16i8: +; SIMD128: .functype not_v16i8 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.not $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: not_v16i8: +; SIMD128-FAST: .functype not_v16i8 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.not $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: not_v16i8: +; NO-SIMD128: .functype not_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, -1 +; NO-SIMD128-NEXT: i32.xor $push1=, $9, $pop0 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push53=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $5, $pop53 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push52=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $3, $pop52 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push51=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $2, $pop51 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push50=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $1, $pop50 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push7=, 15 +; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-NEXT: i32.const $push49=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $16, $pop49 +; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 +; NO-SIMD128-NEXT: i32.const $push10=, 14 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.const $push48=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $15, $pop48 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-NEXT: i32.const $push13=, 13 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.const $push47=, -1 +; NO-SIMD128-NEXT: i32.xor $push12=, $14, $pop47 +; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.const $push16=, 12 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.const $push46=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $13, $pop46 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.const $push19=, 11 +; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-NEXT: i32.const $push45=, -1 +; NO-SIMD128-NEXT: i32.xor $push18=, $12, $pop45 +; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-NEXT: i32.const $push22=, 10 +; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-NEXT: i32.const $push44=, -1 +; NO-SIMD128-NEXT: i32.xor $push21=, $11, $pop44 +; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 +; NO-SIMD128-NEXT: i32.const $push25=, 9 +; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-NEXT: i32.const $push43=, -1 +; NO-SIMD128-NEXT: i32.xor $push24=, $10, $pop43 +; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 +; NO-SIMD128-NEXT: i32.const $push28=, 7 +; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-NEXT: i32.const $push42=, -1 +; NO-SIMD128-NEXT: i32.xor $push27=, $8, $pop42 +; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 +; NO-SIMD128-NEXT: i32.const $push31=, 6 +; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.const $push41=, -1 +; NO-SIMD128-NEXT: i32.xor $push30=, $7, $pop41 +; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.const $push34=, 5 +; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-NEXT: i32.const $push40=, -1 +; NO-SIMD128-NEXT: i32.xor $push33=, $6, $pop40 +; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-NEXT: i32.const $push37=, 3 +; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 +; NO-SIMD128-NEXT: i32.const $push39=, -1 +; NO-SIMD128-NEXT: i32.xor $push36=, $4, $pop39 +; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: not_v16i8: +; NO-SIMD128-FAST: .functype not_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop50 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop49 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop48 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $9, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $10, $pop45 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $11, $pop44 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $12, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $13, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $14, $pop41 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $15, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $16, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: return %a = xor <16 x i8> %x, @not_v16i8(<16 x i8> %x) { ret <16 x i8> %a } -; CHECK-LABEL: andnot_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype andnot_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: return define <16 x i8> @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: andnot_v16i8: +; SIMD128: .functype andnot_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.andnot $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: andnot_v16i8: +; SIMD128-FAST: .functype andnot_v16i8 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.not $push0=, $1 +; SIMD128-FAST-NEXT: v128.and $push1=, $0, $pop0 +; SIMD128-FAST-NEXT: return $pop1 +; +; NO-SIMD128-LABEL: andnot_v16i8: +; NO-SIMD128: .functype andnot_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, -1 +; NO-SIMD128-NEXT: i32.xor $push1=, $25, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $9, $pop1 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push69=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $21, $pop69 +; NO-SIMD128-NEXT: i32.and $push4=, $5, $pop3 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push68=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $19, $pop68 +; NO-SIMD128-NEXT: i32.and $push6=, $3, $pop5 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push67=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $18, $pop67 +; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop7 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push66=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $17, $pop66 +; NO-SIMD128-NEXT: i32.and $push10=, $1, $pop9 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push13=, 15 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.const $push65=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $32, $pop65 +; NO-SIMD128-NEXT: i32.and $push12=, $16, $pop11 +; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.const $push17=, 14 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.const $push64=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $31, $pop64 +; NO-SIMD128-NEXT: i32.and $push16=, $15, $pop15 +; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.const $push21=, 13 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.const $push63=, -1 +; NO-SIMD128-NEXT: i32.xor $push19=, $30, $pop63 +; NO-SIMD128-NEXT: i32.and $push20=, $14, $pop19 +; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push25=, 12 +; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-NEXT: i32.const $push62=, -1 +; NO-SIMD128-NEXT: i32.xor $push23=, $29, $pop62 +; NO-SIMD128-NEXT: i32.and $push24=, $13, $pop23 +; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 +; NO-SIMD128-NEXT: i32.const $push29=, 11 +; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-NEXT: i32.const $push61=, -1 +; NO-SIMD128-NEXT: i32.xor $push27=, $28, $pop61 +; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop27 +; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-NEXT: i32.const $push33=, 10 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-NEXT: i32.const $push60=, -1 +; NO-SIMD128-NEXT: i32.xor $push31=, $27, $pop60 +; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop31 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-NEXT: i32.const $push37=, 9 +; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 +; NO-SIMD128-NEXT: i32.const $push59=, -1 +; NO-SIMD128-NEXT: i32.xor $push35=, $26, $pop59 +; NO-SIMD128-NEXT: i32.and $push36=, $10, $pop35 +; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.const $push41=, 7 +; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-NEXT: i32.const $push58=, -1 +; NO-SIMD128-NEXT: i32.xor $push39=, $24, $pop58 +; NO-SIMD128-NEXT: i32.and $push40=, $8, $pop39 +; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-NEXT: i32.const $push45=, 6 +; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 +; NO-SIMD128-NEXT: i32.const $push57=, -1 +; NO-SIMD128-NEXT: i32.xor $push43=, $23, $pop57 +; NO-SIMD128-NEXT: i32.and $push44=, $7, $pop43 +; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 +; NO-SIMD128-NEXT: i32.const $push49=, 5 +; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-NEXT: i32.const $push56=, -1 +; NO-SIMD128-NEXT: i32.xor $push47=, $22, $pop56 +; NO-SIMD128-NEXT: i32.and $push48=, $6, $pop47 +; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-NEXT: i32.const $push53=, 3 +; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 +; NO-SIMD128-NEXT: i32.const $push55=, -1 +; NO-SIMD128-NEXT: i32.xor $push51=, $20, $pop55 +; NO-SIMD128-NEXT: i32.and $push52=, $4, $pop51 +; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: andnot_v16i8: +; NO-SIMD128-FAST: .functype andnot_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $18, $pop69 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $19, $pop68 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $20, $pop67 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $21, $pop66 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $5, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $22, $pop65 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $6, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $23, $pop64 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $24, $pop63 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $8, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $25, $pop62 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $26, $pop61 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $10, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $27, $pop60 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $11, $pop33 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $28, $pop59 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $12, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $29, $pop58 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $13, $pop41 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $30, $pop57 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $14, $pop45 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $31, $pop56 +; NO-SIMD128-FAST-NEXT: i32.and $push50=, $15, $pop49 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push53=, $32, $pop55 +; NO-SIMD128-FAST-NEXT: i32.and $push54=, $16, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: return %inv_y = xor <16 x i8> %y, @@ -323,17 +5458,267 @@ define <16 x i8> @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) { ret <16 x i8> %a } -; CHECK-LABEL: bitselect_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_v16i8 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.or -; SIMD128-FAST-NEXT: return define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { +; SIMD128-LABEL: bitselect_v16i8: +; SIMD128: .functype bitselect_v16i8 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_v16i8: +; SIMD128-FAST: .functype bitselect_v16i8 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.and $push0=, $0, $1 +; SIMD128-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-FAST-NEXT: v128.and $push3=, $pop2, $2 +; SIMD128-FAST-NEXT: v128.or $push1=, $pop0, $pop3 +; SIMD128-FAST-NEXT: return $pop1 +; +; NO-SIMD128-LABEL: bitselect_v16i8: +; NO-SIMD128: .functype bitselect_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push5=, 15 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.and $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.const $push1=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop1 +; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $48 +; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3 +; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push11=, 14 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.and $push7=, $15, $31 +; NO-SIMD128-NEXT: i32.const $push101=, -1 +; NO-SIMD128-NEXT: i32.xor $push8=, $15, $pop101 +; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $47 +; NO-SIMD128-NEXT: i32.or $push10=, $pop7, $pop9 +; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push17=, 13 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.and $push13=, $14, $30 +; NO-SIMD128-NEXT: i32.const $push100=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $14, $pop100 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $46 +; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15 +; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.const $push23=, 12 +; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-NEXT: i32.and $push19=, $13, $29 +; NO-SIMD128-NEXT: i32.const $push99=, -1 +; NO-SIMD128-NEXT: i32.xor $push20=, $13, $pop99 +; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $45 +; NO-SIMD128-NEXT: i32.or $push22=, $pop19, $pop21 +; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.const $push29=, 11 +; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-NEXT: i32.and $push25=, $12, $28 +; NO-SIMD128-NEXT: i32.const $push98=, -1 +; NO-SIMD128-NEXT: i32.xor $push26=, $12, $pop98 +; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $44 +; NO-SIMD128-NEXT: i32.or $push28=, $pop25, $pop27 +; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-NEXT: i32.const $push35=, 10 +; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-NEXT: i32.and $push31=, $11, $27 +; NO-SIMD128-NEXT: i32.const $push97=, -1 +; NO-SIMD128-NEXT: i32.xor $push32=, $11, $pop97 +; NO-SIMD128-NEXT: i32.and $push33=, $pop32, $43 +; NO-SIMD128-NEXT: i32.or $push34=, $pop31, $pop33 +; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 +; NO-SIMD128-NEXT: i32.const $push41=, 9 +; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-NEXT: i32.and $push37=, $10, $26 +; NO-SIMD128-NEXT: i32.const $push96=, -1 +; NO-SIMD128-NEXT: i32.xor $push38=, $10, $pop96 +; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $42 +; NO-SIMD128-NEXT: i32.or $push40=, $pop37, $pop39 +; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-NEXT: i32.and $push43=, $9, $25 +; NO-SIMD128-NEXT: i32.const $push95=, -1 +; NO-SIMD128-NEXT: i32.xor $push44=, $9, $pop95 +; NO-SIMD128-NEXT: i32.and $push45=, $pop44, $41 +; NO-SIMD128-NEXT: i32.or $push46=, $pop43, $pop45 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 +; NO-SIMD128-NEXT: i32.const $push51=, 7 +; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-NEXT: i32.and $push47=, $8, $24 +; NO-SIMD128-NEXT: i32.const $push94=, -1 +; NO-SIMD128-NEXT: i32.xor $push48=, $8, $pop94 +; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $40 +; NO-SIMD128-NEXT: i32.or $push50=, $pop47, $pop49 +; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 +; NO-SIMD128-NEXT: i32.const $push57=, 6 +; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 +; NO-SIMD128-NEXT: i32.and $push53=, $7, $23 +; NO-SIMD128-NEXT: i32.const $push93=, -1 +; NO-SIMD128-NEXT: i32.xor $push54=, $7, $pop93 +; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $39 +; NO-SIMD128-NEXT: i32.or $push56=, $pop53, $pop55 +; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 +; NO-SIMD128-NEXT: i32.const $push63=, 5 +; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 +; NO-SIMD128-NEXT: i32.and $push59=, $6, $22 +; NO-SIMD128-NEXT: i32.const $push92=, -1 +; NO-SIMD128-NEXT: i32.xor $push60=, $6, $pop92 +; NO-SIMD128-NEXT: i32.and $push61=, $pop60, $38 +; NO-SIMD128-NEXT: i32.or $push62=, $pop59, $pop61 +; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 +; NO-SIMD128-NEXT: i32.and $push65=, $5, $21 +; NO-SIMD128-NEXT: i32.const $push91=, -1 +; NO-SIMD128-NEXT: i32.xor $push66=, $5, $pop91 +; NO-SIMD128-NEXT: i32.and $push67=, $pop66, $37 +; NO-SIMD128-NEXT: i32.or $push68=, $pop65, $pop67 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 +; NO-SIMD128-NEXT: i32.const $push73=, 3 +; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-NEXT: i32.and $push69=, $4, $20 +; NO-SIMD128-NEXT: i32.const $push90=, -1 +; NO-SIMD128-NEXT: i32.xor $push70=, $4, $pop90 +; NO-SIMD128-NEXT: i32.and $push71=, $pop70, $36 +; NO-SIMD128-NEXT: i32.or $push72=, $pop69, $pop71 +; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-NEXT: i32.and $push75=, $3, $19 +; NO-SIMD128-NEXT: i32.const $push89=, -1 +; NO-SIMD128-NEXT: i32.xor $push76=, $3, $pop89 +; NO-SIMD128-NEXT: i32.and $push77=, $pop76, $35 +; NO-SIMD128-NEXT: i32.or $push78=, $pop75, $pop77 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 +; NO-SIMD128-NEXT: i32.and $push79=, $2, $18 +; NO-SIMD128-NEXT: i32.const $push88=, -1 +; NO-SIMD128-NEXT: i32.xor $push80=, $2, $pop88 +; NO-SIMD128-NEXT: i32.and $push81=, $pop80, $34 +; NO-SIMD128-NEXT: i32.or $push82=, $pop79, $pop81 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 +; NO-SIMD128-NEXT: i32.and $push83=, $1, $17 +; NO-SIMD128-NEXT: i32.const $push87=, -1 +; NO-SIMD128-NEXT: i32.xor $push84=, $1, $pop87 +; NO-SIMD128-NEXT: i32.and $push85=, $pop84, $33 +; NO-SIMD128-NEXT: i32.or $push86=, $pop83, $pop85 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_v16i8: +; NO-SIMD128-FAST: .functype bitselect_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.const $push1=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $pop2, $33 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop101 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $34 +; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop100 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $35 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop99 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $36 +; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop98 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $37 +; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop97 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $38 +; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop96 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $39 +; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop95 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $40 +; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $9, $pop94 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $41 +; NO-SIMD128-FAST-NEXT: i32.or $push44=, $pop41, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $10, $pop93 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $42 +; NO-SIMD128-FAST-NEXT: i32.or $push48=, $pop45, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $11, $pop92 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $43 +; NO-SIMD128-FAST-NEXT: i32.or $push54=, $pop51, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $12, $pop91 +; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $44 +; NO-SIMD128-FAST-NEXT: i32.or $push60=, $pop57, $pop59 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $13, $pop90 +; NO-SIMD128-FAST-NEXT: i32.and $push65=, $pop64, $45 +; NO-SIMD128-FAST-NEXT: i32.or $push66=, $pop63, $pop65 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-FAST-NEXT: i32.and $push69=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push70=, $14, $pop89 +; NO-SIMD128-FAST-NEXT: i32.and $push71=, $pop70, $46 +; NO-SIMD128-FAST-NEXT: i32.or $push72=, $pop69, $pop71 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 +; NO-SIMD128-FAST-NEXT: i32.and $push75=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push76=, $15, $pop88 +; NO-SIMD128-FAST-NEXT: i32.and $push77=, $pop76, $47 +; NO-SIMD128-FAST-NEXT: i32.or $push78=, $pop75, $pop77 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 +; NO-SIMD128-FAST-NEXT: i32.and $push81=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push82=, $16, $pop87 +; NO-SIMD128-FAST-NEXT: i32.and $push83=, $pop82, $48 +; NO-SIMD128-FAST-NEXT: i32.or $push84=, $pop81, $pop83 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <16 x i8> %c, %v1 %inv_mask = xor <16 x i8> %c, @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { ret <16 x i8> %a } -; CHECK-LABEL: bitselect_xor_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_xor_v16i8 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.xor -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.xor define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { +; SIMD128-LABEL: bitselect_xor_v16i8: +; SIMD128: .functype bitselect_xor_v16i8 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_xor_v16i8: +; SIMD128-FAST: .functype bitselect_xor_v16i8 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push2=, $1, $2 +; SIMD128-FAST-NEXT: v128.and $push1=, $pop2, $0 +; SIMD128-FAST-NEXT: v128.xor $push0=, $pop1, $2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: bitselect_xor_v16i8: +; NO-SIMD128: .functype bitselect_xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push3=, 15 +; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-NEXT: i32.xor $push0=, $32, $48 +; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $16 +; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $48 +; NO-SIMD128-NEXT: i32.store8 0($pop4), $pop2 +; NO-SIMD128-NEXT: i32.const $push8=, 14 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.xor $push5=, $31, $47 +; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $15 +; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $47 +; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.const $push13=, 13 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.xor $push10=, $30, $46 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $14 +; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $46 +; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.const $push18=, 12 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.xor $push15=, $29, $45 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $13 +; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $45 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 +; NO-SIMD128-NEXT: i32.const $push23=, 11 +; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-NEXT: i32.xor $push20=, $28, $44 +; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $12 +; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $44 +; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.const $push28=, 10 +; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-NEXT: i32.xor $push25=, $27, $43 +; NO-SIMD128-NEXT: i32.and $push26=, $pop25, $11 +; NO-SIMD128-NEXT: i32.xor $push27=, $pop26, $43 +; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 +; NO-SIMD128-NEXT: i32.const $push33=, 9 +; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-NEXT: i32.xor $push30=, $26, $42 +; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $10 +; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $42 +; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-NEXT: i32.xor $push35=, $25, $41 +; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $9 +; NO-SIMD128-NEXT: i32.xor $push37=, $pop36, $41 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop37 +; NO-SIMD128-NEXT: i32.const $push41=, 7 +; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-NEXT: i32.xor $push38=, $24, $40 +; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $8 +; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $40 +; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-NEXT: i32.const $push46=, 6 +; NO-SIMD128-NEXT: i32.add $push47=, $0, $pop46 +; NO-SIMD128-NEXT: i32.xor $push43=, $23, $39 +; NO-SIMD128-NEXT: i32.and $push44=, $pop43, $7 +; NO-SIMD128-NEXT: i32.xor $push45=, $pop44, $39 +; NO-SIMD128-NEXT: i32.store8 0($pop47), $pop45 +; NO-SIMD128-NEXT: i32.const $push51=, 5 +; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-NEXT: i32.xor $push48=, $22, $38 +; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $6 +; NO-SIMD128-NEXT: i32.xor $push50=, $pop49, $38 +; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 +; NO-SIMD128-NEXT: i32.xor $push53=, $21, $37 +; NO-SIMD128-NEXT: i32.and $push54=, $pop53, $5 +; NO-SIMD128-NEXT: i32.xor $push55=, $pop54, $37 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop55 +; NO-SIMD128-NEXT: i32.const $push59=, 3 +; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-NEXT: i32.xor $push56=, $20, $36 +; NO-SIMD128-NEXT: i32.and $push57=, $pop56, $4 +; NO-SIMD128-NEXT: i32.xor $push58=, $pop57, $36 +; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 +; NO-SIMD128-NEXT: i32.xor $push61=, $19, $35 +; NO-SIMD128-NEXT: i32.and $push62=, $pop61, $3 +; NO-SIMD128-NEXT: i32.xor $push63=, $pop62, $35 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop63 +; NO-SIMD128-NEXT: i32.xor $push64=, $18, $34 +; NO-SIMD128-NEXT: i32.and $push65=, $pop64, $2 +; NO-SIMD128-NEXT: i32.xor $push66=, $pop65, $34 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop66 +; NO-SIMD128-NEXT: i32.xor $push67=, $17, $33 +; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $1 +; NO-SIMD128-NEXT: i32.xor $push69=, $pop68, $33 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop69 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_xor_v16i8: +; NO-SIMD128-FAST: .functype bitselect_xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $17, $33 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $pop0, $1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $pop1, $33 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $18, $34 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop3, $2 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $pop4, $34 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $19, $35 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $35 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $20, $36 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $36 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $21, $37 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $5 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $37 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $22, $38 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $6 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $pop20, $38 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $23, $39 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $7 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $39 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop23), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $24, $40 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $pop29, $8 +; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $pop30, $40 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop31 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $25, $41 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $pop32, $9 +; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $pop33, $41 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $26, $42 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $pop37, $10 +; NO-SIMD128-FAST-NEXT: i32.xor $push39=, $pop38, $42 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push41=, $0, $pop40 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $27, $43 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $11 +; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $43 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop41), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, $pop45 +; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $28, $44 +; NO-SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $12 +; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $pop48, $44 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push51=, $0, $pop50 +; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $29, $45 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $13 +; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $pop53, $45 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop51), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 +; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $30, $46 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $pop57, $14 +; NO-SIMD128-FAST-NEXT: i32.xor $push59=, $pop58, $46 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop59 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60 +; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $31, $47 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $15 +; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $pop63, $47 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop64 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push66=, $0, $pop65 +; NO-SIMD128-FAST-NEXT: i32.xor $push67=, $32, $48 +; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $16 +; NO-SIMD128-FAST-NEXT: i32.xor $push69=, $pop68, $48 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop66), $pop69 +; NO-SIMD128-FAST-NEXT: return %xor1 = xor <16 x i8> %v1, %v2 %and = and <16 x i8> %xor1, %c %a = xor <16 x i8> %and, %v2 ret <16 x i8> %a } -; CHECK-LABEL: bitselect_xor_reversed_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_xor_reversed_v16i8 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.xor -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.xor define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { +; SIMD128-LABEL: bitselect_xor_reversed_v16i8: +; SIMD128: .functype bitselect_xor_reversed_v16i8 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $2, $1, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_xor_reversed_v16i8: +; SIMD128-FAST: .functype bitselect_xor_reversed_v16i8 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push1=, $1, $2 +; SIMD128-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-FAST-NEXT: v128.and $push3=, $pop1, $pop2 +; SIMD128-FAST-NEXT: v128.xor $push0=, $pop3, $2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: bitselect_xor_reversed_v16i8: +; NO-SIMD128: .functype bitselect_xor_reversed_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push5=, 15 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.xor $push2=, $32, $48 +; NO-SIMD128-NEXT: i32.const $push0=, -1 +; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $48 +; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push11=, 14 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.xor $push8=, $31, $47 +; NO-SIMD128-NEXT: i32.const $push101=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $15, $pop101 +; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $47 +; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push17=, 13 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.xor $push14=, $30, $46 +; NO-SIMD128-NEXT: i32.const $push100=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $14, $pop100 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $46 +; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.const $push23=, 12 +; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-NEXT: i32.xor $push20=, $29, $45 +; NO-SIMD128-NEXT: i32.const $push99=, -1 +; NO-SIMD128-NEXT: i32.xor $push19=, $13, $pop99 +; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $45 +; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.const $push29=, 11 +; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-NEXT: i32.xor $push26=, $28, $44 +; NO-SIMD128-NEXT: i32.const $push98=, -1 +; NO-SIMD128-NEXT: i32.xor $push25=, $12, $pop98 +; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $44 +; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-NEXT: i32.const $push35=, 10 +; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-NEXT: i32.xor $push32=, $27, $43 +; NO-SIMD128-NEXT: i32.const $push97=, -1 +; NO-SIMD128-NEXT: i32.xor $push31=, $11, $pop97 +; NO-SIMD128-NEXT: i32.and $push33=, $pop32, $pop31 +; NO-SIMD128-NEXT: i32.xor $push34=, $pop33, $43 +; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 +; NO-SIMD128-NEXT: i32.const $push41=, 9 +; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-NEXT: i32.xor $push38=, $26, $42 +; NO-SIMD128-NEXT: i32.const $push96=, -1 +; NO-SIMD128-NEXT: i32.xor $push37=, $10, $pop96 +; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $42 +; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-NEXT: i32.xor $push44=, $25, $41 +; NO-SIMD128-NEXT: i32.const $push95=, -1 +; NO-SIMD128-NEXT: i32.xor $push43=, $9, $pop95 +; NO-SIMD128-NEXT: i32.and $push45=, $pop44, $pop43 +; NO-SIMD128-NEXT: i32.xor $push46=, $pop45, $41 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 +; NO-SIMD128-NEXT: i32.const $push51=, 7 +; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-NEXT: i32.xor $push48=, $24, $40 +; NO-SIMD128-NEXT: i32.const $push94=, -1 +; NO-SIMD128-NEXT: i32.xor $push47=, $8, $pop94 +; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $pop47 +; NO-SIMD128-NEXT: i32.xor $push50=, $pop49, $40 +; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 +; NO-SIMD128-NEXT: i32.const $push57=, 6 +; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 +; NO-SIMD128-NEXT: i32.xor $push54=, $23, $39 +; NO-SIMD128-NEXT: i32.const $push93=, -1 +; NO-SIMD128-NEXT: i32.xor $push53=, $7, $pop93 +; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $pop53 +; NO-SIMD128-NEXT: i32.xor $push56=, $pop55, $39 +; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 +; NO-SIMD128-NEXT: i32.const $push63=, 5 +; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 +; NO-SIMD128-NEXT: i32.xor $push60=, $22, $38 +; NO-SIMD128-NEXT: i32.const $push92=, -1 +; NO-SIMD128-NEXT: i32.xor $push59=, $6, $pop92 +; NO-SIMD128-NEXT: i32.and $push61=, $pop60, $pop59 +; NO-SIMD128-NEXT: i32.xor $push62=, $pop61, $38 +; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 +; NO-SIMD128-NEXT: i32.xor $push66=, $21, $37 +; NO-SIMD128-NEXT: i32.const $push91=, -1 +; NO-SIMD128-NEXT: i32.xor $push65=, $5, $pop91 +; NO-SIMD128-NEXT: i32.and $push67=, $pop66, $pop65 +; NO-SIMD128-NEXT: i32.xor $push68=, $pop67, $37 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 +; NO-SIMD128-NEXT: i32.const $push73=, 3 +; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-NEXT: i32.xor $push70=, $20, $36 +; NO-SIMD128-NEXT: i32.const $push90=, -1 +; NO-SIMD128-NEXT: i32.xor $push69=, $4, $pop90 +; NO-SIMD128-NEXT: i32.and $push71=, $pop70, $pop69 +; NO-SIMD128-NEXT: i32.xor $push72=, $pop71, $36 +; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-NEXT: i32.xor $push76=, $19, $35 +; NO-SIMD128-NEXT: i32.const $push89=, -1 +; NO-SIMD128-NEXT: i32.xor $push75=, $3, $pop89 +; NO-SIMD128-NEXT: i32.and $push77=, $pop76, $pop75 +; NO-SIMD128-NEXT: i32.xor $push78=, $pop77, $35 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 +; NO-SIMD128-NEXT: i32.xor $push80=, $18, $34 +; NO-SIMD128-NEXT: i32.const $push88=, -1 +; NO-SIMD128-NEXT: i32.xor $push79=, $2, $pop88 +; NO-SIMD128-NEXT: i32.and $push81=, $pop80, $pop79 +; NO-SIMD128-NEXT: i32.xor $push82=, $pop81, $34 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 +; NO-SIMD128-NEXT: i32.xor $push84=, $17, $33 +; NO-SIMD128-NEXT: i32.const $push87=, -1 +; NO-SIMD128-NEXT: i32.xor $push83=, $1, $pop87 +; NO-SIMD128-NEXT: i32.and $push85=, $pop84, $pop83 +; NO-SIMD128-NEXT: i32.xor $push86=, $pop85, $33 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v16i8: +; NO-SIMD128-FAST: .functype bitselect_xor_reversed_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $17, $33 +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $33 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $18, $34 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop101 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $34 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $19, $35 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop100 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $35 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $20, $36 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop99 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $36 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $21, $37 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $5, $pop98 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $pop21, $37 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $22, $38 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $6, $pop97 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $38 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $23, $39 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $7, $pop96 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $pop29 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $39 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $24, $40 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $8, $pop95 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $pop35 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $40 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $25, $41 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $9, $pop94 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $pop41 +; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $41 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $26, $42 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $10, $pop93 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $pop45 +; NO-SIMD128-FAST-NEXT: i32.xor $push48=, $pop47, $42 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 +; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $27, $43 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push51=, $11, $pop92 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $pop51 +; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $pop53, $43 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 +; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $28, $44 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $12, $pop91 +; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $pop57 +; NO-SIMD128-FAST-NEXT: i32.xor $push60=, $pop59, $44 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 +; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $29, $45 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push63=, $13, $pop90 +; NO-SIMD128-FAST-NEXT: i32.and $push65=, $pop64, $pop63 +; NO-SIMD128-FAST-NEXT: i32.xor $push66=, $pop65, $45 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-FAST-NEXT: i32.xor $push70=, $30, $46 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push69=, $14, $pop89 +; NO-SIMD128-FAST-NEXT: i32.and $push71=, $pop70, $pop69 +; NO-SIMD128-FAST-NEXT: i32.xor $push72=, $pop71, $46 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 +; NO-SIMD128-FAST-NEXT: i32.xor $push76=, $31, $47 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push75=, $15, $pop88 +; NO-SIMD128-FAST-NEXT: i32.and $push77=, $pop76, $pop75 +; NO-SIMD128-FAST-NEXT: i32.xor $push78=, $pop77, $47 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 +; NO-SIMD128-FAST-NEXT: i32.xor $push82=, $32, $48 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push81=, $16, $pop87 +; NO-SIMD128-FAST-NEXT: i32.and $push83=, $pop82, $pop81 +; NO-SIMD128-FAST-NEXT: i32.xor $push84=, $pop83, $48 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: return %xor1 = xor <16 x i8> %v1, %v2 %notc = xor <16 x i8> %c, @@ -379,132 +6202,1459 @@ define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 ; ============================================================================== ; 8 x i16 ; ============================================================================== -; CHECK-LABEL: add_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype add_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: add_v8i16: +; SIMD128: .functype add_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.add $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: add_v8i16: +; SIMD128-FAST: .functype add_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.add $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: add_v8i16: +; NO-SIMD128: .functype add_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.add $push0=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 +; NO-SIMD128-NEXT: i32.add $push1=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 +; NO-SIMD128-NEXT: i32.add $push2=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-NEXT: i32.add $push3=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 14 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.add $push4=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.add $push7=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.const $push11=, 10 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.add $push10=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push14=, 6 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.add $push13=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: add_v8i16: +; NO-SIMD128-FAST: .functype add_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.add $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.add $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = add <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: sub_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype sub_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: sub_v8i16: +; SIMD128: .functype sub_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.sub $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: sub_v8i16: +; SIMD128-FAST: .functype sub_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.sub $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: sub_v8i16: +; NO-SIMD128: .functype sub_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.sub $push0=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 +; NO-SIMD128-NEXT: i32.sub $push1=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 +; NO-SIMD128-NEXT: i32.sub $push2=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-NEXT: i32.sub $push3=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 14 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.sub $push4=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.sub $push7=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.const $push11=, 10 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.sub $push10=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push14=, 6 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.sub $push13=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sub_v8i16: +; NO-SIMD128-FAST: .functype sub_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.sub $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: mul_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype mul_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.mul $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: mul_v8i16: +; SIMD128: .functype mul_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.mul $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: mul_v8i16: +; SIMD128-FAST: .functype mul_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.mul $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: mul_v8i16: +; NO-SIMD128: .functype mul_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.mul $push0=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 +; NO-SIMD128-NEXT: i32.mul $push1=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 +; NO-SIMD128-NEXT: i32.mul $push2=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-NEXT: i32.mul $push3=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 14 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.mul $push4=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.mul $push7=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.const $push11=, 10 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.mul $push10=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push14=, 6 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.mul $push13=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: mul_v8i16: +; NO-SIMD128-FAST: .functype mul_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.mul $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.mul $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = mul <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: min_s_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype min_s_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.min_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: min_s_v8i16: +; SIMD128: .functype min_s_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.min_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: min_s_v8i16: +; SIMD128-FAST: .functype min_s_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.min_s $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_s_v8i16: +; NO-SIMD128: .functype min_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push4=, 14 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16 +; NO-SIMD128-NEXT: i32.lt_s $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i32.select $push3=, $8, $16, $pop2 +; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.const $push10=, 12 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push6=, $15 +; NO-SIMD128-NEXT: i32.lt_s $push8=, $pop7, $pop6 +; NO-SIMD128-NEXT: i32.select $push9=, $7, $15, $pop8 +; NO-SIMD128-NEXT: i32.store16 0($pop11), $pop9 +; NO-SIMD128-NEXT: i32.const $push16=, 10 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $6 +; NO-SIMD128-NEXT: i32.extend16_s $push12=, $14 +; NO-SIMD128-NEXT: i32.lt_s $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.select $push15=, $6, $14, $pop14 +; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push19=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push18=, $13 +; NO-SIMD128-NEXT: i32.lt_s $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.select $push21=, $5, $13, $pop20 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push26=, 6 +; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.extend16_s $push23=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push22=, $12 +; NO-SIMD128-NEXT: i32.lt_s $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.select $push25=, $4, $12, $pop24 +; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-NEXT: i32.extend16_s $push29=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push28=, $11 +; NO-SIMD128-NEXT: i32.lt_s $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.select $push31=, $3, $11, $pop30 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop31 +; NO-SIMD128-NEXT: i32.extend16_s $push33=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push32=, $10 +; NO-SIMD128-NEXT: i32.lt_s $push34=, $pop33, $pop32 +; NO-SIMD128-NEXT: i32.select $push35=, $2, $10, $pop34 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop35 +; NO-SIMD128-NEXT: i32.extend16_s $push37=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push36=, $9 +; NO-SIMD128-NEXT: i32.lt_s $push38=, $pop37, $pop36 +; NO-SIMD128-NEXT: i32.select $push39=, $1, $9, $pop38 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop39 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_s_v8i16: +; NO-SIMD128-FAST: .functype min_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push1=, $1 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push0=, $9 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.select $push3=, $1, $9, $pop2 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $2 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push4=, $10 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.select $push7=, $2, $10, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $3 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push8=, $11 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push10=, $pop9, $pop8 +; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push16=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $12 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $12, $pop14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $13 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $14 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $14, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $15 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $15, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop33), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push35=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push34=, $16 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push36=, $pop35, $pop34 +; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $16, $pop36 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop39), $pop37 +; NO-SIMD128-FAST-NEXT: return %c = icmp slt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y ret <8 x i16> %a } -; CHECK-LABEL: min_u_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype min_u_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.min_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: min_u_v8i16: +; SIMD128: .functype min_u_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.min_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: min_u_v8i16: +; SIMD128-FAST: .functype min_u_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.min_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_u_v8i16: +; NO-SIMD128: .functype min_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push5=, 14 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 +; NO-SIMD128-NEXT: i32.const $push55=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop55 +; NO-SIMD128-NEXT: i32.lt_u $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.select $push4=, $8, $16, $pop3 +; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push11=, 12 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.const $push54=, 65535 +; NO-SIMD128-NEXT: i32.and $push8=, $7, $pop54 +; NO-SIMD128-NEXT: i32.const $push53=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $15, $pop53 +; NO-SIMD128-NEXT: i32.lt_u $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.select $push10=, $7, $15, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push17=, 10 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.const $push52=, 65535 +; NO-SIMD128-NEXT: i32.and $push14=, $6, $pop52 +; NO-SIMD128-NEXT: i32.const $push51=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $14, $pop51 +; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $6, $14, $pop15 +; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.const $push50=, 65535 +; NO-SIMD128-NEXT: i32.and $push20=, $5, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 65535 +; NO-SIMD128-NEXT: i32.and $push19=, $13, $pop49 +; NO-SIMD128-NEXT: i32.lt_u $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.select $push22=, $5, $13, $pop21 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push27=, 6 +; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-NEXT: i32.const $push48=, 65535 +; NO-SIMD128-NEXT: i32.and $push24=, $4, $pop48 +; NO-SIMD128-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-NEXT: i32.and $push23=, $12, $pop47 +; NO-SIMD128-NEXT: i32.lt_u $push25=, $pop24, $pop23 +; NO-SIMD128-NEXT: i32.select $push26=, $4, $12, $pop25 +; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-NEXT: i32.and $push30=, $3, $pop46 +; NO-SIMD128-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-NEXT: i32.and $push29=, $11, $pop45 +; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $3, $11, $pop31 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 +; NO-SIMD128-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-NEXT: i32.and $push34=, $2, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-NEXT: i32.and $push33=, $10, $pop43 +; NO-SIMD128-NEXT: i32.lt_u $push35=, $pop34, $pop33 +; NO-SIMD128-NEXT: i32.select $push36=, $2, $10, $pop35 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 +; NO-SIMD128-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-NEXT: i32.and $push38=, $1, $pop42 +; NO-SIMD128-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-NEXT: i32.and $push37=, $9, $pop41 +; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.select $push40=, $1, $9, $pop39 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_u_v8i16: +; NO-SIMD128-FAST: .functype min_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop55 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $9, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop53 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push7=, $pop6, $pop5 +; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $10, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop51 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $11, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $12, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $pop47 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $13, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $pop45 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $14, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop43 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push31=, $pop30, $pop29 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $15, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $pop41 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push37=, $pop36, $pop35 +; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $16, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: return %c = icmp ult <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y ret <8 x i16> %a } -; CHECK-LABEL: max_s_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype max_s_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.max_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: max_s_v8i16: +; SIMD128: .functype max_s_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.max_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: max_s_v8i16: +; SIMD128-FAST: .functype max_s_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.max_s $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_s_v8i16: +; NO-SIMD128: .functype max_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push4=, 14 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16 +; NO-SIMD128-NEXT: i32.gt_s $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i32.select $push3=, $8, $16, $pop2 +; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.const $push10=, 12 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push6=, $15 +; NO-SIMD128-NEXT: i32.gt_s $push8=, $pop7, $pop6 +; NO-SIMD128-NEXT: i32.select $push9=, $7, $15, $pop8 +; NO-SIMD128-NEXT: i32.store16 0($pop11), $pop9 +; NO-SIMD128-NEXT: i32.const $push16=, 10 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $6 +; NO-SIMD128-NEXT: i32.extend16_s $push12=, $14 +; NO-SIMD128-NEXT: i32.gt_s $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.select $push15=, $6, $14, $pop14 +; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push19=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push18=, $13 +; NO-SIMD128-NEXT: i32.gt_s $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.select $push21=, $5, $13, $pop20 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push26=, 6 +; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.extend16_s $push23=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push22=, $12 +; NO-SIMD128-NEXT: i32.gt_s $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.select $push25=, $4, $12, $pop24 +; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-NEXT: i32.extend16_s $push29=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push28=, $11 +; NO-SIMD128-NEXT: i32.gt_s $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.select $push31=, $3, $11, $pop30 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop31 +; NO-SIMD128-NEXT: i32.extend16_s $push33=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push32=, $10 +; NO-SIMD128-NEXT: i32.gt_s $push34=, $pop33, $pop32 +; NO-SIMD128-NEXT: i32.select $push35=, $2, $10, $pop34 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop35 +; NO-SIMD128-NEXT: i32.extend16_s $push37=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push36=, $9 +; NO-SIMD128-NEXT: i32.gt_s $push38=, $pop37, $pop36 +; NO-SIMD128-NEXT: i32.select $push39=, $1, $9, $pop38 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop39 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_s_v8i16: +; NO-SIMD128-FAST: .functype max_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push1=, $1 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push0=, $9 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.select $push3=, $1, $9, $pop2 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $2 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push4=, $10 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.select $push7=, $2, $10, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $3 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push8=, $11 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push10=, $pop9, $pop8 +; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push16=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $12 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $12, $pop14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $13 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $14 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $14, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $15 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $15, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop33), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push35=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push34=, $16 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push36=, $pop35, $pop34 +; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $16, $pop36 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop39), $pop37 +; NO-SIMD128-FAST-NEXT: return %c = icmp sgt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y ret <8 x i16> %a } -; CHECK-LABEL: max_u_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype max_u_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.max_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: max_u_v8i16: +; SIMD128: .functype max_u_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.max_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: max_u_v8i16: +; SIMD128-FAST: .functype max_u_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.max_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_u_v8i16: +; NO-SIMD128: .functype max_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push5=, 14 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 +; NO-SIMD128-NEXT: i32.const $push55=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop55 +; NO-SIMD128-NEXT: i32.gt_u $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.select $push4=, $8, $16, $pop3 +; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push11=, 12 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.const $push54=, 65535 +; NO-SIMD128-NEXT: i32.and $push8=, $7, $pop54 +; NO-SIMD128-NEXT: i32.const $push53=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $15, $pop53 +; NO-SIMD128-NEXT: i32.gt_u $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.select $push10=, $7, $15, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push17=, 10 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.const $push52=, 65535 +; NO-SIMD128-NEXT: i32.and $push14=, $6, $pop52 +; NO-SIMD128-NEXT: i32.const $push51=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $14, $pop51 +; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $6, $14, $pop15 +; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.const $push50=, 65535 +; NO-SIMD128-NEXT: i32.and $push20=, $5, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 65535 +; NO-SIMD128-NEXT: i32.and $push19=, $13, $pop49 +; NO-SIMD128-NEXT: i32.gt_u $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.select $push22=, $5, $13, $pop21 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push27=, 6 +; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-NEXT: i32.const $push48=, 65535 +; NO-SIMD128-NEXT: i32.and $push24=, $4, $pop48 +; NO-SIMD128-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-NEXT: i32.and $push23=, $12, $pop47 +; NO-SIMD128-NEXT: i32.gt_u $push25=, $pop24, $pop23 +; NO-SIMD128-NEXT: i32.select $push26=, $4, $12, $pop25 +; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-NEXT: i32.and $push30=, $3, $pop46 +; NO-SIMD128-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-NEXT: i32.and $push29=, $11, $pop45 +; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $3, $11, $pop31 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 +; NO-SIMD128-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-NEXT: i32.and $push34=, $2, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-NEXT: i32.and $push33=, $10, $pop43 +; NO-SIMD128-NEXT: i32.gt_u $push35=, $pop34, $pop33 +; NO-SIMD128-NEXT: i32.select $push36=, $2, $10, $pop35 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 +; NO-SIMD128-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-NEXT: i32.and $push38=, $1, $pop42 +; NO-SIMD128-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-NEXT: i32.and $push37=, $9, $pop41 +; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.select $push40=, $1, $9, $pop39 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_u_v8i16: +; NO-SIMD128-FAST: .functype max_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop55 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $9, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop53 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push7=, $pop6, $pop5 +; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $10, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop51 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $11, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $12, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $pop47 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $13, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $pop45 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $14, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop43 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push31=, $pop30, $pop29 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $15, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $pop41 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push37=, $pop36, $pop35 +; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $16, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: return %c = icmp ugt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y ret <8 x i16> %a } -; CHECK-LABEL: avgr_u_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype avgr_u_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.avgr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: avgr_u_v8i16: +; SIMD128: .functype avgr_u_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.avgr_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: avgr_u_v8i16: +; SIMD128-FAST: .functype avgr_u_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.avgr_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: avgr_u_v8i16: +; NO-SIMD128: .functype avgr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 14 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.add $push2=, $8, $16 +; NO-SIMD128-NEXT: i32.const $push3=, 1 +; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 65534 +; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 +; NO-SIMD128-NEXT: i32.const $push63=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop63 +; NO-SIMD128-NEXT: i32.store16 0($pop1), $pop7 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.add $push10=, $7, $15 +; NO-SIMD128-NEXT: i32.const $push62=, 1 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop62 +; NO-SIMD128-NEXT: i32.const $push61=, 65534 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop61 +; NO-SIMD128-NEXT: i32.const $push60=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop60 +; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop13 +; NO-SIMD128-NEXT: i32.const $push14=, 10 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.add $push16=, $6, $14 +; NO-SIMD128-NEXT: i32.const $push59=, 1 +; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop59 +; NO-SIMD128-NEXT: i32.const $push58=, 65534 +; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop58 +; NO-SIMD128-NEXT: i32.const $push57=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop57 +; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop19 +; NO-SIMD128-NEXT: i32.add $push20=, $5, $13 +; NO-SIMD128-NEXT: i32.const $push56=, 1 +; NO-SIMD128-NEXT: i32.add $push21=, $pop20, $pop56 +; NO-SIMD128-NEXT: i32.const $push55=, 65534 +; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $pop55 +; NO-SIMD128-NEXT: i32.const $push54=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop54 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop23 +; NO-SIMD128-NEXT: i32.const $push24=, 6 +; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-NEXT: i32.add $push26=, $4, $12 +; NO-SIMD128-NEXT: i32.const $push53=, 1 +; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop53 +; NO-SIMD128-NEXT: i32.const $push52=, 65534 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop52 +; NO-SIMD128-NEXT: i32.const $push51=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop51 +; NO-SIMD128-NEXT: i32.store16 0($pop25), $pop29 +; NO-SIMD128-NEXT: i32.add $push30=, $3, $11 +; NO-SIMD128-NEXT: i32.const $push50=, 1 +; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 65534 +; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop49 +; NO-SIMD128-NEXT: i32.const $push48=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop48 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop33 +; NO-SIMD128-NEXT: i32.add $push34=, $2, $10 +; NO-SIMD128-NEXT: i32.const $push47=, 1 +; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop47 +; NO-SIMD128-NEXT: i32.const $push46=, 65534 +; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop46 +; NO-SIMD128-NEXT: i32.const $push45=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop45 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop37 +; NO-SIMD128-NEXT: i32.add $push38=, $1, $9 +; NO-SIMD128-NEXT: i32.const $push44=, 1 +; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 65534 +; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-NEXT: i32.const $push42=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop42 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop41 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: avgr_u_v8i16: +; NO-SIMD128-FAST: .functype avgr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.add $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.const $push1=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop62 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop61 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop60 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop59 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop58 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop57 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop54 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop19 +; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop48 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop25), $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop45 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop31), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop37), $pop41 +; NO-SIMD128-FAST-NEXT: return %a = add nuw <8 x i16> %x, %y %b = add nuw <8 x i16> %a, %c = udiv <8 x i16> %b, ret <8 x i16> %c } -; CHECK-LABEL: avgr_u_v8i16_wrap: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype avgr_u_v8i16_wrap (v128, v128) -> (v128){{$}} -; SIMD128-NOT: i16x8.avgr_u define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: avgr_u_v8i16_wrap: +; SIMD128: .functype avgr_u_v8i16_wrap (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.add $push0=, $0, $1 +; SIMD128-NEXT: v128.const $push1=, 1, 1, 1, 1, 1, 1, 1, 1 +; SIMD128-NEXT: i16x8.add $push2=, $pop0, $pop1 +; SIMD128-NEXT: i32.const $push3=, 1 +; SIMD128-NEXT: i16x8.shr_u $push4=, $pop2, $pop3 +; SIMD128-NEXT: return $pop4 +; +; SIMD128-FAST-LABEL: avgr_u_v8i16_wrap: +; SIMD128-FAST: .functype avgr_u_v8i16_wrap (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.add $push2=, $0, $1 +; SIMD128-FAST-NEXT: v128.const $push3=, 1, 1, 1, 1, 1, 1, 1, 1 +; SIMD128-FAST-NEXT: i16x8.add $push1=, $pop2, $pop3 +; SIMD128-FAST-NEXT: i32.const $push4=, 1 +; SIMD128-FAST-NEXT: i16x8.shr_u $push0=, $pop1, $pop4 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: avgr_u_v8i16_wrap: +; NO-SIMD128: .functype avgr_u_v8i16_wrap (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 14 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.add $push2=, $8, $16 +; NO-SIMD128-NEXT: i32.const $push3=, 1 +; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 65534 +; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 +; NO-SIMD128-NEXT: i32.const $push63=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop63 +; NO-SIMD128-NEXT: i32.store16 0($pop1), $pop7 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.add $push10=, $7, $15 +; NO-SIMD128-NEXT: i32.const $push62=, 1 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop62 +; NO-SIMD128-NEXT: i32.const $push61=, 65534 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop61 +; NO-SIMD128-NEXT: i32.const $push60=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop60 +; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop13 +; NO-SIMD128-NEXT: i32.const $push14=, 10 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.add $push16=, $6, $14 +; NO-SIMD128-NEXT: i32.const $push59=, 1 +; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop59 +; NO-SIMD128-NEXT: i32.const $push58=, 65534 +; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop58 +; NO-SIMD128-NEXT: i32.const $push57=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop57 +; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop19 +; NO-SIMD128-NEXT: i32.add $push20=, $5, $13 +; NO-SIMD128-NEXT: i32.const $push56=, 1 +; NO-SIMD128-NEXT: i32.add $push21=, $pop20, $pop56 +; NO-SIMD128-NEXT: i32.const $push55=, 65534 +; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $pop55 +; NO-SIMD128-NEXT: i32.const $push54=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop54 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop23 +; NO-SIMD128-NEXT: i32.const $push24=, 6 +; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-NEXT: i32.add $push26=, $4, $12 +; NO-SIMD128-NEXT: i32.const $push53=, 1 +; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop53 +; NO-SIMD128-NEXT: i32.const $push52=, 65534 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop52 +; NO-SIMD128-NEXT: i32.const $push51=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop51 +; NO-SIMD128-NEXT: i32.store16 0($pop25), $pop29 +; NO-SIMD128-NEXT: i32.add $push30=, $3, $11 +; NO-SIMD128-NEXT: i32.const $push50=, 1 +; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 65534 +; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop49 +; NO-SIMD128-NEXT: i32.const $push48=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop48 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop33 +; NO-SIMD128-NEXT: i32.add $push34=, $2, $10 +; NO-SIMD128-NEXT: i32.const $push47=, 1 +; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop47 +; NO-SIMD128-NEXT: i32.const $push46=, 65534 +; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop46 +; NO-SIMD128-NEXT: i32.const $push45=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop45 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop37 +; NO-SIMD128-NEXT: i32.add $push38=, $1, $9 +; NO-SIMD128-NEXT: i32.const $push44=, 1 +; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 65534 +; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-NEXT: i32.const $push42=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop42 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop41 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: avgr_u_v8i16_wrap: +; NO-SIMD128-FAST: .functype avgr_u_v8i16_wrap (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.add $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.const $push1=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop62 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop61 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop60 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop59 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop58 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop57 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop54 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop19 +; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop48 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop25), $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop45 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop31), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop37), $pop41 +; NO-SIMD128-FAST-NEXT: return %a = add <8 x i16> %x, %y %b = add <8 x i16> %a, %c = udiv <8 x i16> %b, ret <8 x i16> %c } -; CHECK-LABEL: abs_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype abs_v8i16 (v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.abs $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @abs_v8i16(<8 x i16> %x) { +; SIMD128-LABEL: abs_v8i16: +; SIMD128: .functype abs_v8i16 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.abs $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: abs_v8i16: +; SIMD128-FAST: .functype abs_v8i16 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.abs $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: abs_v8i16: +; NO-SIMD128: .functype abs_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push4=, 14 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $8 +; NO-SIMD128-NEXT: i32.const $push1=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push55=, $pop0, $pop1 +; NO-SIMD128-NEXT: local.tee $push54=, $9=, $pop55 +; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop54 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop2, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.const $push9=, 12 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.extend16_s $push6=, $7 +; NO-SIMD128-NEXT: i32.const $push53=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push52=, $pop6, $pop53 +; NO-SIMD128-NEXT: local.tee $push51=, $8=, $pop52 +; NO-SIMD128-NEXT: i32.xor $push7=, $7, $pop51 +; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $8 +; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push14=, 10 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.extend16_s $push11=, $6 +; NO-SIMD128-NEXT: i32.const $push50=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push49=, $pop11, $pop50 +; NO-SIMD128-NEXT: local.tee $push48=, $8=, $pop49 +; NO-SIMD128-NEXT: i32.xor $push12=, $6, $pop48 +; NO-SIMD128-NEXT: i32.sub $push13=, $pop12, $8 +; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.extend16_s $push16=, $5 +; NO-SIMD128-NEXT: i32.const $push47=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push46=, $pop16, $pop47 +; NO-SIMD128-NEXT: local.tee $push45=, $8=, $pop46 +; NO-SIMD128-NEXT: i32.xor $push17=, $5, $pop45 +; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $8 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push22=, 6 +; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-NEXT: i32.extend16_s $push19=, $4 +; NO-SIMD128-NEXT: i32.const $push44=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push43=, $pop19, $pop44 +; NO-SIMD128-NEXT: local.tee $push42=, $8=, $pop43 +; NO-SIMD128-NEXT: i32.xor $push20=, $4, $pop42 +; NO-SIMD128-NEXT: i32.sub $push21=, $pop20, $8 +; NO-SIMD128-NEXT: i32.store16 0($pop23), $pop21 +; NO-SIMD128-NEXT: i32.extend16_s $push24=, $3 +; NO-SIMD128-NEXT: i32.const $push41=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push40=, $pop24, $pop41 +; NO-SIMD128-NEXT: local.tee $push39=, $8=, $pop40 +; NO-SIMD128-NEXT: i32.xor $push25=, $3, $pop39 +; NO-SIMD128-NEXT: i32.sub $push26=, $pop25, $8 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop26 +; NO-SIMD128-NEXT: i32.extend16_s $push27=, $2 +; NO-SIMD128-NEXT: i32.const $push38=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push37=, $pop27, $pop38 +; NO-SIMD128-NEXT: local.tee $push36=, $8=, $pop37 +; NO-SIMD128-NEXT: i32.xor $push28=, $2, $pop36 +; NO-SIMD128-NEXT: i32.sub $push29=, $pop28, $8 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop29 +; NO-SIMD128-NEXT: i32.extend16_s $push30=, $1 +; NO-SIMD128-NEXT: i32.const $push35=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push34=, $pop30, $pop35 +; NO-SIMD128-NEXT: local.tee $push33=, $8=, $pop34 +; NO-SIMD128-NEXT: i32.xor $push31=, $1, $pop33 +; NO-SIMD128-NEXT: i32.sub $push32=, $pop31, $8 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: abs_v8i16: +; NO-SIMD128-FAST: .functype abs_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push0=, $1 +; NO-SIMD128-FAST-NEXT: i32.const $push1=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push55=, $pop0, $pop1 +; NO-SIMD128-FAST-NEXT: local.tee $push54=, $9=, $pop55 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop54 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop2, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push4=, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push52=, $pop4, $pop53 +; NO-SIMD128-FAST-NEXT: local.tee $push51=, $1=, $pop52 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop51 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push7=, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push49=, $pop7, $pop50 +; NO-SIMD128-FAST-NEXT: local.tee $push48=, $2=, $pop49 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop48 +; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop8, $2 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $4 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push46=, $pop10, $pop47 +; NO-SIMD128-FAST-NEXT: local.tee $push45=, $3=, $pop46 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop45 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push43=, $pop15, $pop44 +; NO-SIMD128-FAST-NEXT: local.tee $push42=, $4=, $pop43 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $5, $pop42 +; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop16, $4 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop18, $pop41 +; NO-SIMD128-FAST-NEXT: local.tee $push39=, $5=, $pop40 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $6, $pop39 +; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop19, $5 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push37=, $pop23, $pop38 +; NO-SIMD128-FAST-NEXT: local.tee $push36=, $6=, $pop37 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $pop24, $6 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push34=, $pop28, $pop35 +; NO-SIMD128-FAST-NEXT: local.tee $push33=, $0=, $pop34 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop33 +; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $0 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> zeroinitializer, %x %b = icmp slt <8 x i16> %x, zeroinitializer %c = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %x ret <8 x i16> %c } -; CHECK-LABEL: neg_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype neg_v8i16 (v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @neg_v8i16(<8 x i16> %x) { +; SIMD128-LABEL: neg_v8i16: +; SIMD128: .functype neg_v8i16 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.neg $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: neg_v8i16: +; SIMD128-FAST: .functype neg_v8i16 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.neg $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: neg_v8i16: +; NO-SIMD128: .functype neg_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 0 +; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $5 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push23=, 0 +; NO-SIMD128-NEXT: i32.sub $push2=, $pop23, $3 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push22=, 0 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop22, $2 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push21=, 0 +; NO-SIMD128-NEXT: i32.sub $push4=, $pop21, $1 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push6=, 14 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.const $push20=, 0 +; NO-SIMD128-NEXT: i32.sub $push5=, $pop20, $8 +; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 +; NO-SIMD128-NEXT: i32.const $push9=, 12 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.const $push19=, 0 +; NO-SIMD128-NEXT: i32.sub $push8=, $pop19, $7 +; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 10 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.const $push18=, 0 +; NO-SIMD128-NEXT: i32.sub $push11=, $pop18, $6 +; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 6 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, 0 +; NO-SIMD128-NEXT: i32.sub $push14=, $pop17, $4 +; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: neg_v8i16: +; NO-SIMD128-FAST: .functype neg_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop23, $2 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop22, $3 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop21, $4 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop20, $5 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop19, $6 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop18, $7 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop17, $8 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> , %x ret <8 x i16> %a } -; CHECK-LABEL: shl_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shl_v8i16 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) { +; SIMD128-LABEL: shl_v8i16: +; SIMD128: .functype shl_v8i16 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.shl $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shl_v8i16: +; SIMD128-FAST: .functype shl_v8i16 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.shl $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_v8i16: +; NO-SIMD128: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push18=, $9, $pop0 +; NO-SIMD128-NEXT: local.tee $push17=, $9=, $pop18 +; NO-SIMD128-NEXT: i32.shl $push1=, $5, $pop17 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $3, $9 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shl $push3=, $2, $9 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 +; NO-SIMD128-NEXT: i32.shl $push4=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push6=, 14 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.shl $push5=, $8, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 +; NO-SIMD128-NEXT: i32.const $push9=, 12 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.shl $push8=, $7, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 10 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.shl $push11=, $6, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 6 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.shl $push14=, $4, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_v8i16: +; NO-SIMD128-FAST: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push17=, $9=, $pop18 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, <8 x i32> @@ -512,46 +7662,391 @@ define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) { ret <8 x i16> %a } -; CHECK-LABEL: shl_const_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shl_const_v8i16 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5 -; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shl_const_v8i16(<8 x i16> %v) { +; SIMD128-LABEL: shl_const_v8i16: +; SIMD128: .functype shl_const_v8i16 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32.const $push0=, 5 +; SIMD128-NEXT: i16x8.shl $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: shl_const_v8i16: +; SIMD128-FAST: .functype shl_const_v8i16 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-FAST-NEXT: i16x8.shl $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_const_v8i16: +; NO-SIMD128: .functype shl_const_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 5 +; NO-SIMD128-NEXT: i32.shl $push1=, $5, $pop0 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push23=, 5 +; NO-SIMD128-NEXT: i32.shl $push2=, $3, $pop23 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push22=, 5 +; NO-SIMD128-NEXT: i32.shl $push3=, $2, $pop22 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push21=, 5 +; NO-SIMD128-NEXT: i32.shl $push4=, $1, $pop21 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push6=, 14 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.const $push20=, 5 +; NO-SIMD128-NEXT: i32.shl $push5=, $8, $pop20 +; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 +; NO-SIMD128-NEXT: i32.const $push9=, 12 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.const $push19=, 5 +; NO-SIMD128-NEXT: i32.shl $push8=, $7, $pop19 +; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 10 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.const $push18=, 5 +; NO-SIMD128-NEXT: i32.shl $push11=, $6, $pop18 +; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 6 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, 5 +; NO-SIMD128-NEXT: i32.shl $push14=, $4, $pop17 +; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_const_v8i16: +; NO-SIMD128-FAST: .functype shl_const_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: return %a = shl <8 x i16> %v, ret <8 x i16> %a } -; CHECK-LABEL: shl_vec_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} -; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}} -; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}} -; Skip 6 lanes -; SIMD128: i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}} -; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} -; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { +; SIMD128-LABEL: shl_vec_v8i16: +; SIMD128: .functype shl_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extract_lane_u $push7=, $0, 0 +; SIMD128-NEXT: i16x8.extract_lane_u $push5=, $1, 0 +; SIMD128-NEXT: i32.const $push1=, 15 +; SIMD128-NEXT: i32.and $push6=, $pop5, $pop1 +; SIMD128-NEXT: i32.shl $push8=, $pop7, $pop6 +; SIMD128-NEXT: i16x8.splat $push9=, $pop8 +; SIMD128-NEXT: i16x8.extract_lane_u $push3=, $0, 1 +; SIMD128-NEXT: i16x8.extract_lane_u $push0=, $1, 1 +; SIMD128-NEXT: i32.const $push47=, 15 +; SIMD128-NEXT: i32.and $push2=, $pop0, $pop47 +; SIMD128-NEXT: i32.shl $push4=, $pop3, $pop2 +; SIMD128-NEXT: i16x8.replace_lane $push10=, $pop9, 1, $pop4 +; SIMD128-NEXT: i16x8.extract_lane_u $push13=, $0, 2 +; SIMD128-NEXT: i16x8.extract_lane_u $push11=, $1, 2 +; SIMD128-NEXT: i32.const $push46=, 15 +; SIMD128-NEXT: i32.and $push12=, $pop11, $pop46 +; SIMD128-NEXT: i32.shl $push14=, $pop13, $pop12 +; SIMD128-NEXT: i16x8.replace_lane $push15=, $pop10, 2, $pop14 +; SIMD128-NEXT: i16x8.extract_lane_u $push18=, $0, 3 +; SIMD128-NEXT: i16x8.extract_lane_u $push16=, $1, 3 +; SIMD128-NEXT: i32.const $push45=, 15 +; SIMD128-NEXT: i32.and $push17=, $pop16, $pop45 +; SIMD128-NEXT: i32.shl $push19=, $pop18, $pop17 +; SIMD128-NEXT: i16x8.replace_lane $push20=, $pop15, 3, $pop19 +; SIMD128-NEXT: i16x8.extract_lane_u $push23=, $0, 4 +; SIMD128-NEXT: i16x8.extract_lane_u $push21=, $1, 4 +; SIMD128-NEXT: i32.const $push44=, 15 +; SIMD128-NEXT: i32.and $push22=, $pop21, $pop44 +; SIMD128-NEXT: i32.shl $push24=, $pop23, $pop22 +; SIMD128-NEXT: i16x8.replace_lane $push25=, $pop20, 4, $pop24 +; SIMD128-NEXT: i16x8.extract_lane_u $push28=, $0, 5 +; SIMD128-NEXT: i16x8.extract_lane_u $push26=, $1, 5 +; SIMD128-NEXT: i32.const $push43=, 15 +; SIMD128-NEXT: i32.and $push27=, $pop26, $pop43 +; SIMD128-NEXT: i32.shl $push29=, $pop28, $pop27 +; SIMD128-NEXT: i16x8.replace_lane $push30=, $pop25, 5, $pop29 +; SIMD128-NEXT: i16x8.extract_lane_u $push33=, $0, 6 +; SIMD128-NEXT: i16x8.extract_lane_u $push31=, $1, 6 +; SIMD128-NEXT: i32.const $push42=, 15 +; SIMD128-NEXT: i32.and $push32=, $pop31, $pop42 +; SIMD128-NEXT: i32.shl $push34=, $pop33, $pop32 +; SIMD128-NEXT: i16x8.replace_lane $push35=, $pop30, 6, $pop34 +; SIMD128-NEXT: i16x8.extract_lane_u $push38=, $0, 7 +; SIMD128-NEXT: i16x8.extract_lane_u $push36=, $1, 7 +; SIMD128-NEXT: i32.const $push41=, 15 +; SIMD128-NEXT: i32.and $push37=, $pop36, $pop41 +; SIMD128-NEXT: i32.shl $push39=, $pop38, $pop37 +; SIMD128-NEXT: i16x8.replace_lane $push40=, $pop35, 7, $pop39 +; SIMD128-NEXT: return $pop40 +; +; SIMD128-FAST-LABEL: shl_vec_v8i16: +; SIMD128-FAST: .functype shl_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push8=, $0, 0 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push6=, $1, 0 +; SIMD128-FAST-NEXT: i32.const $push2=, 15 +; SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop2 +; SIMD128-FAST-NEXT: i32.shl $push9=, $pop8, $pop7 +; SIMD128-FAST-NEXT: i16x8.splat $push10=, $pop9 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push4=, $0, 1 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push1=, $1, 1 +; SIMD128-FAST-NEXT: i32.const $push47=, 15 +; SIMD128-FAST-NEXT: i32.and $push3=, $pop1, $pop47 +; SIMD128-FAST-NEXT: i32.shl $push5=, $pop4, $pop3 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push11=, $pop10, 1, $pop5 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push14=, $0, 2 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push12=, $1, 2 +; SIMD128-FAST-NEXT: i32.const $push46=, 15 +; SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $pop46 +; SIMD128-FAST-NEXT: i32.shl $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push16=, $pop11, 2, $pop15 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push19=, $0, 3 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push17=, $1, 3 +; SIMD128-FAST-NEXT: i32.const $push45=, 15 +; SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop45 +; SIMD128-FAST-NEXT: i32.shl $push20=, $pop19, $pop18 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push21=, $pop16, 3, $pop20 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push24=, $0, 4 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push22=, $1, 4 +; SIMD128-FAST-NEXT: i32.const $push44=, 15 +; SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop44 +; SIMD128-FAST-NEXT: i32.shl $push25=, $pop24, $pop23 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push26=, $pop21, 4, $pop25 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push29=, $0, 5 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push27=, $1, 5 +; SIMD128-FAST-NEXT: i32.const $push43=, 15 +; SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop43 +; SIMD128-FAST-NEXT: i32.shl $push30=, $pop29, $pop28 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push31=, $pop26, 5, $pop30 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push34=, $0, 6 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push32=, $1, 6 +; SIMD128-FAST-NEXT: i32.const $push42=, 15 +; SIMD128-FAST-NEXT: i32.and $push33=, $pop32, $pop42 +; SIMD128-FAST-NEXT: i32.shl $push35=, $pop34, $pop33 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push36=, $pop31, 6, $pop35 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push39=, $0, 7 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push37=, $1, 7 +; SIMD128-FAST-NEXT: i32.const $push41=, 15 +; SIMD128-FAST-NEXT: i32.and $push38=, $pop37, $pop41 +; SIMD128-FAST-NEXT: i32.shl $push40=, $pop39, $pop38 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push0=, $pop36, 7, $pop40 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_vec_v8i16: +; NO-SIMD128: .functype shl_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop0 +; NO-SIMD128-NEXT: i32.shl $push2=, $5, $pop1 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-NEXT: i32.and $push3=, $11, $pop31 +; NO-SIMD128-NEXT: i32.shl $push4=, $3, $pop3 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-NEXT: i32.and $push5=, $10, $pop30 +; NO-SIMD128-NEXT: i32.shl $push6=, $2, $pop5 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $9, $pop29 +; NO-SIMD128-NEXT: i32.shl $push8=, $1, $pop7 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push11=, 14 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-NEXT: i32.and $push9=, $16, $pop28 +; NO-SIMD128-NEXT: i32.shl $push10=, $8, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push15=, 12 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $15, $pop27 +; NO-SIMD128-NEXT: i32.shl $push14=, $7, $pop13 +; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push19=, 10 +; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-NEXT: i32.and $push17=, $14, $pop26 +; NO-SIMD128-NEXT: i32.shl $push18=, $6, $pop17 +; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 +; NO-SIMD128-NEXT: i32.const $push23=, 6 +; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-NEXT: i32.and $push21=, $12, $pop25 +; NO-SIMD128-NEXT: i32.shl $push22=, $4, $pop21 +; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_vec_v8i16: +; NO-SIMD128-FAST: .functype shl_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $10, $pop31 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $11, $pop30 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $12, $pop29 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $13, $pop28 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $5, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $14, $pop27 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop26 +; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $16, $pop25 +; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: return %a = shl <8 x i16> %v, %x ret <8 x i16> %a } -; CHECK-LABEL: shr_s_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shr_s_v8i16 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) { +; SIMD128-LABEL: shr_s_v8i16: +; SIMD128: .functype shr_s_v8i16 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.shr_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shr_s_v8i16: +; SIMD128-FAST: .functype shr_s_v8i16 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.shr_s $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_s_v8i16: +; NO-SIMD128: .functype shr_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $5 +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push26=, $9, $pop0 +; NO-SIMD128-NEXT: local.tee $push25=, $9=, $pop26 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop25 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 +; NO-SIMD128-NEXT: i32.extend16_s $push3=, $3 +; NO-SIMD128-NEXT: i32.shr_s $push4=, $pop3, $9 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $2 +; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $9 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $1 +; NO-SIMD128-NEXT: i32.shr_s $push8=, $pop7, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push11=, 14 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $8 +; NO-SIMD128-NEXT: i32.shr_s $push10=, $pop9, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push15=, 12 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $7 +; NO-SIMD128-NEXT: i32.shr_s $push14=, $pop13, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push19=, 10 +; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-NEXT: i32.extend16_s $push17=, $6 +; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 +; NO-SIMD128-NEXT: i32.const $push23=, 6 +; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-NEXT: i32.extend16_s $push21=, $4 +; NO-SIMD128-NEXT: i32.shr_s $push22=, $pop21, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_v8i16: +; NO-SIMD128-FAST: .functype shr_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push1=, $1 +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push25=, $1=, $pop26 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push3=, $2 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $3 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $4 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $5 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $6 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $8 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, <8 x i32> @@ -559,34 +8054,330 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) { ret <8 x i16> %a } -; CHECK-LABEL: shr_s_vec_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} -; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}} -; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}} -; Skip 6 lanes -; SIMD128: i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}} -; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} -; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { +; SIMD128-LABEL: shr_s_vec_v8i16: +; SIMD128: .functype shr_s_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extract_lane_s $push7=, $0, 0 +; SIMD128-NEXT: i16x8.extract_lane_u $push5=, $1, 0 +; SIMD128-NEXT: i32.const $push1=, 15 +; SIMD128-NEXT: i32.and $push6=, $pop5, $pop1 +; SIMD128-NEXT: i32.shr_s $push8=, $pop7, $pop6 +; SIMD128-NEXT: i16x8.splat $push9=, $pop8 +; SIMD128-NEXT: i16x8.extract_lane_s $push3=, $0, 1 +; SIMD128-NEXT: i16x8.extract_lane_u $push0=, $1, 1 +; SIMD128-NEXT: i32.const $push47=, 15 +; SIMD128-NEXT: i32.and $push2=, $pop0, $pop47 +; SIMD128-NEXT: i32.shr_s $push4=, $pop3, $pop2 +; SIMD128-NEXT: i16x8.replace_lane $push10=, $pop9, 1, $pop4 +; SIMD128-NEXT: i16x8.extract_lane_s $push13=, $0, 2 +; SIMD128-NEXT: i16x8.extract_lane_u $push11=, $1, 2 +; SIMD128-NEXT: i32.const $push46=, 15 +; SIMD128-NEXT: i32.and $push12=, $pop11, $pop46 +; SIMD128-NEXT: i32.shr_s $push14=, $pop13, $pop12 +; SIMD128-NEXT: i16x8.replace_lane $push15=, $pop10, 2, $pop14 +; SIMD128-NEXT: i16x8.extract_lane_s $push18=, $0, 3 +; SIMD128-NEXT: i16x8.extract_lane_u $push16=, $1, 3 +; SIMD128-NEXT: i32.const $push45=, 15 +; SIMD128-NEXT: i32.and $push17=, $pop16, $pop45 +; SIMD128-NEXT: i32.shr_s $push19=, $pop18, $pop17 +; SIMD128-NEXT: i16x8.replace_lane $push20=, $pop15, 3, $pop19 +; SIMD128-NEXT: i16x8.extract_lane_s $push23=, $0, 4 +; SIMD128-NEXT: i16x8.extract_lane_u $push21=, $1, 4 +; SIMD128-NEXT: i32.const $push44=, 15 +; SIMD128-NEXT: i32.and $push22=, $pop21, $pop44 +; SIMD128-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; SIMD128-NEXT: i16x8.replace_lane $push25=, $pop20, 4, $pop24 +; SIMD128-NEXT: i16x8.extract_lane_s $push28=, $0, 5 +; SIMD128-NEXT: i16x8.extract_lane_u $push26=, $1, 5 +; SIMD128-NEXT: i32.const $push43=, 15 +; SIMD128-NEXT: i32.and $push27=, $pop26, $pop43 +; SIMD128-NEXT: i32.shr_s $push29=, $pop28, $pop27 +; SIMD128-NEXT: i16x8.replace_lane $push30=, $pop25, 5, $pop29 +; SIMD128-NEXT: i16x8.extract_lane_s $push33=, $0, 6 +; SIMD128-NEXT: i16x8.extract_lane_u $push31=, $1, 6 +; SIMD128-NEXT: i32.const $push42=, 15 +; SIMD128-NEXT: i32.and $push32=, $pop31, $pop42 +; SIMD128-NEXT: i32.shr_s $push34=, $pop33, $pop32 +; SIMD128-NEXT: i16x8.replace_lane $push35=, $pop30, 6, $pop34 +; SIMD128-NEXT: i16x8.extract_lane_s $push38=, $0, 7 +; SIMD128-NEXT: i16x8.extract_lane_u $push36=, $1, 7 +; SIMD128-NEXT: i32.const $push41=, 15 +; SIMD128-NEXT: i32.and $push37=, $pop36, $pop41 +; SIMD128-NEXT: i32.shr_s $push39=, $pop38, $pop37 +; SIMD128-NEXT: i16x8.replace_lane $push40=, $pop35, 7, $pop39 +; SIMD128-NEXT: return $pop40 +; +; SIMD128-FAST-LABEL: shr_s_vec_v8i16: +; SIMD128-FAST: .functype shr_s_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.extract_lane_s $push8=, $0, 0 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push6=, $1, 0 +; SIMD128-FAST-NEXT: i32.const $push2=, 15 +; SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop2 +; SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7 +; SIMD128-FAST-NEXT: i16x8.splat $push10=, $pop9 +; SIMD128-FAST-NEXT: i16x8.extract_lane_s $push4=, $0, 1 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push1=, $1, 1 +; SIMD128-FAST-NEXT: i32.const $push47=, 15 +; SIMD128-FAST-NEXT: i32.and $push3=, $pop1, $pop47 +; SIMD128-FAST-NEXT: i32.shr_s $push5=, $pop4, $pop3 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push11=, $pop10, 1, $pop5 +; SIMD128-FAST-NEXT: i16x8.extract_lane_s $push14=, $0, 2 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push12=, $1, 2 +; SIMD128-FAST-NEXT: i32.const $push46=, 15 +; SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $pop46 +; SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push16=, $pop11, 2, $pop15 +; SIMD128-FAST-NEXT: i16x8.extract_lane_s $push19=, $0, 3 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push17=, $1, 3 +; SIMD128-FAST-NEXT: i32.const $push45=, 15 +; SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop45 +; SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $pop18 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push21=, $pop16, 3, $pop20 +; SIMD128-FAST-NEXT: i16x8.extract_lane_s $push24=, $0, 4 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push22=, $1, 4 +; SIMD128-FAST-NEXT: i32.const $push44=, 15 +; SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop44 +; SIMD128-FAST-NEXT: i32.shr_s $push25=, $pop24, $pop23 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push26=, $pop21, 4, $pop25 +; SIMD128-FAST-NEXT: i16x8.extract_lane_s $push29=, $0, 5 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push27=, $1, 5 +; SIMD128-FAST-NEXT: i32.const $push43=, 15 +; SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop43 +; SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $pop28 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push31=, $pop26, 5, $pop30 +; SIMD128-FAST-NEXT: i16x8.extract_lane_s $push34=, $0, 6 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push32=, $1, 6 +; SIMD128-FAST-NEXT: i32.const $push42=, 15 +; SIMD128-FAST-NEXT: i32.and $push33=, $pop32, $pop42 +; SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop34, $pop33 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push36=, $pop31, 6, $pop35 +; SIMD128-FAST-NEXT: i16x8.extract_lane_s $push39=, $0, 7 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push37=, $1, 7 +; SIMD128-FAST-NEXT: i32.const $push41=, 15 +; SIMD128-FAST-NEXT: i32.and $push38=, $pop37, $pop41 +; SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop39, $pop38 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push0=, $pop36, 7, $pop40 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_s_vec_v8i16: +; NO-SIMD128: .functype shr_s_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend16_s $push2=, $5 +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop0 +; NO-SIMD128-NEXT: i32.shr_s $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $3 +; NO-SIMD128-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop39 +; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.extend16_s $push8=, $2 +; NO-SIMD128-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop38 +; NO-SIMD128-NEXT: i32.shr_s $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 +; NO-SIMD128-NEXT: i32.extend16_s $push11=, $1 +; NO-SIMD128-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop37 +; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push16=, 14 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.extend16_s $push14=, $8 +; NO-SIMD128-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $16, $pop36 +; NO-SIMD128-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.const $push21=, 12 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.extend16_s $push19=, $7 +; NO-SIMD128-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-NEXT: i32.and $push18=, $15, $pop35 +; NO-SIMD128-NEXT: i32.shr_s $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push26=, 10 +; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.extend16_s $push24=, $6 +; NO-SIMD128-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-NEXT: i32.and $push23=, $14, $pop34 +; NO-SIMD128-NEXT: i32.shr_s $push25=, $pop24, $pop23 +; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-NEXT: i32.const $push31=, 6 +; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.extend16_s $push29=, $4 +; NO-SIMD128-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop33 +; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_vec_v8i16: +; NO-SIMD128-FAST: .functype shr_s_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push2=, $1 +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop39 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push8=, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop38 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $12, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop14 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $13, $pop36 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $14, $pop35 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop19), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push26=, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop34 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop24), $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push31=, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $16, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop29), $pop32 +; NO-SIMD128-FAST-NEXT: return %a = ashr <8 x i16> %v, %x ret <8 x i16> %a } -; CHECK-LABEL: shr_u_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shr_u_v8i16 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) { +; SIMD128-LABEL: shr_u_v8i16: +; SIMD128: .functype shr_u_v8i16 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.shr_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shr_u_v8i16: +; SIMD128-FAST: .functype shr_u_v8i16 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.shr_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_u_v8i16: +; NO-SIMD128: .functype shr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $5, $pop0 +; NO-SIMD128-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-NEXT: i32.and $push33=, $9, $pop34 +; NO-SIMD128-NEXT: local.tee $push32=, $9=, $pop33 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop32 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-NEXT: i32.and $push3=, $3, $pop31 +; NO-SIMD128-NEXT: i32.shr_u $push4=, $pop3, $9 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-NEXT: i32.and $push5=, $2, $pop30 +; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $9 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $1, $pop29 +; NO-SIMD128-NEXT: i32.shr_u $push8=, $pop7, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push11=, 14 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-NEXT: i32.and $push9=, $8, $pop28 +; NO-SIMD128-NEXT: i32.shr_u $push10=, $pop9, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push15=, 12 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $7, $pop27 +; NO-SIMD128-NEXT: i32.shr_u $push14=, $pop13, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push19=, 10 +; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-NEXT: i32.and $push17=, $6, $pop26 +; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 +; NO-SIMD128-NEXT: i32.const $push23=, 6 +; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-NEXT: i32.and $push21=, $4, $pop25 +; NO-SIMD128-NEXT: i32.shr_u $push22=, $pop21, $9 +; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_v8i16: +; NO-SIMD128-FAST: .functype shr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $9, $pop34 +; NO-SIMD128-FAST-NEXT: local.tee $push32=, $1=, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop32 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop31 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop30 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop29 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop28 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop27 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop26 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop20), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop25 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, <8 x i32> @@ -594,95 +8385,797 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) { ret <8 x i16> %a } -; CHECK-LABEL: shr_u_vec_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} -; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}} -; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}} -; Skip 6 lanes -; SIMD128: i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}} -; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} -; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[M7:[0-9]+]], 7, $pop[[M6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { +; SIMD128-LABEL: shr_u_vec_v8i16: +; SIMD128: .functype shr_u_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extract_lane_u $push7=, $0, 0 +; SIMD128-NEXT: i16x8.extract_lane_u $push5=, $1, 0 +; SIMD128-NEXT: i32.const $push1=, 15 +; SIMD128-NEXT: i32.and $push6=, $pop5, $pop1 +; SIMD128-NEXT: i32.shr_u $push8=, $pop7, $pop6 +; SIMD128-NEXT: i16x8.splat $push9=, $pop8 +; SIMD128-NEXT: i16x8.extract_lane_u $push3=, $0, 1 +; SIMD128-NEXT: i16x8.extract_lane_u $push0=, $1, 1 +; SIMD128-NEXT: i32.const $push47=, 15 +; SIMD128-NEXT: i32.and $push2=, $pop0, $pop47 +; SIMD128-NEXT: i32.shr_u $push4=, $pop3, $pop2 +; SIMD128-NEXT: i16x8.replace_lane $push10=, $pop9, 1, $pop4 +; SIMD128-NEXT: i16x8.extract_lane_u $push13=, $0, 2 +; SIMD128-NEXT: i16x8.extract_lane_u $push11=, $1, 2 +; SIMD128-NEXT: i32.const $push46=, 15 +; SIMD128-NEXT: i32.and $push12=, $pop11, $pop46 +; SIMD128-NEXT: i32.shr_u $push14=, $pop13, $pop12 +; SIMD128-NEXT: i16x8.replace_lane $push15=, $pop10, 2, $pop14 +; SIMD128-NEXT: i16x8.extract_lane_u $push18=, $0, 3 +; SIMD128-NEXT: i16x8.extract_lane_u $push16=, $1, 3 +; SIMD128-NEXT: i32.const $push45=, 15 +; SIMD128-NEXT: i32.and $push17=, $pop16, $pop45 +; SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop17 +; SIMD128-NEXT: i16x8.replace_lane $push20=, $pop15, 3, $pop19 +; SIMD128-NEXT: i16x8.extract_lane_u $push23=, $0, 4 +; SIMD128-NEXT: i16x8.extract_lane_u $push21=, $1, 4 +; SIMD128-NEXT: i32.const $push44=, 15 +; SIMD128-NEXT: i32.and $push22=, $pop21, $pop44 +; SIMD128-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; SIMD128-NEXT: i16x8.replace_lane $push25=, $pop20, 4, $pop24 +; SIMD128-NEXT: i16x8.extract_lane_u $push28=, $0, 5 +; SIMD128-NEXT: i16x8.extract_lane_u $push26=, $1, 5 +; SIMD128-NEXT: i32.const $push43=, 15 +; SIMD128-NEXT: i32.and $push27=, $pop26, $pop43 +; SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop27 +; SIMD128-NEXT: i16x8.replace_lane $push30=, $pop25, 5, $pop29 +; SIMD128-NEXT: i16x8.extract_lane_u $push33=, $0, 6 +; SIMD128-NEXT: i16x8.extract_lane_u $push31=, $1, 6 +; SIMD128-NEXT: i32.const $push42=, 15 +; SIMD128-NEXT: i32.and $push32=, $pop31, $pop42 +; SIMD128-NEXT: i32.shr_u $push34=, $pop33, $pop32 +; SIMD128-NEXT: i16x8.replace_lane $push35=, $pop30, 6, $pop34 +; SIMD128-NEXT: i16x8.extract_lane_u $push38=, $0, 7 +; SIMD128-NEXT: i16x8.extract_lane_u $push36=, $1, 7 +; SIMD128-NEXT: i32.const $push41=, 15 +; SIMD128-NEXT: i32.and $push37=, $pop36, $pop41 +; SIMD128-NEXT: i32.shr_u $push39=, $pop38, $pop37 +; SIMD128-NEXT: i16x8.replace_lane $push40=, $pop35, 7, $pop39 +; SIMD128-NEXT: return $pop40 +; +; SIMD128-FAST-LABEL: shr_u_vec_v8i16: +; SIMD128-FAST: .functype shr_u_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push8=, $0, 0 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push6=, $1, 0 +; SIMD128-FAST-NEXT: i32.const $push2=, 15 +; SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop2 +; SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; SIMD128-FAST-NEXT: i16x8.splat $push10=, $pop9 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push4=, $0, 1 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push1=, $1, 1 +; SIMD128-FAST-NEXT: i32.const $push47=, 15 +; SIMD128-FAST-NEXT: i32.and $push3=, $pop1, $pop47 +; SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop3 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push11=, $pop10, 1, $pop5 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push14=, $0, 2 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push12=, $1, 2 +; SIMD128-FAST-NEXT: i32.const $push46=, 15 +; SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $pop46 +; SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push16=, $pop11, 2, $pop15 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push19=, $0, 3 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push17=, $1, 3 +; SIMD128-FAST-NEXT: i32.const $push45=, 15 +; SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop45 +; SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push21=, $pop16, 3, $pop20 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push24=, $0, 4 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push22=, $1, 4 +; SIMD128-FAST-NEXT: i32.const $push44=, 15 +; SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop44 +; SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push26=, $pop21, 4, $pop25 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push29=, $0, 5 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push27=, $1, 5 +; SIMD128-FAST-NEXT: i32.const $push43=, 15 +; SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop43 +; SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push31=, $pop26, 5, $pop30 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push34=, $0, 6 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push32=, $1, 6 +; SIMD128-FAST-NEXT: i32.const $push42=, 15 +; SIMD128-FAST-NEXT: i32.and $push33=, $pop32, $pop42 +; SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop33 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push36=, $pop31, 6, $pop35 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push39=, $0, 7 +; SIMD128-FAST-NEXT: i16x8.extract_lane_u $push37=, $1, 7 +; SIMD128-FAST-NEXT: i32.const $push41=, 15 +; SIMD128-FAST-NEXT: i32.and $push38=, $pop37, $pop41 +; SIMD128-FAST-NEXT: i32.shr_u $push40=, $pop39, $pop38 +; SIMD128-FAST-NEXT: i16x8.replace_lane $push0=, $pop36, 7, $pop40 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_u_vec_v8i16: +; NO-SIMD128: .functype shr_u_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop0 +; NO-SIMD128-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop47 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop46 +; NO-SIMD128-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop45 +; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop43 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop42 +; NO-SIMD128-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop41 +; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push16=, 14 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-NEXT: i32.and $push14=, $8, $pop40 +; NO-SIMD128-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $16, $pop39 +; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.const $push21=, 12 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-NEXT: i32.and $push18=, $15, $pop37 +; NO-SIMD128-NEXT: i32.shr_u $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push26=, 10 +; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-NEXT: i32.and $push24=, $6, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-NEXT: i32.and $push23=, $14, $pop35 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop23 +; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-NEXT: i32.const $push31=, 6 +; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-NEXT: i32.and $push29=, $4, $pop34 +; NO-SIMD128-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop33 +; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_vec_v8i16: +; NO-SIMD128-FAST: .functype shr_u_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop45 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop43 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop41 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $13, $pop39 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $14, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $15, $pop35 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $16, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: return %a = lshr <8 x i16> %v, %x ret <8 x i16> %a } -; CHECK-LABEL: and_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype and_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: and_v8i16: +; SIMD128: .functype and_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.and $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: and_v8i16: +; SIMD128-FAST: .functype and_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.and $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: and_v8i16: +; NO-SIMD128: .functype and_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.and $push0=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-NEXT: i32.and $push3=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 14 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.and $push4=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.and $push7=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.const $push11=, 10 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.and $push10=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push14=, 6 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.and $push13=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: and_v8i16: +; NO-SIMD128-FAST: .functype and_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = and <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: or_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype or_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: or_v8i16: +; SIMD128: .functype or_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.or $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: or_v8i16: +; SIMD128-FAST: .functype or_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.or $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: or_v8i16: +; NO-SIMD128: .functype or_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.or $push0=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-NEXT: i32.or $push3=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 14 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.or $push4=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.or $push7=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.const $push11=, 10 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.or $push10=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push14=, 6 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.or $push13=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: or_v8i16: +; NO-SIMD128-FAST: .functype or_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.or $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.or $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = or <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: xor_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype xor_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: xor_v8i16: +; SIMD128: .functype xor_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: xor_v8i16: +; SIMD128-FAST: .functype xor_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: xor_v8i16: +; NO-SIMD128: .functype xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.xor $push0=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 14 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.xor $push4=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.xor $push7=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.const $push11=, 10 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.xor $push10=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push14=, 6 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.xor $push13=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: xor_v8i16: +; NO-SIMD128-FAST: .functype xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = xor <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: not_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype not_v8i16 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @not_v8i16(<8 x i16> %x) { +; SIMD128-LABEL: not_v8i16: +; SIMD128: .functype not_v8i16 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.not $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: not_v8i16: +; SIMD128-FAST: .functype not_v8i16 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.not $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: not_v8i16: +; NO-SIMD128: .functype not_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, -1 +; NO-SIMD128-NEXT: i32.xor $push1=, $5, $pop0 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push23=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $3, $pop23 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push22=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $2, $pop22 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push21=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $1, $pop21 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push6=, 14 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.const $push20=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $8, $pop20 +; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 +; NO-SIMD128-NEXT: i32.const $push9=, 12 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.const $push19=, -1 +; NO-SIMD128-NEXT: i32.xor $push8=, $7, $pop19 +; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 10 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $6, $pop18 +; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 6 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $4, $pop17 +; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: not_v8i16: +; NO-SIMD128-FAST: .functype not_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: return %a = xor <8 x i16> %x, ret <8 x i16> %a } -; CHECK-LABEL: andnot_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype andnot_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: return define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: andnot_v8i16: +; SIMD128: .functype andnot_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.andnot $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: andnot_v8i16: +; SIMD128-FAST: .functype andnot_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.not $push0=, $1 +; SIMD128-FAST-NEXT: v128.and $push1=, $0, $pop0 +; SIMD128-FAST-NEXT: return $pop1 +; +; NO-SIMD128-LABEL: andnot_v8i16: +; NO-SIMD128: .functype andnot_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, -1 +; NO-SIMD128-NEXT: i32.xor $push1=, $13, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop1 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push31=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $11, $pop31 +; NO-SIMD128-NEXT: i32.and $push4=, $3, $pop3 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push30=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $10, $pop30 +; NO-SIMD128-NEXT: i32.and $push6=, $2, $pop5 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push29=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $9, $pop29 +; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop7 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push11=, 14 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.const $push28=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $16, $pop28 +; NO-SIMD128-NEXT: i32.and $push10=, $8, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push15=, 12 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.const $push27=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $15, $pop27 +; NO-SIMD128-NEXT: i32.and $push14=, $7, $pop13 +; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push19=, 10 +; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-NEXT: i32.const $push26=, -1 +; NO-SIMD128-NEXT: i32.xor $push17=, $14, $pop26 +; NO-SIMD128-NEXT: i32.and $push18=, $6, $pop17 +; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 +; NO-SIMD128-NEXT: i32.const $push23=, 6 +; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-NEXT: i32.const $push25=, -1 +; NO-SIMD128-NEXT: i32.xor $push21=, $12, $pop25 +; NO-SIMD128-NEXT: i32.and $push22=, $4, $pop21 +; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: andnot_v8i16: +; NO-SIMD128-FAST: .functype andnot_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $10, $pop31 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $11, $pop30 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $12, $pop29 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $13, $pop28 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $5, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $14, $pop27 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $6, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $15, $pop26 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $16, $pop25 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $8, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: return %inv_y = xor <8 x i16> %y, %a = and <8 x i16> %x, %inv_y ret <8 x i16> %a } -; CHECK-LABEL: bitselect_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_v8i16 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.or -; SIMD128-FAST-NEXT: return define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { +; SIMD128-LABEL: bitselect_v8i16: +; SIMD128: .functype bitselect_v8i16 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_v8i16: +; SIMD128-FAST: .functype bitselect_v8i16 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.and $push0=, $1, $0 +; SIMD128-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-FAST-NEXT: v128.and $push3=, $2, $pop2 +; SIMD128-FAST-NEXT: v128.or $push1=, $pop0, $pop3 +; SIMD128-FAST-NEXT: return $pop1 +; +; NO-SIMD128-LABEL: bitselect_v8i16: +; NO-SIMD128: .functype bitselect_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push5=, 14 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.and $push0=, $16, $8 +; NO-SIMD128-NEXT: i32.const $push1=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop1 +; NO-SIMD128-NEXT: i32.and $push3=, $24, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3 +; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push11=, 12 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.and $push7=, $15, $7 +; NO-SIMD128-NEXT: i32.const $push47=, -1 +; NO-SIMD128-NEXT: i32.xor $push8=, $7, $pop47 +; NO-SIMD128-NEXT: i32.and $push9=, $23, $pop8 +; NO-SIMD128-NEXT: i32.or $push10=, $pop7, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push17=, 10 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.and $push13=, $14, $6 +; NO-SIMD128-NEXT: i32.const $push46=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $6, $pop46 +; NO-SIMD128-NEXT: i32.and $push15=, $22, $pop14 +; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15 +; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.and $push19=, $13, $5 +; NO-SIMD128-NEXT: i32.const $push45=, -1 +; NO-SIMD128-NEXT: i32.xor $push20=, $5, $pop45 +; NO-SIMD128-NEXT: i32.and $push21=, $21, $pop20 +; NO-SIMD128-NEXT: i32.or $push22=, $pop19, $pop21 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push27=, 6 +; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-NEXT: i32.and $push23=, $12, $4 +; NO-SIMD128-NEXT: i32.const $push44=, -1 +; NO-SIMD128-NEXT: i32.xor $push24=, $4, $pop44 +; NO-SIMD128-NEXT: i32.and $push25=, $20, $pop24 +; NO-SIMD128-NEXT: i32.or $push26=, $pop23, $pop25 +; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.and $push29=, $11, $3 +; NO-SIMD128-NEXT: i32.const $push43=, -1 +; NO-SIMD128-NEXT: i32.xor $push30=, $3, $pop43 +; NO-SIMD128-NEXT: i32.and $push31=, $19, $pop30 +; NO-SIMD128-NEXT: i32.or $push32=, $pop29, $pop31 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 +; NO-SIMD128-NEXT: i32.and $push33=, $10, $2 +; NO-SIMD128-NEXT: i32.const $push42=, -1 +; NO-SIMD128-NEXT: i32.xor $push34=, $2, $pop42 +; NO-SIMD128-NEXT: i32.and $push35=, $18, $pop34 +; NO-SIMD128-NEXT: i32.or $push36=, $pop33, $pop35 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 +; NO-SIMD128-NEXT: i32.and $push37=, $9, $1 +; NO-SIMD128-NEXT: i32.const $push41=, -1 +; NO-SIMD128-NEXT: i32.xor $push38=, $1, $pop41 +; NO-SIMD128-NEXT: i32.and $push39=, $17, $pop38 +; NO-SIMD128-NEXT: i32.or $push40=, $pop37, $pop39 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_v8i16: +; NO-SIMD128-FAST: .functype bitselect_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $9, $1 +; NO-SIMD128-FAST-NEXT: i32.const $push1=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $17, $pop2 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop47 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $18, $pop6 +; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop46 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $19, $pop10 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $4 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop45 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $20, $pop14 +; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop44 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop43 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $22, $pop24 +; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop42 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $23, $pop30 +; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop41 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $24, $pop36 +; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <8 x i16> %v1, %c %inv_mask = xor <8 x i16> , @@ -692,31 +9185,253 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { ret <8 x i16> %a } -; CHECK-LABEL: bitselect_xor_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_xor_v8i16 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.xor -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.xor define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { +; SIMD128-LABEL: bitselect_xor_v8i16: +; SIMD128: .functype bitselect_xor_v8i16 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_xor_v8i16: +; SIMD128-FAST: .functype bitselect_xor_v8i16 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push2=, $1, $2 +; SIMD128-FAST-NEXT: v128.and $push1=, $pop2, $0 +; SIMD128-FAST-NEXT: v128.xor $push0=, $pop1, $2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: bitselect_xor_v8i16: +; NO-SIMD128: .functype bitselect_xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push3=, 14 +; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-NEXT: i32.xor $push0=, $16, $24 +; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $8 +; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $24 +; NO-SIMD128-NEXT: i32.store16 0($pop4), $pop2 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.xor $push5=, $15, $23 +; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $7 +; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $23 +; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.const $push13=, 10 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.xor $push10=, $14, $22 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $6 +; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $22 +; NO-SIMD128-NEXT: i32.store16 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.xor $push15=, $13, $21 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $5 +; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $21 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-NEXT: i32.const $push21=, 6 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.xor $push18=, $12, $20 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $4 +; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $20 +; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.xor $push23=, $11, $19 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $3 +; NO-SIMD128-NEXT: i32.xor $push25=, $pop24, $19 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop25 +; NO-SIMD128-NEXT: i32.xor $push26=, $10, $18 +; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $2 +; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $18 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.xor $push29=, $9, $17 +; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $1 +; NO-SIMD128-NEXT: i32.xor $push31=, $pop30, $17 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop31 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_xor_v8i16: +; NO-SIMD128-FAST: .functype bitselect_xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $9, $17 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $pop0, $1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $pop1, $17 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $10, $18 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop3, $2 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $pop4, $18 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $11, $19 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $19 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $12, $20 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $20 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $13, $21 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $5 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $21 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $14, $22 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $6 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $pop20, $22 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $15, $23 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $7 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $23 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $16, $24 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $pop29, $8 +; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $pop30, $24 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31 +; NO-SIMD128-FAST-NEXT: return %xor1 = xor <8 x i16> %v1, %v2 %and = and <8 x i16> %xor1, %c %a = xor <8 x i16> %and, %v2 ret <8 x i16> %a } -; CHECK-LABEL: bitselect_xor_reversed_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_xor_reversed_v8i16 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.xor -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.xor define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { +; SIMD128-LABEL: bitselect_xor_reversed_v8i16: +; SIMD128: .functype bitselect_xor_reversed_v8i16 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $2, $1, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_xor_reversed_v8i16: +; SIMD128-FAST: .functype bitselect_xor_reversed_v8i16 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push1=, $1, $2 +; SIMD128-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-FAST-NEXT: v128.and $push3=, $pop1, $pop2 +; SIMD128-FAST-NEXT: v128.xor $push0=, $pop3, $2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: bitselect_xor_reversed_v8i16: +; NO-SIMD128: .functype bitselect_xor_reversed_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push5=, 14 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.xor $push2=, $16, $24 +; NO-SIMD128-NEXT: i32.const $push0=, -1 +; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $24 +; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push11=, 12 +; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-NEXT: i32.xor $push8=, $15, $23 +; NO-SIMD128-NEXT: i32.const $push47=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $7, $pop47 +; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $23 +; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-NEXT: i32.const $push17=, 10 +; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-NEXT: i32.xor $push14=, $14, $22 +; NO-SIMD128-NEXT: i32.const $push46=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $6, $pop46 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $22 +; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 +; NO-SIMD128-NEXT: i32.xor $push20=, $13, $21 +; NO-SIMD128-NEXT: i32.const $push45=, -1 +; NO-SIMD128-NEXT: i32.xor $push19=, $5, $pop45 +; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $21 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push27=, 6 +; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-NEXT: i32.xor $push24=, $12, $20 +; NO-SIMD128-NEXT: i32.const $push44=, -1 +; NO-SIMD128-NEXT: i32.xor $push23=, $4, $pop44 +; NO-SIMD128-NEXT: i32.and $push25=, $pop24, $pop23 +; NO-SIMD128-NEXT: i32.xor $push26=, $pop25, $20 +; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.xor $push30=, $11, $19 +; NO-SIMD128-NEXT: i32.const $push43=, -1 +; NO-SIMD128-NEXT: i32.xor $push29=, $3, $pop43 +; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $19 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 +; NO-SIMD128-NEXT: i32.xor $push34=, $10, $18 +; NO-SIMD128-NEXT: i32.const $push42=, -1 +; NO-SIMD128-NEXT: i32.xor $push33=, $2, $pop42 +; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $pop33 +; NO-SIMD128-NEXT: i32.xor $push36=, $pop35, $18 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 +; NO-SIMD128-NEXT: i32.xor $push38=, $9, $17 +; NO-SIMD128-NEXT: i32.const $push41=, -1 +; NO-SIMD128-NEXT: i32.xor $push37=, $1, $pop41 +; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $17 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v8i16: +; NO-SIMD128-FAST: .functype bitselect_xor_reversed_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $9, $17 +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $17 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $10, $18 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop47 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $18 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $11, $19 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop46 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $19 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $12, $20 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop45 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $20 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $13, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $5, $pop44 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $pop21, $21 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $14, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $6, $pop43 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $22 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $15, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $7, $pop42 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $pop29 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $23 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $16, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $8, $pop41 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $pop35 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $24 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: return %xor1 = xor <8 x i16> %v1, %v2 %notc = xor <8 x i16> %c, @@ -725,12 +9440,110 @@ define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x ret <8 x i16> %a } -; CHECK-LABEL: extmul_low_s_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extmul_low_s_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i16x8.extmul_low_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { +; SIMD128-LABEL: extmul_low_s_v8i16: +; SIMD128: .functype extmul_low_s_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extmul_low_i8x16_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_low_s_v8i16: +; SIMD128-FAST: .functype extmul_low_s_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.extend_low_i8x16_s $push0=, $0 +; SIMD128-FAST-NEXT: i16x8.extend_low_i8x16_s $push1=, $1 +; SIMD128-FAST-NEXT: i16x8.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_low_s_v8i16: +; NO-SIMD128: .functype extmul_low_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend8_s $push1=, $5 +; NO-SIMD128-NEXT: i32.extend8_s $push0=, $21 +; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $3 +; NO-SIMD128-NEXT: i32.extend8_s $push3=, $19 +; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $2 +; NO-SIMD128-NEXT: i32.extend8_s $push6=, $18 +; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop8 +; NO-SIMD128-NEXT: i32.extend8_s $push10=, $1 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $17 +; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 14 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $8 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $24 +; NO-SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push20=, 12 +; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-NEXT: i32.extend8_s $push18=, $7 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $23 +; NO-SIMD128-NEXT: i32.mul $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.store16 0($pop21), $pop19 +; NO-SIMD128-NEXT: i32.const $push25=, 10 +; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-NEXT: i32.extend8_s $push23=, $6 +; NO-SIMD128-NEXT: i32.extend8_s $push22=, $22 +; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store16 0($pop26), $pop24 +; NO-SIMD128-NEXT: i32.const $push30=, 6 +; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-NEXT: i32.extend8_s $push28=, $4 +; NO-SIMD128-NEXT: i32.extend8_s $push27=, $20 +; NO-SIMD128-NEXT: i32.mul $push29=, $pop28, $pop27 +; NO-SIMD128-NEXT: i32.store16 0($pop31), $pop29 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_low_s_v8i16: +; NO-SIMD128-FAST: .functype extmul_low_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push1=, $1 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push0=, $17 +; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push4=, $2 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push3=, $18 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $pop4, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push7=, $3 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push6=, $19 +; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $20 +; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $21 +; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $pop15, $pop14 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $22 +; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23 +; NO-SIMD128-FAST-NEXT: i32.mul $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push30=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $24 +; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31 +; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> %low2 = shufflevector <16 x i8> %v2, <16 x i8> undef, @@ -741,12 +9554,110 @@ define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ret <8 x i16> %a } -; CHECK-LABEL: extmul_high_s_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extmul_high_s_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i16x8.extmul_high_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { +; SIMD128-LABEL: extmul_high_s_v8i16: +; SIMD128: .functype extmul_high_s_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extmul_high_i8x16_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_high_s_v8i16: +; SIMD128-FAST: .functype extmul_high_s_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.extend_high_i8x16_s $push0=, $0 +; SIMD128-FAST-NEXT: i16x8.extend_high_i8x16_s $push1=, $1 +; SIMD128-FAST-NEXT: i16x8.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_high_s_v8i16: +; NO-SIMD128: .functype extmul_high_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend8_s $push1=, $13 +; NO-SIMD128-NEXT: i32.extend8_s $push0=, $29 +; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $11 +; NO-SIMD128-NEXT: i32.extend8_s $push3=, $27 +; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $10 +; NO-SIMD128-NEXT: i32.extend8_s $push6=, $26 +; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop8 +; NO-SIMD128-NEXT: i32.extend8_s $push10=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $25 +; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop11 +; NO-SIMD128-NEXT: i32.const $push15=, 14 +; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $16 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $32 +; NO-SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.const $push20=, 12 +; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-NEXT: i32.extend8_s $push18=, $15 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $31 +; NO-SIMD128-NEXT: i32.mul $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.store16 0($pop21), $pop19 +; NO-SIMD128-NEXT: i32.const $push25=, 10 +; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-NEXT: i32.extend8_s $push23=, $14 +; NO-SIMD128-NEXT: i32.extend8_s $push22=, $30 +; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store16 0($pop26), $pop24 +; NO-SIMD128-NEXT: i32.const $push30=, 6 +; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-NEXT: i32.extend8_s $push28=, $12 +; NO-SIMD128-NEXT: i32.extend8_s $push27=, $28 +; NO-SIMD128-NEXT: i32.mul $push29=, $pop28, $pop27 +; NO-SIMD128-NEXT: i32.store16 0($pop31), $pop29 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_high_s_v8i16: +; NO-SIMD128-FAST: .functype extmul_high_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push1=, $9 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push0=, $25 +; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push4=, $10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push3=, $26 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $pop4, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push7=, $11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push6=, $27 +; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $28 +; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $29 +; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $pop15, $pop14 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $30 +; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $31 +; NO-SIMD128-FAST-NEXT: i32.mul $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push30=, $16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $32 +; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31 +; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> %high2 = shufflevector <16 x i8> %v2, <16 x i8> undef, @@ -757,12 +9668,142 @@ define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ret <8 x i16> %a } -; CHECK-LABEL: extmul_low_u_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extmul_low_u_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i16x8.extmul_low_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { +; SIMD128-LABEL: extmul_low_u_v8i16: +; SIMD128: .functype extmul_low_u_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_low_u_v8i16: +; SIMD128-FAST: .functype extmul_low_u_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.extend_low_i8x16_u $push0=, $0 +; SIMD128-FAST-NEXT: i16x8.extend_low_i8x16_u $push1=, $1 +; SIMD128-FAST-NEXT: i16x8.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_low_u_v8i16: +; NO-SIMD128: .functype extmul_low_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop0 +; NO-SIMD128-NEXT: i32.const $push47=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $21, $pop47 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push46=, 255 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop46 +; NO-SIMD128-NEXT: i32.const $push45=, 255 +; NO-SIMD128-NEXT: i32.and $push4=, $19, $pop45 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push44=, 255 +; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $18, $pop43 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push42=, 255 +; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop42 +; NO-SIMD128-NEXT: i32.const $push41=, 255 +; NO-SIMD128-NEXT: i32.and $push10=, $17, $pop41 +; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push16=, 14 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 255 +; NO-SIMD128-NEXT: i32.and $push14=, $8, $pop40 +; NO-SIMD128-NEXT: i32.const $push39=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $24, $pop39 +; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.const $push21=, 12 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.const $push38=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 255 +; NO-SIMD128-NEXT: i32.and $push18=, $23, $pop37 +; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push26=, 10 +; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.const $push36=, 255 +; NO-SIMD128-NEXT: i32.and $push24=, $6, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $22, $pop35 +; NO-SIMD128-NEXT: i32.mul $push25=, $pop24, $pop23 +; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-NEXT: i32.const $push31=, 6 +; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.const $push34=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $4, $pop34 +; NO-SIMD128-NEXT: i32.const $push33=, 255 +; NO-SIMD128-NEXT: i32.and $push28=, $20, $pop33 +; NO-SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_low_u_v8i16: +; NO-SIMD128-FAST: .functype extmul_low_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop47 +; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop45 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop43 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop41 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop39 +; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $22, $pop37 +; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $23, $pop35 +; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $24, $pop33 +; NO-SIMD128-FAST-NEXT: i32.mul $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> %low2 = shufflevector <16 x i8> %v2, <16 x i8> undef, @@ -773,12 +9814,142 @@ define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ret <8 x i16> %a } -; CHECK-LABEL: extmul_high_u_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extmul_high_u_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i16x8.extmul_high_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { +; SIMD128-LABEL: extmul_high_u_v8i16: +; SIMD128: .functype extmul_high_u_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_high_u_v8i16: +; SIMD128-FAST: .functype extmul_high_u_v8i16 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i16x8.extend_high_i8x16_u $push0=, $0 +; SIMD128-FAST-NEXT: i16x8.extend_high_i8x16_u $push1=, $1 +; SIMD128-FAST-NEXT: i16x8.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_high_u_v8i16: +; NO-SIMD128: .functype extmul_high_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push2=, $13, $pop0 +; NO-SIMD128-NEXT: i32.const $push47=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $29, $pop47 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push46=, 255 +; NO-SIMD128-NEXT: i32.and $push5=, $11, $pop46 +; NO-SIMD128-NEXT: i32.const $push45=, 255 +; NO-SIMD128-NEXT: i32.and $push4=, $27, $pop45 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push44=, 255 +; NO-SIMD128-NEXT: i32.and $push8=, $10, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $26, $pop43 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push42=, 255 +; NO-SIMD128-NEXT: i32.and $push11=, $9, $pop42 +; NO-SIMD128-NEXT: i32.const $push41=, 255 +; NO-SIMD128-NEXT: i32.and $push10=, $25, $pop41 +; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push16=, 14 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 255 +; NO-SIMD128-NEXT: i32.and $push14=, $16, $pop40 +; NO-SIMD128-NEXT: i32.const $push39=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $32, $pop39 +; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 +; NO-SIMD128-NEXT: i32.const $push21=, 12 +; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.const $push38=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $15, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 255 +; NO-SIMD128-NEXT: i32.and $push18=, $31, $pop37 +; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 +; NO-SIMD128-NEXT: i32.const $push26=, 10 +; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.const $push36=, 255 +; NO-SIMD128-NEXT: i32.and $push24=, $14, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $30, $pop35 +; NO-SIMD128-NEXT: i32.mul $push25=, $pop24, $pop23 +; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-NEXT: i32.const $push31=, 6 +; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.const $push34=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $12, $pop34 +; NO-SIMD128-NEXT: i32.const $push33=, 255 +; NO-SIMD128-NEXT: i32.and $push28=, $28, $pop33 +; NO-SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_high_u_v8i16: +; NO-SIMD128-FAST: .functype extmul_high_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $25, $pop47 +; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $26, $pop45 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $11, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $27, $pop43 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $28, $pop41 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $13, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $29, $pop39 +; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $14, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $30, $pop37 +; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $15, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $31, $pop35 +; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $32, $pop33 +; NO-SIMD128-FAST-NEXT: i32.mul $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> %high2 = shufflevector <16 x i8> %v2, <16 x i8> undef, @@ -792,108 +9963,540 @@ define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; ============================================================================== ; 4 x i32 ; ============================================================================== -; CHECK-LABEL: add_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype add_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: add_v4i32: +; SIMD128: .functype add_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.add $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: add_v4i32: +; SIMD128-FAST: .functype add_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.add $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: add_v4i32: +; NO-SIMD128: .functype add_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.add $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.add $push1=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.add $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.add $push3=, $4, $8 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: add_v4i32: +; NO-SIMD128-FAST: .functype add_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.add $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.add $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = add <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: sub_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype sub_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: sub_v4i32: +; SIMD128: .functype sub_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.sub $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: sub_v4i32: +; SIMD128-FAST: .functype sub_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.sub $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: sub_v4i32: +; NO-SIMD128: .functype sub_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.sub $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.sub $push1=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.sub $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.sub $push3=, $4, $8 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sub_v4i32: +; NO-SIMD128-FAST: .functype sub_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.sub $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = sub <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: mul_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype mul_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: mul_v4i32: +; SIMD128: .functype mul_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.mul $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: mul_v4i32: +; SIMD128-FAST: .functype mul_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.mul $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: mul_v4i32: +; NO-SIMD128: .functype mul_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.mul $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.mul $push1=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.mul $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.mul $push3=, $4, $8 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: mul_v4i32: +; NO-SIMD128-FAST: .functype mul_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.mul $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.mul $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = mul <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: min_s_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype min_s_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.min_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: min_s_v4i32: +; SIMD128: .functype min_s_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.min_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: min_s_v4i32: +; SIMD128-FAST: .functype min_s_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.min_s $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_s_v4i32: +; NO-SIMD128: .functype min_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.lt_s $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.lt_s $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2 +; NO-SIMD128-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-NEXT: i32.lt_s $push4=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 +; NO-SIMD128-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.lt_s $push6=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 +; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_s_v4i32: +; NO-SIMD128-FAST: .functype min_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.lt_s $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.select $push1=, $1, $5, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push2=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.select $push3=, $2, $6, $pop2 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push4=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push6=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: return %c = icmp slt <4 x i32> %x, %y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %a } -; CHECK-LABEL: min_u_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype min_u_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.min_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: min_u_v4i32: +; SIMD128: .functype min_u_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.min_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: min_u_v4i32: +; SIMD128-FAST: .functype min_u_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.min_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_u_v4i32: +; NO-SIMD128: .functype min_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.lt_u $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.lt_u $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2 +; NO-SIMD128-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-NEXT: i32.lt_u $push4=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 +; NO-SIMD128-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.lt_u $push6=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 +; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_u_v4i32: +; NO-SIMD128-FAST: .functype min_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.lt_u $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.select $push1=, $1, $5, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push2=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.select $push3=, $2, $6, $pop2 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push4=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push6=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: return %c = icmp ult <4 x i32> %x, %y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %a } -; CHECK-LABEL: max_s_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype max_s_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.max_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: max_s_v4i32: +; SIMD128: .functype max_s_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.max_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: max_s_v4i32: +; SIMD128-FAST: .functype max_s_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.max_s $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_s_v4i32: +; NO-SIMD128: .functype max_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.gt_s $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.gt_s $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2 +; NO-SIMD128-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-NEXT: i32.gt_s $push4=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 +; NO-SIMD128-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.gt_s $push6=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 +; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_s_v4i32: +; NO-SIMD128-FAST: .functype max_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.gt_s $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.select $push1=, $1, $5, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push2=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.select $push3=, $2, $6, $pop2 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push4=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push6=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: return %c = icmp sgt <4 x i32> %x, %y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %a } -; CHECK-LABEL: max_u_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype max_u_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.max_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: max_u_v4i32: +; SIMD128: .functype max_u_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.max_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: max_u_v4i32: +; SIMD128-FAST: .functype max_u_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.max_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_u_v4i32: +; NO-SIMD128: .functype max_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.gt_u $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.gt_u $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2 +; NO-SIMD128-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-NEXT: i32.gt_u $push4=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 +; NO-SIMD128-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.gt_u $push6=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 +; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_u_v4i32: +; NO-SIMD128-FAST: .functype max_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.gt_u $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.select $push1=, $1, $5, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push2=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.select $push3=, $2, $6, $pop2 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push4=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push6=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: return %c = icmp ugt <4 x i32> %x, %y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %a } -; CHECK-LABEL: abs_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype abs_v4i32 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.abs $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @abs_v4i32(<4 x i32> %x) { +; SIMD128-LABEL: abs_v4i32: +; SIMD128: .functype abs_v4i32 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.abs $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: abs_v4i32: +; SIMD128-FAST: .functype abs_v4i32 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.abs $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: abs_v4i32: +; NO-SIMD128: .functype abs_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push3=, 12 +; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-NEXT: i32.const $push0=, 31 +; NO-SIMD128-NEXT: i32.shr_s $push21=, $4, $pop0 +; NO-SIMD128-NEXT: local.tee $push20=, $5=, $pop21 +; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop20 +; NO-SIMD128-NEXT: i32.sub $push2=, $pop1, $5 +; NO-SIMD128-NEXT: i32.store 0($pop4), $pop2 +; NO-SIMD128-NEXT: i32.const $push19=, 31 +; NO-SIMD128-NEXT: i32.shr_s $push18=, $3, $pop19 +; NO-SIMD128-NEXT: local.tee $push17=, $4=, $pop18 +; NO-SIMD128-NEXT: i32.xor $push5=, $3, $pop17 +; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $4 +; NO-SIMD128-NEXT: i32.store 8($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push16=, 31 +; NO-SIMD128-NEXT: i32.shr_s $push15=, $2, $pop16 +; NO-SIMD128-NEXT: local.tee $push14=, $4=, $pop15 +; NO-SIMD128-NEXT: i32.xor $push7=, $2, $pop14 +; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $4 +; NO-SIMD128-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push13=, 31 +; NO-SIMD128-NEXT: i32.shr_s $push12=, $1, $pop13 +; NO-SIMD128-NEXT: local.tee $push11=, $4=, $pop12 +; NO-SIMD128-NEXT: i32.xor $push9=, $1, $pop11 +; NO-SIMD128-NEXT: i32.sub $push10=, $pop9, $4 +; NO-SIMD128-NEXT: i32.store 0($0), $pop10 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: abs_v4i32: +; NO-SIMD128-FAST: .functype abs_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 31 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push20=, $5=, $pop21 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop20 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 31 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $2, $pop19 +; NO-SIMD128-FAST-NEXT: local.tee $push17=, $1=, $pop18 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $2, $pop17 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop3, $1 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push16=, 31 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $3, $pop16 +; NO-SIMD128-FAST-NEXT: local.tee $push14=, $2=, $pop15 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $3, $pop14 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $2 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 31 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $4, $pop13 +; NO-SIMD128-FAST-NEXT: local.tee $push11=, $0=, $pop12 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $4, $pop11 +; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop7, $0 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: return %a = sub <4 x i32> zeroinitializer, %x %b = icmp slt <4 x i32> %x, zeroinitializer %c = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %x ret <4 x i32> %c } -; CHECK-LABEL: neg_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype neg_v4i32 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @neg_v4i32(<4 x i32> %x) { +; SIMD128-LABEL: neg_v4i32: +; SIMD128: .functype neg_v4i32 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.neg $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: neg_v4i32: +; SIMD128-FAST: .functype neg_v4i32 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.neg $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: neg_v4i32: +; NO-SIMD128: .functype neg_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 0 +; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $3 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push9=, 0 +; NO-SIMD128-NEXT: i32.sub $push2=, $pop9, $2 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push8=, 0 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop8, $1 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 12 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.const $push7=, 0 +; NO-SIMD128-NEXT: i32.sub $push4=, $pop7, $4 +; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: neg_v4i32: +; NO-SIMD128-FAST: .functype neg_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop9, $2 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop8, $3 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop7, $4 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %a = sub <4 x i32> , %x ret <4 x i32> %a } -; CHECK-LABEL: shl_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shl_v4i32 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) { +; SIMD128-LABEL: shl_v4i32: +; SIMD128: .functype shl_v4i32 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.shl $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shl_v4i32: +; SIMD128-FAST: .functype shl_v4i32 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.shl $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_v4i32: +; NO-SIMD128: .functype shl_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.shl $push0=, $3, $5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.shl $push1=, $2, $5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.shl $push3=, $4, $5 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_v4i32: +; NO-SIMD128-FAST: .functype shl_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.shl $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $5 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $5 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, <4 x i32> @@ -901,41 +10504,180 @@ define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) { ret <4 x i32> %a } -; CHECK-LABEL: shl_const_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shl_const_v4i32 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5 -; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shl_const_v4i32(<4 x i32> %v) { +; SIMD128-LABEL: shl_const_v4i32: +; SIMD128: .functype shl_const_v4i32 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32.const $push0=, 5 +; SIMD128-NEXT: i32x4.shl $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: shl_const_v4i32: +; SIMD128-FAST: .functype shl_const_v4i32 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-FAST-NEXT: i32x4.shl $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_const_v4i32: +; NO-SIMD128: .functype shl_const_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 5 +; NO-SIMD128-NEXT: i32.shl $push1=, $3, $pop0 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push9=, 5 +; NO-SIMD128-NEXT: i32.shl $push2=, $2, $pop9 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push8=, 5 +; NO-SIMD128-NEXT: i32.shl $push3=, $1, $pop8 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 12 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.const $push7=, 5 +; NO-SIMD128-NEXT: i32.shl $push4=, $4, $pop7 +; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_const_v4i32: +; NO-SIMD128-FAST: .functype shl_const_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %a = shl <4 x i32> %v, ret <4 x i32> %a } -; CHECK-LABEL: shl_vec_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shl_vec_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 2 lanes -; SIMD128: i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}} -; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { +; SIMD128-LABEL: shl_vec_v4i32: +; SIMD128: .functype shl_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.extract_lane $push4=, $0, 0 +; SIMD128-NEXT: i32x4.extract_lane $push3=, $1, 0 +; SIMD128-NEXT: i32.shl $push5=, $pop4, $pop3 +; SIMD128-NEXT: i32x4.splat $push6=, $pop5 +; SIMD128-NEXT: i32x4.extract_lane $push1=, $0, 1 +; SIMD128-NEXT: i32x4.extract_lane $push0=, $1, 1 +; SIMD128-NEXT: i32.shl $push2=, $pop1, $pop0 +; SIMD128-NEXT: i32x4.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-NEXT: i32x4.extract_lane $push9=, $0, 2 +; SIMD128-NEXT: i32x4.extract_lane $push8=, $1, 2 +; SIMD128-NEXT: i32.shl $push10=, $pop9, $pop8 +; SIMD128-NEXT: i32x4.replace_lane $push11=, $pop7, 2, $pop10 +; SIMD128-NEXT: i32x4.extract_lane $push13=, $0, 3 +; SIMD128-NEXT: i32x4.extract_lane $push12=, $1, 3 +; SIMD128-NEXT: i32.shl $push14=, $pop13, $pop12 +; SIMD128-NEXT: i32x4.replace_lane $push15=, $pop11, 3, $pop14 +; SIMD128-NEXT: return $pop15 +; +; SIMD128-FAST-LABEL: shl_vec_v4i32: +; SIMD128-FAST: .functype shl_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.extract_lane $push5=, $0, 0 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push4=, $1, 0 +; SIMD128-FAST-NEXT: i32.shl $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: i32x4.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push2=, $0, 1 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push1=, $1, 1 +; SIMD128-FAST-NEXT: i32.shl $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push8=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push10=, $0, 2 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push9=, $1, 2 +; SIMD128-FAST-NEXT: i32.shl $push11=, $pop10, $pop9 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push12=, $pop8, 2, $pop11 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push14=, $0, 3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push13=, $1, 3 +; SIMD128-FAST-NEXT: i32.shl $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push0=, $pop12, 3, $pop15 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_vec_v4i32: +; NO-SIMD128: .functype shl_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.shl $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.shl $push1=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.shl $push3=, $4, $8 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_vec_v4i32: +; NO-SIMD128-FAST: .functype shl_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.shl $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = shl <4 x i32> %v, %x ret <4 x i32> %a } -; CHECK-LABEL: shr_s_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shr_s_v4i32 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) { +; SIMD128-LABEL: shr_s_v4i32: +; SIMD128: .functype shr_s_v4i32 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.shr_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shr_s_v4i32: +; SIMD128-FAST: .functype shr_s_v4i32 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.shr_s $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_s_v4i32: +; NO-SIMD128: .functype shr_s_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.shr_s $push0=, $3, $5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_s $push1=, $2, $5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.shr_s $push3=, $4, $5 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_v4i32: +; NO-SIMD128-FAST: .functype shr_s_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.shr_s $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push1=, $2, $5 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $5 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, <4 x i32> @@ -943,30 +10685,124 @@ define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) { ret <4 x i32> %a } -; CHECK-LABEL: shr_s_vec_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shr_s_vec_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 2 lanes -; SIMD128: i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}} -; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { +; SIMD128-LABEL: shr_s_vec_v4i32: +; SIMD128: .functype shr_s_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.extract_lane $push4=, $0, 0 +; SIMD128-NEXT: i32x4.extract_lane $push3=, $1, 0 +; SIMD128-NEXT: i32.shr_s $push5=, $pop4, $pop3 +; SIMD128-NEXT: i32x4.splat $push6=, $pop5 +; SIMD128-NEXT: i32x4.extract_lane $push1=, $0, 1 +; SIMD128-NEXT: i32x4.extract_lane $push0=, $1, 1 +; SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop0 +; SIMD128-NEXT: i32x4.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-NEXT: i32x4.extract_lane $push9=, $0, 2 +; SIMD128-NEXT: i32x4.extract_lane $push8=, $1, 2 +; SIMD128-NEXT: i32.shr_s $push10=, $pop9, $pop8 +; SIMD128-NEXT: i32x4.replace_lane $push11=, $pop7, 2, $pop10 +; SIMD128-NEXT: i32x4.extract_lane $push13=, $0, 3 +; SIMD128-NEXT: i32x4.extract_lane $push12=, $1, 3 +; SIMD128-NEXT: i32.shr_s $push14=, $pop13, $pop12 +; SIMD128-NEXT: i32x4.replace_lane $push15=, $pop11, 3, $pop14 +; SIMD128-NEXT: return $pop15 +; +; SIMD128-FAST-LABEL: shr_s_vec_v4i32: +; SIMD128-FAST: .functype shr_s_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.extract_lane $push5=, $0, 0 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push4=, $1, 0 +; SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: i32x4.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push2=, $0, 1 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push1=, $1, 1 +; SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push8=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push10=, $0, 2 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push9=, $1, 2 +; SIMD128-FAST-NEXT: i32.shr_s $push11=, $pop10, $pop9 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push12=, $pop8, 2, $pop11 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push14=, $0, 3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push13=, $1, 3 +; SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push0=, $pop12, 3, $pop15 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_s_vec_v4i32: +; NO-SIMD128: .functype shr_s_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.shr_s $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_s $push1=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.shr_s $push3=, $4, $8 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_vec_v4i32: +; NO-SIMD128-FAST: .functype shr_s_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.shr_s $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = ashr <4 x i32> %v, %x ret <4 x i32> %a } -; CHECK-LABEL: shr_u_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shr_u_v4i32 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) { +; SIMD128-LABEL: shr_u_v4i32: +; SIMD128: .functype shr_u_v4i32 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.shr_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shr_u_v4i32: +; SIMD128-FAST: .functype shr_u_v4i32 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.shr_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_u_v4i32: +; NO-SIMD128: .functype shr_u_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.shr_u $push0=, $3, $5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_u $push1=, $2, $5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $4, $5 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_v4i32: +; NO-SIMD128-FAST: .functype shr_u_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.shr_u $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push1=, $2, $5 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $5 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, <4 x i32> @@ -974,89 +10810,415 @@ define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) { ret <4 x i32> %a } -; CHECK-LABEL: shr_u_vec_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shr_u_vec_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 2 lanes -; SIMD128: i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}} -; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { +; SIMD128-LABEL: shr_u_vec_v4i32: +; SIMD128: .functype shr_u_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.extract_lane $push4=, $0, 0 +; SIMD128-NEXT: i32x4.extract_lane $push3=, $1, 0 +; SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop3 +; SIMD128-NEXT: i32x4.splat $push6=, $pop5 +; SIMD128-NEXT: i32x4.extract_lane $push1=, $0, 1 +; SIMD128-NEXT: i32x4.extract_lane $push0=, $1, 1 +; SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop0 +; SIMD128-NEXT: i32x4.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-NEXT: i32x4.extract_lane $push9=, $0, 2 +; SIMD128-NEXT: i32x4.extract_lane $push8=, $1, 2 +; SIMD128-NEXT: i32.shr_u $push10=, $pop9, $pop8 +; SIMD128-NEXT: i32x4.replace_lane $push11=, $pop7, 2, $pop10 +; SIMD128-NEXT: i32x4.extract_lane $push13=, $0, 3 +; SIMD128-NEXT: i32x4.extract_lane $push12=, $1, 3 +; SIMD128-NEXT: i32.shr_u $push14=, $pop13, $pop12 +; SIMD128-NEXT: i32x4.replace_lane $push15=, $pop11, 3, $pop14 +; SIMD128-NEXT: return $pop15 +; +; SIMD128-FAST-LABEL: shr_u_vec_v4i32: +; SIMD128-FAST: .functype shr_u_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.extract_lane $push5=, $0, 0 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push4=, $1, 0 +; SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: i32x4.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push2=, $0, 1 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push1=, $1, 1 +; SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push8=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push10=, $0, 2 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push9=, $1, 2 +; SIMD128-FAST-NEXT: i32.shr_u $push11=, $pop10, $pop9 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push12=, $pop8, 2, $pop11 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push14=, $0, 3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push13=, $1, 3 +; SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push0=, $pop12, 3, $pop15 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_u_vec_v4i32: +; NO-SIMD128: .functype shr_u_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.shr_u $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_u $push1=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $4, $8 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_vec_v4i32: +; NO-SIMD128-FAST: .functype shr_u_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.shr_u $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = lshr <4 x i32> %v, %x ret <4 x i32> %a } -; CHECK-LABEL: and_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype and_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: and_v4i32: +; SIMD128: .functype and_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.and $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: and_v4i32: +; SIMD128-FAST: .functype and_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.and $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: and_v4i32: +; NO-SIMD128: .functype and_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.and $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.and $push3=, $4, $8 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: and_v4i32: +; NO-SIMD128-FAST: .functype and_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = and <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: or_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype or_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: or_v4i32: +; SIMD128: .functype or_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.or $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: or_v4i32: +; SIMD128-FAST: .functype or_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.or $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: or_v4i32: +; NO-SIMD128: .functype or_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.or $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.or $push3=, $4, $8 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: or_v4i32: +; NO-SIMD128-FAST: .functype or_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.or $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.or $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = or <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: xor_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype xor_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: xor_v4i32: +; SIMD128: .functype xor_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: xor_v4i32: +; SIMD128-FAST: .functype xor_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: xor_v4i32: +; NO-SIMD128: .functype xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.xor $push0=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.xor $push3=, $4, $8 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: xor_v4i32: +; NO-SIMD128-FAST: .functype xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = xor <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: not_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype not_v4i32 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @not_v4i32(<4 x i32> %x) { +; SIMD128-LABEL: not_v4i32: +; SIMD128: .functype not_v4i32 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.not $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: not_v4i32: +; SIMD128-FAST: .functype not_v4i32 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.not $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: not_v4i32: +; NO-SIMD128: .functype not_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, -1 +; NO-SIMD128-NEXT: i32.xor $push1=, $3, $pop0 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push9=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $2, $pop9 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push8=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $1, $pop8 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 12 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.const $push7=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $4, $pop7 +; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: not_v4i32: +; NO-SIMD128-FAST: .functype not_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %a = xor <4 x i32> %x, ret <4 x i32> %a } -; CHECK-LABEL: andnot_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype andnot_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: return define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: andnot_v4i32: +; SIMD128: .functype andnot_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.andnot $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: andnot_v4i32: +; SIMD128-FAST: .functype andnot_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.not $push0=, $1 +; SIMD128-FAST-NEXT: v128.and $push1=, $0, $pop0 +; SIMD128-FAST-NEXT: return $pop1 +; +; NO-SIMD128-LABEL: andnot_v4i32: +; NO-SIMD128: .functype andnot_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, -1 +; NO-SIMD128-NEXT: i32.xor $push1=, $7, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $3, $pop1 +; NO-SIMD128-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push13=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $6, $pop13 +; NO-SIMD128-NEXT: i32.and $push4=, $2, $pop3 +; NO-SIMD128-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push12=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $5, $pop12 +; NO-SIMD128-NEXT: i32.and $push6=, $1, $pop5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push9=, 12 +; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.const $push11=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $8, $pop11 +; NO-SIMD128-NEXT: i32.and $push8=, $4, $pop7 +; NO-SIMD128-NEXT: i32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: andnot_v4i32: +; NO-SIMD128-FAST: .functype andnot_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $5, $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $6, $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push12=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $7, $pop12 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $8, $pop11 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: return %inv_y = xor <4 x i32> %y, %a = and <4 x i32> %x, %inv_y ret <4 x i32> %a } -; CHECK-LABEL: bitselect_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_v4i32 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.or -; SIMD128-FAST-NEXT: return define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { +; SIMD128-LABEL: bitselect_v4i32: +; SIMD128: .functype bitselect_v4i32 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_v4i32: +; SIMD128-FAST: .functype bitselect_v4i32 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-FAST-NEXT: v128.and $push3=, $pop2, $2 +; SIMD128-FAST-NEXT: v128.and $push0=, $0, $1 +; SIMD128-FAST-NEXT: v128.or $push1=, $pop3, $pop0 +; SIMD128-FAST-NEXT: return $pop1 +; +; NO-SIMD128-LABEL: bitselect_v4i32: +; NO-SIMD128: .functype bitselect_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push5=, 12 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.const $push1=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $4, $pop1 +; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $12 +; NO-SIMD128-NEXT: i32.and $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.or $push4=, $pop3, $pop0 +; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.const $push21=, -1 +; NO-SIMD128-NEXT: i32.xor $push8=, $3, $pop21 +; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $11 +; NO-SIMD128-NEXT: i32.and $push7=, $3, $7 +; NO-SIMD128-NEXT: i32.or $push10=, $pop9, $pop7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push20=, -1 +; NO-SIMD128-NEXT: i32.xor $push12=, $2, $pop20 +; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $10 +; NO-SIMD128-NEXT: i32.and $push11=, $2, $6 +; NO-SIMD128-NEXT: i32.or $push14=, $pop13, $pop11 +; NO-SIMD128-NEXT: i32.store 4($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push19=, -1 +; NO-SIMD128-NEXT: i32.xor $push16=, $1, $pop19 +; NO-SIMD128-NEXT: i32.and $push17=, $pop16, $9 +; NO-SIMD128-NEXT: i32.and $push15=, $1, $5 +; NO-SIMD128-NEXT: i32.or $push18=, $pop17, $pop15 +; NO-SIMD128-NEXT: i32.store 0($0), $pop18 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_v4i32: +; NO-SIMD128-FAST: .functype bitselect_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push1=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $pop2, $9 +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop3, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop21 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $10 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop7, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $11 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop11, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop19 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $12 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop15, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <4 x i32> %c, %v1 %inv_mask = xor <4 x i32> , %c %masked_v2 = and <4 x i32> %inv_mask, %v2 @@ -1064,31 +11226,149 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { ret <4 x i32> %a } -; CHECK-LABEL: bitselect_xor_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_xor_v4i32 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.xor -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.xor define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { +; SIMD128-LABEL: bitselect_xor_v4i32: +; SIMD128: .functype bitselect_xor_v4i32 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_xor_v4i32: +; SIMD128-FAST: .functype bitselect_xor_v4i32 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push2=, $1, $2 +; SIMD128-FAST-NEXT: v128.and $push1=, $pop2, $0 +; SIMD128-FAST-NEXT: v128.xor $push0=, $pop1, $2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: bitselect_xor_v4i32: +; NO-SIMD128: .functype bitselect_xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push3=, 12 +; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-NEXT: i32.xor $push0=, $8, $12 +; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $4 +; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $12 +; NO-SIMD128-NEXT: i32.store 0($pop4), $pop2 +; NO-SIMD128-NEXT: i32.xor $push5=, $7, $11 +; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $3 +; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $11 +; NO-SIMD128-NEXT: i32.store 8($0), $pop7 +; NO-SIMD128-NEXT: i32.xor $push8=, $6, $10 +; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $2 +; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop10 +; NO-SIMD128-NEXT: i32.xor $push11=, $5, $9 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $1 +; NO-SIMD128-NEXT: i32.xor $push13=, $pop12, $9 +; NO-SIMD128-NEXT: i32.store 0($0), $pop13 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_xor_v4i32: +; NO-SIMD128-FAST: .functype bitselect_xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $5, $9 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $pop0, $1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $pop1, $9 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $6, $10 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop3, $2 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $pop4, $10 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $7, $11 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $11 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $8, $12 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $12 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: return %xor1 = xor <4 x i32> %v1, %v2 %and = and <4 x i32> %xor1, %c %a = xor <4 x i32> %and, %v2 ret <4 x i32> %a } -; CHECK-LABEL: bitselect_xor_reversed_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_xor_reversed_v4i32 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.xor -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.xor define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { +; SIMD128-LABEL: bitselect_xor_reversed_v4i32: +; SIMD128: .functype bitselect_xor_reversed_v4i32 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $2, $1, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_xor_reversed_v4i32: +; SIMD128-FAST: .functype bitselect_xor_reversed_v4i32 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push1=, $1, $2 +; SIMD128-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-FAST-NEXT: v128.and $push3=, $pop1, $pop2 +; SIMD128-FAST-NEXT: v128.xor $push0=, $pop3, $2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: bitselect_xor_reversed_v4i32: +; NO-SIMD128: .functype bitselect_xor_reversed_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push5=, 12 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.xor $push2=, $8, $12 +; NO-SIMD128-NEXT: i32.const $push0=, -1 +; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop0 +; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $12 +; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.xor $push8=, $7, $11 +; NO-SIMD128-NEXT: i32.const $push21=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $3, $pop21 +; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $11 +; NO-SIMD128-NEXT: i32.store 8($0), $pop10 +; NO-SIMD128-NEXT: i32.xor $push12=, $6, $10 +; NO-SIMD128-NEXT: i32.const $push20=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $2, $pop20 +; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $pop11 +; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop14 +; NO-SIMD128-NEXT: i32.xor $push16=, $5, $9 +; NO-SIMD128-NEXT: i32.const $push19=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $1, $pop19 +; NO-SIMD128-NEXT: i32.and $push17=, $pop16, $pop15 +; NO-SIMD128-NEXT: i32.xor $push18=, $pop17, $9 +; NO-SIMD128-NEXT: i32.store 0($0), $pop18 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v4i32: +; NO-SIMD128-FAST: .functype bitselect_xor_reversed_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $5, $9 +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $9 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $10 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop21 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $10 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $7, $11 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $11 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $8, $12 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop19 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $12 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: return %xor1 = xor <4 x i32> %v1, %v2 %notc = xor <4 x i32> %c, %and = and <4 x i32> %xor1, %notc @@ -1096,12 +11376,66 @@ define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x ret <4 x i32> %a } -; CHECK-LABEL: extmul_low_s_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype extmul_low_s_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i32x4.extmul_low_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { +; SIMD128-LABEL: extmul_low_s_v4i32: +; SIMD128: .functype extmul_low_s_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.extmul_low_i16x8_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_low_s_v4i32: +; SIMD128-FAST: .functype extmul_low_s_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.extend_low_i16x8_s $push0=, $0 +; SIMD128-FAST-NEXT: i32x4.extend_low_i16x8_s $push1=, $1 +; SIMD128-FAST-NEXT: i32x4.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_low_s_v4i32: +; NO-SIMD128: .functype extmul_low_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $11 +; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push3=, $10 +; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 +; NO-SIMD128-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push6=, $9 +; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 +; NO-SIMD128-NEXT: i32.store 0($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 12 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.extend16_s $push10=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $12 +; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.store 0($pop13), $pop11 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_low_s_v4i32: +; NO-SIMD128-FAST: .functype extmul_low_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push1=, $1 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push0=, $9 +; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push4=, $2 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push3=, $10 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $pop4, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push7=, $3 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push6=, $11 +; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $12 +; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> %low2 = shufflevector <8 x i16> %v2, <8 x i16> undef, @@ -1112,12 +11446,66 @@ define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ret <4 x i32> %a } -; CHECK-LABEL: extmul_high_s_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype extmul_high_s_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i32x4.extmul_high_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { +; SIMD128-LABEL: extmul_high_s_v4i32: +; SIMD128: .functype extmul_high_s_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.extmul_high_i16x8_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_high_s_v4i32: +; SIMD128-FAST: .functype extmul_high_s_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.extend_high_i16x8_s $push0=, $0 +; SIMD128-FAST-NEXT: i32x4.extend_high_i16x8_s $push1=, $1 +; SIMD128-FAST-NEXT: i32x4.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_high_s_v4i32: +; NO-SIMD128: .functype extmul_high_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $15 +; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $6 +; NO-SIMD128-NEXT: i32.extend16_s $push3=, $14 +; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 +; NO-SIMD128-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push6=, $13 +; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 +; NO-SIMD128-NEXT: i32.store 0($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push12=, 12 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.extend16_s $push10=, $8 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $16 +; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.store 0($pop13), $pop11 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_high_s_v4i32: +; NO-SIMD128-FAST: .functype extmul_high_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push1=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push0=, $13 +; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push4=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push3=, $14 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $pop4, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push7=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push6=, $15 +; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $16 +; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> %high2 = shufflevector <8 x i16> %v2, <8 x i16> undef, @@ -1128,12 +11516,82 @@ define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ret <4 x i32> %a } -; CHECK-LABEL: extmul_low_u_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype extmul_low_u_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i32x4.extmul_low_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { +; SIMD128-LABEL: extmul_low_u_v4i32: +; SIMD128: .functype extmul_low_u_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.extmul_low_i16x8_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_low_u_v4i32: +; SIMD128-FAST: .functype extmul_low_u_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.extend_low_i16x8_u $push0=, $0 +; SIMD128-FAST-NEXT: i32x4.extend_low_i16x8_u $push1=, $1 +; SIMD128-FAST-NEXT: i32x4.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_low_u_v4i32: +; NO-SIMD128: .functype extmul_low_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push2=, $3, $pop0 +; NO-SIMD128-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $11, $pop21 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push20=, 65535 +; NO-SIMD128-NEXT: i32.and $push5=, $2, $pop20 +; NO-SIMD128-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-NEXT: i32.and $push4=, $10, $pop19 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop18 +; NO-SIMD128-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $9, $pop17 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store 0($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push13=, 12 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.const $push16=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $4, $pop16 +; NO-SIMD128-NEXT: i32.const $push15=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $12, $pop15 +; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_low_u_v4i32: +; NO-SIMD128-FAST: .functype extmul_low_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop21 +; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop19 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop17 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push16=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> %low2 = shufflevector <8 x i16> %v2, <8 x i16> undef, @@ -1144,12 +11602,82 @@ define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ret <4 x i32> %a } -; CHECK-LABEL: extmul_high_u_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype extmul_high_u_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i32x4.extmul_high_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { +; SIMD128-LABEL: extmul_high_u_v4i32: +; SIMD128: .functype extmul_high_u_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.extmul_high_i16x8_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_high_u_v4i32: +; SIMD128-FAST: .functype extmul_high_u_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32x4.extend_high_i16x8_u $push0=, $0 +; SIMD128-FAST-NEXT: i32x4.extend_high_i16x8_u $push1=, $1 +; SIMD128-FAST-NEXT: i32x4.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_high_u_v4i32: +; NO-SIMD128: .functype extmul_high_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push2=, $7, $pop0 +; NO-SIMD128-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $15, $pop21 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push20=, 65535 +; NO-SIMD128-NEXT: i32.and $push5=, $6, $pop20 +; NO-SIMD128-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-NEXT: i32.and $push4=, $14, $pop19 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-NEXT: i32.and $push8=, $5, $pop18 +; NO-SIMD128-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop17 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store 0($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push13=, 12 +; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.const $push16=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $8, $pop16 +; NO-SIMD128-NEXT: i32.const $push15=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $16, $pop15 +; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_high_u_v4i32: +; NO-SIMD128-FAST: .functype extmul_high_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $5, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $13, $pop21 +; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $7, $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $15, $pop17 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push16=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $8, $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> %high2 = shufflevector <8 x i16> %v2, <8 x i16> undef, @@ -1163,64 +11691,232 @@ define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; ============================================================================== ; 2 x i64 ; ============================================================================== -; CHECK-LABEL: add_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype add_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @add_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-LABEL: add_v2i64: +; SIMD128: .functype add_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.add $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: add_v2i64: +; SIMD128-FAST: .functype add_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.add $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: add_v2i64: +; NO-SIMD128: .functype add_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.add $push0=, $2, $4 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.add $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: add_v2i64: +; NO-SIMD128-FAST: .functype add_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.add $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.add $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = add <2 x i64> %x, %y ret <2 x i64> %a } -; CHECK-LABEL: sub_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype sub_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @sub_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-LABEL: sub_v2i64: +; SIMD128: .functype sub_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.sub $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: sub_v2i64: +; SIMD128-FAST: .functype sub_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.sub $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: sub_v2i64: +; NO-SIMD128: .functype sub_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.sub $push0=, $2, $4 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.sub $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sub_v2i64: +; NO-SIMD128-FAST: .functype sub_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.sub $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.sub $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = sub <2 x i64> %x, %y ret <2 x i64> %a } -; CHECK-LABEL: mul_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype mul_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128: i64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @mul_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-LABEL: mul_v2i64: +; SIMD128: .functype mul_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.mul $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: mul_v2i64: +; SIMD128-FAST: .functype mul_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.mul $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: mul_v2i64: +; NO-SIMD128: .functype mul_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.mul $push0=, $2, $4 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.mul $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: mul_v2i64: +; NO-SIMD128-FAST: .functype mul_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.mul $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.mul $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = mul <2 x i64> %x, %y ret <2 x i64> %a } -; CHECK-LABEL: abs_v2i64: -; NO-SIMD128-NOT: i64x2: -; SIMD128-NEXT: .functype abs_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.abs $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @abs_v2i64(<2 x i64> %x) { +; SIMD128-LABEL: abs_v2i64: +; SIMD128: .functype abs_v2i64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.abs $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: abs_v2i64: +; SIMD128-FAST: .functype abs_v2i64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.abs $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: abs_v2i64: +; NO-SIMD128: .functype abs_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 63 +; NO-SIMD128-NEXT: i64.shr_s $push9=, $2, $pop0 +; NO-SIMD128-NEXT: local.tee $push8=, $3=, $pop9 +; NO-SIMD128-NEXT: i64.xor $push1=, $2, $pop8 +; NO-SIMD128-NEXT: i64.sub $push2=, $pop1, $3 +; NO-SIMD128-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-NEXT: i64.const $push7=, 63 +; NO-SIMD128-NEXT: i64.shr_s $push6=, $1, $pop7 +; NO-SIMD128-NEXT: local.tee $push5=, $2=, $pop6 +; NO-SIMD128-NEXT: i64.xor $push3=, $1, $pop5 +; NO-SIMD128-NEXT: i64.sub $push4=, $pop3, $2 +; NO-SIMD128-NEXT: i64.store 0($0), $pop4 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: abs_v2i64: +; NO-SIMD128-FAST: .functype abs_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push0=, 63 +; NO-SIMD128-FAST-NEXT: i64.shr_s $push9=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push8=, $3=, $pop9 +; NO-SIMD128-FAST-NEXT: i64.xor $push1=, $1, $pop8 +; NO-SIMD128-FAST-NEXT: i64.sub $push2=, $pop1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i64.const $push7=, 63 +; NO-SIMD128-FAST-NEXT: i64.shr_s $push6=, $2, $pop7 +; NO-SIMD128-FAST-NEXT: local.tee $push5=, $1=, $pop6 +; NO-SIMD128-FAST-NEXT: i64.xor $push3=, $2, $pop5 +; NO-SIMD128-FAST-NEXT: i64.sub $push4=, $pop3, $1 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: return %a = sub <2 x i64> zeroinitializer, %x %b = icmp slt <2 x i64> %x, zeroinitializer %c = select <2 x i1> %b, <2 x i64> %a, <2 x i64> %x ret <2 x i64> %c } -; CHECK-LABEL: neg_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype neg_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @neg_v2i64(<2 x i64> %x) { +; SIMD128-LABEL: neg_v2i64: +; SIMD128: .functype neg_v2i64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.neg $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: neg_v2i64: +; SIMD128-FAST: .functype neg_v2i64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.neg $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: neg_v2i64: +; NO-SIMD128: .functype neg_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 0 +; NO-SIMD128-NEXT: i64.sub $push1=, $pop0, $2 +; NO-SIMD128-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-NEXT: i64.const $push3=, 0 +; NO-SIMD128-NEXT: i64.sub $push2=, $pop3, $1 +; NO-SIMD128-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: neg_v2i64: +; NO-SIMD128-FAST: .functype neg_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push0=, 0 +; NO-SIMD128-FAST-NEXT: i64.sub $push1=, $pop0, $1 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i64.const $push3=, 0 +; NO-SIMD128-FAST-NEXT: i64.sub $push2=, $pop3, $2 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: return %a = sub <2 x i64> , %x ret <2 x i64> %a } -; CHECK-LABEL: shl_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shl_v2i64 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-LABEL: shl_v2i64: +; SIMD128: .functype shl_v2i64 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.shl $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shl_v2i64: +; SIMD128-FAST: .functype shl_v2i64 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.shl $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_v2i64: +; NO-SIMD128: .functype shl_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.extend_i32_u $push3=, $3 +; NO-SIMD128-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-NEXT: i64.shl $push0=, $2, $pop2 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shl $push1=, $1, $4 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_v2i64: +; NO-SIMD128-FAST: .functype shl_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.extend_i32_u $push3=, $3 +; NO-SIMD128-FAST-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-FAST-NEXT: i64.shl $push0=, $2, $pop2 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shl $push1=, $1, $4 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %x2 = zext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> @@ -1228,12 +11924,40 @@ define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) { ret <2 x i64> %a } -; CHECK-LABEL: shl_sext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shl_sext_v2i64 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shl_sext_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-LABEL: shl_sext_v2i64: +; SIMD128: .functype shl_sext_v2i64 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.shl $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shl_sext_v2i64: +; SIMD128-FAST: .functype shl_sext_v2i64 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.shl $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_sext_v2i64: +; NO-SIMD128: .functype shl_sext_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.extend_i32_s $push3=, $3 +; NO-SIMD128-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-NEXT: i64.shl $push0=, $2, $pop2 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shl $push1=, $1, $4 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_sext_v2i64: +; NO-SIMD128-FAST: .functype shl_sext_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.extend_i32_s $push3=, $3 +; NO-SIMD128-FAST-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-FAST-NEXT: i64.shl $push0=, $2, $pop2 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shl $push1=, $1, $4 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %x2 = sext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> @@ -1241,53 +11965,166 @@ define <2 x i64> @shl_sext_v2i64(<2 x i64> %v, i32 %x) { ret <2 x i64> %a } -; CHECK-LABEL: shl_noext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shl_noext_v2i64 (v128, i64) -> (v128){{$}} -; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}} -; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shl_noext_v2i64(<2 x i64> %v, i64 %x) { +; SIMD128-LABEL: shl_noext_v2i64: +; SIMD128: .functype shl_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32.wrap_i64 $push0=, $1 +; SIMD128-NEXT: i64x2.shl $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: shl_noext_v2i64: +; SIMD128-FAST: .functype shl_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32.wrap_i64 $push1=, $1 +; SIMD128-FAST-NEXT: i64x2.shl $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_noext_v2i64: +; NO-SIMD128: .functype shl_noext_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.shl $push0=, $2, $3 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shl $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_noext_v2i64: +; NO-SIMD128-FAST: .functype shl_noext_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.shl $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shl $push1=, $2, $3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <2 x i64> undef, i64 %x, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> %a = shl <2 x i64> %v, %s ret <2 x i64> %a } -; CHECK-LABEL: shl_const_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shl_const_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}} -; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shl_const_v2i64(<2 x i64> %v) { +; SIMD128-LABEL: shl_const_v2i64: +; SIMD128: .functype shl_const_v2i64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32.const $push0=, 5 +; SIMD128-NEXT: i64x2.shl $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: shl_const_v2i64: +; SIMD128-FAST: .functype shl_const_v2i64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-FAST-NEXT: i64x2.shl $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_const_v2i64: +; NO-SIMD128: .functype shl_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 5 +; NO-SIMD128-NEXT: i64.shl $push1=, $2, $pop0 +; NO-SIMD128-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-NEXT: i64.const $push3=, 5 +; NO-SIMD128-NEXT: i64.shl $push2=, $1, $pop3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_const_v2i64: +; NO-SIMD128-FAST: .functype shl_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push0=, 5 +; NO-SIMD128-FAST-NEXT: i64.shl $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i64.const $push3=, 5 +; NO-SIMD128-FAST-NEXT: i64.shl $push2=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: return %a = shl <2 x i64> %v, ret <2 x i64> %a } -; CHECK-LABEL: shl_vec_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shl_vec_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i64.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}} -; SIMD128-NEXT: i64.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shl_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { +; SIMD128-LABEL: shl_vec_v2i64: +; SIMD128: .functype shl_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.extract_lane $push4=, $0, 0 +; SIMD128-NEXT: i64x2.extract_lane $push3=, $1, 0 +; SIMD128-NEXT: i64.shl $push5=, $pop4, $pop3 +; SIMD128-NEXT: i64x2.splat $push6=, $pop5 +; SIMD128-NEXT: i64x2.extract_lane $push1=, $0, 1 +; SIMD128-NEXT: i64x2.extract_lane $push0=, $1, 1 +; SIMD128-NEXT: i64.shl $push2=, $pop1, $pop0 +; SIMD128-NEXT: i64x2.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-NEXT: return $pop7 +; +; SIMD128-FAST-LABEL: shl_vec_v2i64: +; SIMD128-FAST: .functype shl_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.extract_lane $push5=, $0, 0 +; SIMD128-FAST-NEXT: i64x2.extract_lane $push4=, $1, 0 +; SIMD128-FAST-NEXT: i64.shl $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: i64x2.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: i64x2.extract_lane $push2=, $0, 1 +; SIMD128-FAST-NEXT: i64x2.extract_lane $push1=, $1, 1 +; SIMD128-FAST-NEXT: i64.shl $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: i64x2.replace_lane $push0=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shl_vec_v2i64: +; NO-SIMD128: .functype shl_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.shl $push0=, $2, $4 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shl $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_vec_v2i64: +; NO-SIMD128-FAST: .functype shl_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.shl $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shl $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = shl <2 x i64> %v, %x ret <2 x i64> %a } -; CHECK-LABEL: shr_s_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_s_v2i64 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-LABEL: shr_s_v2i64: +; SIMD128: .functype shr_s_v2i64 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.shr_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shr_s_v2i64: +; SIMD128-FAST: .functype shr_s_v2i64 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.shr_s $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_s_v2i64: +; NO-SIMD128: .functype shr_s_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.extend_i32_u $push3=, $3 +; NO-SIMD128-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-NEXT: i64.shr_s $push0=, $2, $pop2 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shr_s $push1=, $1, $4 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_v2i64: +; NO-SIMD128-FAST: .functype shr_s_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.extend_i32_u $push3=, $3 +; NO-SIMD128-FAST-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-FAST-NEXT: i64.shr_s $push0=, $2, $pop2 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shr_s $push1=, $1, $4 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %x2 = zext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> @@ -1295,12 +12132,40 @@ define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) { ret <2 x i64> %a } -; CHECK-LABEL: shr_s_sext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_s_sext_v2i64 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_s_sext_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-LABEL: shr_s_sext_v2i64: +; SIMD128: .functype shr_s_sext_v2i64 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.shr_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shr_s_sext_v2i64: +; SIMD128-FAST: .functype shr_s_sext_v2i64 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.shr_s $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_s_sext_v2i64: +; NO-SIMD128: .functype shr_s_sext_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.extend_i32_s $push3=, $3 +; NO-SIMD128-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-NEXT: i64.shr_s $push0=, $2, $pop2 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shr_s $push1=, $1, $4 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_sext_v2i64: +; NO-SIMD128-FAST: .functype shr_s_sext_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.extend_i32_s $push3=, $3 +; NO-SIMD128-FAST-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-FAST-NEXT: i64.shr_s $push0=, $2, $pop2 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shr_s $push1=, $1, $4 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %x2 = sext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> @@ -1308,53 +12173,166 @@ define <2 x i64> @shr_s_sext_v2i64(<2 x i64> %v, i32 %x) { ret <2 x i64> %a } -; CHECK-LABEL: shr_s_noext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_s_noext_v2i64 (v128, i64) -> (v128){{$}} -; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}} -; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_s_noext_v2i64(<2 x i64> %v, i64 %x) { +; SIMD128-LABEL: shr_s_noext_v2i64: +; SIMD128: .functype shr_s_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32.wrap_i64 $push0=, $1 +; SIMD128-NEXT: i64x2.shr_s $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: shr_s_noext_v2i64: +; SIMD128-FAST: .functype shr_s_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32.wrap_i64 $push1=, $1 +; SIMD128-FAST-NEXT: i64x2.shr_s $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_s_noext_v2i64: +; NO-SIMD128: .functype shr_s_noext_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.shr_s $push0=, $2, $3 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shr_s $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_noext_v2i64: +; NO-SIMD128-FAST: .functype shr_s_noext_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.shr_s $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shr_s $push1=, $2, $3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <2 x i64> undef, i64 %x, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> %a = ashr <2 x i64> %v, %s ret <2 x i64> %a } -; CHECK-LABEL: shr_s_const_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_s_const_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}} -; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_s_const_v2i64(<2 x i64> %v) { +; SIMD128-LABEL: shr_s_const_v2i64: +; SIMD128: .functype shr_s_const_v2i64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32.const $push0=, 5 +; SIMD128-NEXT: i64x2.shr_s $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: shr_s_const_v2i64: +; SIMD128-FAST: .functype shr_s_const_v2i64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-FAST-NEXT: i64x2.shr_s $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_s_const_v2i64: +; NO-SIMD128: .functype shr_s_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 5 +; NO-SIMD128-NEXT: i64.shr_s $push1=, $2, $pop0 +; NO-SIMD128-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-NEXT: i64.const $push3=, 5 +; NO-SIMD128-NEXT: i64.shr_s $push2=, $1, $pop3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_const_v2i64: +; NO-SIMD128-FAST: .functype shr_s_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push0=, 5 +; NO-SIMD128-FAST-NEXT: i64.shr_s $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i64.const $push3=, 5 +; NO-SIMD128-FAST-NEXT: i64.shr_s $push2=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: return %a = ashr <2 x i64> %v, ret <2 x i64> %a } -; CHECK-LABEL: shr_s_vec_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_s_vec_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i64.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}} -; SIMD128-NEXT: i64.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_s_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { +; SIMD128-LABEL: shr_s_vec_v2i64: +; SIMD128: .functype shr_s_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.extract_lane $push4=, $0, 0 +; SIMD128-NEXT: i64x2.extract_lane $push3=, $1, 0 +; SIMD128-NEXT: i64.shr_s $push5=, $pop4, $pop3 +; SIMD128-NEXT: i64x2.splat $push6=, $pop5 +; SIMD128-NEXT: i64x2.extract_lane $push1=, $0, 1 +; SIMD128-NEXT: i64x2.extract_lane $push0=, $1, 1 +; SIMD128-NEXT: i64.shr_s $push2=, $pop1, $pop0 +; SIMD128-NEXT: i64x2.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-NEXT: return $pop7 +; +; SIMD128-FAST-LABEL: shr_s_vec_v2i64: +; SIMD128-FAST: .functype shr_s_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.extract_lane $push5=, $0, 0 +; SIMD128-FAST-NEXT: i64x2.extract_lane $push4=, $1, 0 +; SIMD128-FAST-NEXT: i64.shr_s $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: i64x2.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: i64x2.extract_lane $push2=, $0, 1 +; SIMD128-FAST-NEXT: i64x2.extract_lane $push1=, $1, 1 +; SIMD128-FAST-NEXT: i64.shr_s $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: i64x2.replace_lane $push0=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_s_vec_v2i64: +; NO-SIMD128: .functype shr_s_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.shr_s $push0=, $2, $4 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shr_s $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_vec_v2i64: +; NO-SIMD128-FAST: .functype shr_s_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.shr_s $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shr_s $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = ashr <2 x i64> %v, %x ret <2 x i64> %a } -; CHECK-LABEL: shr_u_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_u_v2i64 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-LABEL: shr_u_v2i64: +; SIMD128: .functype shr_u_v2i64 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.shr_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shr_u_v2i64: +; SIMD128-FAST: .functype shr_u_v2i64 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.shr_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_u_v2i64: +; NO-SIMD128: .functype shr_u_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.extend_i32_u $push3=, $3 +; NO-SIMD128-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-NEXT: i64.shr_u $push0=, $2, $pop2 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shr_u $push1=, $1, $4 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_v2i64: +; NO-SIMD128-FAST: .functype shr_u_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.extend_i32_u $push3=, $3 +; NO-SIMD128-FAST-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-FAST-NEXT: i64.shr_u $push0=, $2, $pop2 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shr_u $push1=, $1, $4 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %x2 = zext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> @@ -1362,12 +12340,40 @@ define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) { ret <2 x i64> %a } -; CHECK-LABEL: shr_u_sext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_u_sext_v2i64 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_u_sext_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-LABEL: shr_u_sext_v2i64: +; SIMD128: .functype shr_u_sext_v2i64 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.shr_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: shr_u_sext_v2i64: +; SIMD128-FAST: .functype shr_u_sext_v2i64 (v128, i32) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.shr_u $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_u_sext_v2i64: +; NO-SIMD128: .functype shr_u_sext_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.extend_i32_s $push3=, $3 +; NO-SIMD128-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-NEXT: i64.shr_u $push0=, $2, $pop2 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shr_u $push1=, $1, $4 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_sext_v2i64: +; NO-SIMD128-FAST: .functype shr_u_sext_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.extend_i32_s $push3=, $3 +; NO-SIMD128-FAST-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD128-FAST-NEXT: i64.shr_u $push0=, $2, $pop2 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shr_u $push1=, $1, $4 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %x2 = sext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> @@ -1375,112 +12381,365 @@ define <2 x i64> @shr_u_sext_v2i64(<2 x i64> %v, i32 %x) { ret <2 x i64> %a } -; CHECK-LABEL: shr_u_noext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_u_noext_v2i64 (v128, i64) -> (v128){{$}} -; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}} -; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_u_noext_v2i64(<2 x i64> %v, i64 %x) { +; SIMD128-LABEL: shr_u_noext_v2i64: +; SIMD128: .functype shr_u_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32.wrap_i64 $push0=, $1 +; SIMD128-NEXT: i64x2.shr_u $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: shr_u_noext_v2i64: +; SIMD128-FAST: .functype shr_u_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32.wrap_i64 $push1=, $1 +; SIMD128-FAST-NEXT: i64x2.shr_u $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_u_noext_v2i64: +; NO-SIMD128: .functype shr_u_noext_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.shr_u $push0=, $2, $3 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shr_u $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_noext_v2i64: +; NO-SIMD128-FAST: .functype shr_u_noext_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.shr_u $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shr_u $push1=, $2, $3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <2 x i64> undef, i64 %x, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> %a = lshr <2 x i64> %v, %s ret <2 x i64> %a } -; CHECK-LABEL: shr_u_const_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_u_const_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}} -; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_u_const_v2i64(<2 x i64> %v) { +; SIMD128-LABEL: shr_u_const_v2i64: +; SIMD128: .functype shr_u_const_v2i64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32.const $push0=, 5 +; SIMD128-NEXT: i64x2.shr_u $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: shr_u_const_v2i64: +; SIMD128-FAST: .functype shr_u_const_v2i64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-FAST-NEXT: i64x2.shr_u $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_u_const_v2i64: +; NO-SIMD128: .functype shr_u_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 5 +; NO-SIMD128-NEXT: i64.shr_u $push1=, $2, $pop0 +; NO-SIMD128-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-NEXT: i64.const $push3=, 5 +; NO-SIMD128-NEXT: i64.shr_u $push2=, $1, $pop3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_const_v2i64: +; NO-SIMD128-FAST: .functype shr_u_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push0=, 5 +; NO-SIMD128-FAST-NEXT: i64.shr_u $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i64.const $push3=, 5 +; NO-SIMD128-FAST-NEXT: i64.shr_u $push2=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: return %a = lshr <2 x i64> %v, ret <2 x i64> %a } -; CHECK-LABEL: shr_u_vec_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_u_vec_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i64.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}} -; SIMD128-NEXT: i64.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_u_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { +; SIMD128-LABEL: shr_u_vec_v2i64: +; SIMD128: .functype shr_u_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.extract_lane $push4=, $0, 0 +; SIMD128-NEXT: i64x2.extract_lane $push3=, $1, 0 +; SIMD128-NEXT: i64.shr_u $push5=, $pop4, $pop3 +; SIMD128-NEXT: i64x2.splat $push6=, $pop5 +; SIMD128-NEXT: i64x2.extract_lane $push1=, $0, 1 +; SIMD128-NEXT: i64x2.extract_lane $push0=, $1, 1 +; SIMD128-NEXT: i64.shr_u $push2=, $pop1, $pop0 +; SIMD128-NEXT: i64x2.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-NEXT: return $pop7 +; +; SIMD128-FAST-LABEL: shr_u_vec_v2i64: +; SIMD128-FAST: .functype shr_u_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.extract_lane $push5=, $0, 0 +; SIMD128-FAST-NEXT: i64x2.extract_lane $push4=, $1, 0 +; SIMD128-FAST-NEXT: i64.shr_u $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: i64x2.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: i64x2.extract_lane $push2=, $0, 1 +; SIMD128-FAST-NEXT: i64x2.extract_lane $push1=, $1, 1 +; SIMD128-FAST-NEXT: i64.shr_u $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: i64x2.replace_lane $push0=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shr_u_vec_v2i64: +; NO-SIMD128: .functype shr_u_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.shr_u $push0=, $2, $4 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.shr_u $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_vec_v2i64: +; NO-SIMD128-FAST: .functype shr_u_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.shr_u $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.shr_u $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = lshr <2 x i64> %v, %x ret <2 x i64> %a } -; CHECK-LABEL: and_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype and_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @and_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-LABEL: and_v2i64: +; SIMD128: .functype and_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.and $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: and_v2i64: +; SIMD128-FAST: .functype and_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.and $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: and_v2i64: +; NO-SIMD128: .functype and_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.and $push0=, $2, $4 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.and $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: and_v2i64: +; NO-SIMD128-FAST: .functype and_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.and $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.and $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = and <2 x i64> %x, %y ret <2 x i64> %a } -; CHECK-LABEL: or_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype or_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @or_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-LABEL: or_v2i64: +; SIMD128: .functype or_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.or $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: or_v2i64: +; SIMD128-FAST: .functype or_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.or $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: or_v2i64: +; NO-SIMD128: .functype or_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.or $push0=, $2, $4 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.or $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: or_v2i64: +; NO-SIMD128-FAST: .functype or_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.or $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.or $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = or <2 x i64> %x, %y ret <2 x i64> %a } -; CHECK-LABEL: xor_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype xor_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @xor_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-LABEL: xor_v2i64: +; SIMD128: .functype xor_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: xor_v2i64: +; SIMD128-FAST: .functype xor_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: xor_v2i64: +; NO-SIMD128: .functype xor_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.xor $push0=, $2, $4 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.xor $push1=, $1, $3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: xor_v2i64: +; NO-SIMD128-FAST: .functype xor_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.xor $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.xor $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = xor <2 x i64> %x, %y ret <2 x i64> %a } -; CHECK-LABEL: not_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype not_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @not_v2i64(<2 x i64> %x) { +; SIMD128-LABEL: not_v2i64: +; SIMD128: .functype not_v2i64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.not $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: not_v2i64: +; SIMD128-FAST: .functype not_v2i64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.not $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: not_v2i64: +; NO-SIMD128: .functype not_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, -1 +; NO-SIMD128-NEXT: i64.xor $push1=, $2, $pop0 +; NO-SIMD128-NEXT: i64.store 8($0), $pop1 +; NO-SIMD128-NEXT: i64.const $push3=, -1 +; NO-SIMD128-NEXT: i64.xor $push2=, $1, $pop3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: not_v2i64: +; NO-SIMD128-FAST: .functype not_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i64.xor $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i64.const $push3=, -1 +; NO-SIMD128-FAST-NEXT: i64.xor $push2=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: return %a = xor <2 x i64> %x, ret <2 x i64> %a } -; CHECK-LABEL: andnot_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype andnot_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.andnot $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: return define <2 x i64> @andnot_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-LABEL: andnot_v2i64: +; SIMD128: .functype andnot_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.andnot $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: andnot_v2i64: +; SIMD128-FAST: .functype andnot_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.not $push0=, $1 +; SIMD128-FAST-NEXT: v128.and $push1=, $0, $pop0 +; SIMD128-FAST-NEXT: return $pop1 +; +; NO-SIMD128-LABEL: andnot_v2i64: +; NO-SIMD128: .functype andnot_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, -1 +; NO-SIMD128-NEXT: i64.xor $push1=, $4, $pop0 +; NO-SIMD128-NEXT: i64.and $push2=, $2, $pop1 +; NO-SIMD128-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-NEXT: i64.const $push5=, -1 +; NO-SIMD128-NEXT: i64.xor $push3=, $3, $pop5 +; NO-SIMD128-NEXT: i64.and $push4=, $1, $pop3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop4 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: andnot_v2i64: +; NO-SIMD128-FAST: .functype andnot_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i64.xor $push1=, $3, $pop0 +; NO-SIMD128-FAST-NEXT: i64.and $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i64.const $push5=, -1 +; NO-SIMD128-FAST-NEXT: i64.xor $push3=, $4, $pop5 +; NO-SIMD128-FAST-NEXT: i64.and $push4=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: return %inv_y = xor <2 x i64> %y, %a = and <2 x i64> %x, %inv_y ret <2 x i64> %a } -; CHECK-LABEL: bitselect_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_v2i64 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.or -; SIMD128-FAST-NEXT: return define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) { +; SIMD128-LABEL: bitselect_v2i64: +; SIMD128: .functype bitselect_v2i64 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_v2i64: +; SIMD128-FAST: .functype bitselect_v2i64 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-FAST-NEXT: v128.and $push3=, $2, $pop2 +; SIMD128-FAST-NEXT: v128.and $push0=, $1, $0 +; SIMD128-FAST-NEXT: v128.or $push1=, $pop3, $pop0 +; SIMD128-FAST-NEXT: return $pop1 +; +; NO-SIMD128-LABEL: bitselect_v2i64: +; NO-SIMD128: .functype bitselect_v2i64 (i32, i64, i64, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push1=, -1 +; NO-SIMD128-NEXT: i64.xor $push2=, $2, $pop1 +; NO-SIMD128-NEXT: i64.and $push3=, $6, $pop2 +; NO-SIMD128-NEXT: i64.and $push0=, $4, $2 +; NO-SIMD128-NEXT: i64.or $push4=, $pop3, $pop0 +; NO-SIMD128-NEXT: i64.store 8($0), $pop4 +; NO-SIMD128-NEXT: i64.const $push9=, -1 +; NO-SIMD128-NEXT: i64.xor $push6=, $1, $pop9 +; NO-SIMD128-NEXT: i64.and $push7=, $5, $pop6 +; NO-SIMD128-NEXT: i64.and $push5=, $3, $1 +; NO-SIMD128-NEXT: i64.or $push8=, $pop7, $pop5 +; NO-SIMD128-NEXT: i64.store 0($0), $pop8 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_v2i64: +; NO-SIMD128-FAST: .functype bitselect_v2i64 (i32, i64, i64, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push1=, -1 +; NO-SIMD128-FAST-NEXT: i64.xor $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i64.and $push3=, $5, $pop2 +; NO-SIMD128-FAST-NEXT: i64.and $push0=, $3, $1 +; NO-SIMD128-FAST-NEXT: i64.or $push4=, $pop3, $pop0 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i64.const $push9=, -1 +; NO-SIMD128-FAST-NEXT: i64.xor $push6=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: i64.and $push7=, $6, $pop6 +; NO-SIMD128-FAST-NEXT: i64.and $push5=, $4, $2 +; NO-SIMD128-FAST-NEXT: i64.or $push8=, $pop7, $pop5 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <2 x i64> %v1, %c %inv_mask = xor <2 x i64> , %c %masked_v2 = and <2 x i64> %v2, %inv_mask @@ -1488,31 +12747,101 @@ define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) { ret <2 x i64> %a } -; CHECK-LABEL: bitselect_xor_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_xor_v2i64 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.xor -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.xor define <2 x i64> @bitselect_xor_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) { +; SIMD128-LABEL: bitselect_xor_v2i64: +; SIMD128: .functype bitselect_xor_v2i64 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_xor_v2i64: +; SIMD128-FAST: .functype bitselect_xor_v2i64 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push2=, $1, $2 +; SIMD128-FAST-NEXT: v128.and $push1=, $pop2, $0 +; SIMD128-FAST-NEXT: v128.xor $push0=, $pop1, $2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: bitselect_xor_v2i64: +; NO-SIMD128: .functype bitselect_xor_v2i64 (i32, i64, i64, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.xor $push0=, $4, $6 +; NO-SIMD128-NEXT: i64.and $push1=, $pop0, $2 +; NO-SIMD128-NEXT: i64.xor $push2=, $pop1, $6 +; NO-SIMD128-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-NEXT: i64.xor $push3=, $3, $5 +; NO-SIMD128-NEXT: i64.and $push4=, $pop3, $1 +; NO-SIMD128-NEXT: i64.xor $push5=, $pop4, $5 +; NO-SIMD128-NEXT: i64.store 0($0), $pop5 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_xor_v2i64: +; NO-SIMD128-FAST: .functype bitselect_xor_v2i64 (i32, i64, i64, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.xor $push0=, $3, $5 +; NO-SIMD128-FAST-NEXT: i64.and $push1=, $pop0, $1 +; NO-SIMD128-FAST-NEXT: i64.xor $push2=, $pop1, $5 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i64.xor $push3=, $4, $6 +; NO-SIMD128-FAST-NEXT: i64.and $push4=, $pop3, $2 +; NO-SIMD128-FAST-NEXT: i64.xor $push5=, $pop4, $6 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: return %xor1 = xor <2 x i64> %v1, %v2 %and = and <2 x i64> %xor1, %c %a = xor <2 x i64> %and, %v2 ret <2 x i64> %a } -; CHECK-LABEL: bitselect_xor_reversed_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_xor_reversed_v2i64 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.xor -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.xor define <2 x i64> @bitselect_xor_reversed_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) { +; SIMD128-LABEL: bitselect_xor_reversed_v2i64: +; SIMD128: .functype bitselect_xor_reversed_v2i64 (v128, v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.bitselect $push0=, $2, $1, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_xor_reversed_v2i64: +; SIMD128-FAST: .functype bitselect_xor_reversed_v2i64 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.xor $push1=, $1, $2 +; SIMD128-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-FAST-NEXT: v128.and $push3=, $pop1, $pop2 +; SIMD128-FAST-NEXT: v128.xor $push0=, $pop3, $2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: bitselect_xor_reversed_v2i64: +; NO-SIMD128: .functype bitselect_xor_reversed_v2i64 (i32, i64, i64, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.xor $push2=, $4, $6 +; NO-SIMD128-NEXT: i64.const $push0=, -1 +; NO-SIMD128-NEXT: i64.xor $push1=, $2, $pop0 +; NO-SIMD128-NEXT: i64.and $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i64.xor $push4=, $pop3, $6 +; NO-SIMD128-NEXT: i64.store 8($0), $pop4 +; NO-SIMD128-NEXT: i64.xor $push6=, $3, $5 +; NO-SIMD128-NEXT: i64.const $push9=, -1 +; NO-SIMD128-NEXT: i64.xor $push5=, $1, $pop9 +; NO-SIMD128-NEXT: i64.and $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i64.xor $push8=, $pop7, $5 +; NO-SIMD128-NEXT: i64.store 0($0), $pop8 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v2i64: +; NO-SIMD128-FAST: .functype bitselect_xor_reversed_v2i64 (i32, i64, i64, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.xor $push2=, $3, $5 +; NO-SIMD128-FAST-NEXT: i64.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i64.xor $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i64.and $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i64.xor $push4=, $pop3, $5 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i64.xor $push6=, $4, $6 +; NO-SIMD128-FAST-NEXT: i64.const $push9=, -1 +; NO-SIMD128-FAST-NEXT: i64.xor $push5=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: i64.and $push7=, $pop6, $pop5 +; NO-SIMD128-FAST-NEXT: i64.xor $push8=, $pop7, $6 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: return %xor1 = xor <2 x i64> %v1, %v2 %notc = xor <2 x i64> %c, %and = and <2 x i64> %xor1, %notc @@ -1520,12 +12849,46 @@ define <2 x i64> @bitselect_xor_reversed_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x ret <2 x i64> %a } -; CHECK-LABEL: extmul_low_s_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i64x2.extmul_low_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <2 x i64> @extmul_low_s_v2i64(<4 x i32> %v1, <4 x i32> %v2) { +; SIMD128-LABEL: extmul_low_s_v2i64: +; SIMD128: .functype extmul_low_s_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.extmul_low_i32x4_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_low_s_v2i64: +; SIMD128-FAST: .functype extmul_low_s_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.extend_low_i32x4_s $push0=, $0 +; SIMD128-FAST-NEXT: i64x2.extend_low_i32x4_s $push1=, $1 +; SIMD128-FAST-NEXT: i64x2.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_low_s_v2i64: +; NO-SIMD128: .functype extmul_low_s_v2i64 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.extend_i32_s $push1=, $2 +; NO-SIMD128-NEXT: i64.extend_i32_s $push0=, $6 +; NO-SIMD128-NEXT: i64.mul $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-NEXT: i64.extend_i32_s $push4=, $1 +; NO-SIMD128-NEXT: i64.extend_i32_s $push3=, $5 +; NO-SIMD128-NEXT: i64.mul $push5=, $pop4, $pop3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop5 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_low_s_v2i64: +; NO-SIMD128-FAST: .functype extmul_low_s_v2i64 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.extend_i32_s $push1=, $1 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_s $push0=, $5 +; NO-SIMD128-FAST-NEXT: i64.mul $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_s $push4=, $2 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_s $push3=, $6 +; NO-SIMD128-FAST-NEXT: i64.mul $push5=, $pop4, $pop3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> %low2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> %extended1 = sext <2 x i32> %low1 to <2 x i64> @@ -1534,12 +12897,46 @@ define <2 x i64> @extmul_low_s_v2i64(<4 x i32> %v1, <4 x i32> %v2) { ret <2 x i64> %a } -; CHECK-LABEL: extmul_high_s_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype extmul_high_s_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i64x2.extmul_high_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <2 x i64> @extmul_high_s_v2i64(<4 x i32> %v1, <4 x i32> %v2) { +; SIMD128-LABEL: extmul_high_s_v2i64: +; SIMD128: .functype extmul_high_s_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.extmul_high_i32x4_s $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_high_s_v2i64: +; SIMD128-FAST: .functype extmul_high_s_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.extend_high_i32x4_s $push0=, $0 +; SIMD128-FAST-NEXT: i64x2.extend_high_i32x4_s $push1=, $1 +; SIMD128-FAST-NEXT: i64x2.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_high_s_v2i64: +; NO-SIMD128: .functype extmul_high_s_v2i64 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.extend_i32_s $push1=, $4 +; NO-SIMD128-NEXT: i64.extend_i32_s $push0=, $8 +; NO-SIMD128-NEXT: i64.mul $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-NEXT: i64.extend_i32_s $push4=, $3 +; NO-SIMD128-NEXT: i64.extend_i32_s $push3=, $7 +; NO-SIMD128-NEXT: i64.mul $push5=, $pop4, $pop3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop5 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_high_s_v2i64: +; NO-SIMD128-FAST: .functype extmul_high_s_v2i64 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.extend_i32_s $push1=, $3 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_s $push0=, $7 +; NO-SIMD128-FAST-NEXT: i64.mul $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_s $push4=, $4 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_s $push3=, $8 +; NO-SIMD128-FAST-NEXT: i64.mul $push5=, $pop4, $pop3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> %high2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> %extended1 = sext <2 x i32> %high1 to <2 x i64> @@ -1548,12 +12945,46 @@ define <2 x i64> @extmul_high_s_v2i64(<4 x i32> %v1, <4 x i32> %v2) { ret <2 x i64> %a } -; CHECK-LABEL: extmul_low_u_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype extmul_low_u_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i64x2.extmul_low_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <2 x i64> @extmul_low_u_v2i64(<4 x i32> %v1, <4 x i32> %v2) { +; SIMD128-LABEL: extmul_low_u_v2i64: +; SIMD128: .functype extmul_low_u_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.extmul_low_i32x4_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_low_u_v2i64: +; SIMD128-FAST: .functype extmul_low_u_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.extend_low_i32x4_u $push0=, $0 +; SIMD128-FAST-NEXT: i64x2.extend_low_i32x4_u $push1=, $1 +; SIMD128-FAST-NEXT: i64x2.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_low_u_v2i64: +; NO-SIMD128: .functype extmul_low_u_v2i64 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.extend_i32_u $push1=, $2 +; NO-SIMD128-NEXT: i64.extend_i32_u $push0=, $6 +; NO-SIMD128-NEXT: i64.mul $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-NEXT: i64.extend_i32_u $push4=, $1 +; NO-SIMD128-NEXT: i64.extend_i32_u $push3=, $5 +; NO-SIMD128-NEXT: i64.mul $push5=, $pop4, $pop3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop5 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_low_u_v2i64: +; NO-SIMD128-FAST: .functype extmul_low_u_v2i64 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.extend_i32_u $push1=, $1 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_u $push0=, $5 +; NO-SIMD128-FAST-NEXT: i64.mul $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_u $push4=, $2 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_u $push3=, $6 +; NO-SIMD128-FAST-NEXT: i64.mul $push5=, $pop4, $pop3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> %low2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> %extended1 = zext <2 x i32> %low1 to <2 x i64> @@ -1562,12 +12993,46 @@ define <2 x i64> @extmul_low_u_v2i64(<4 x i32> %v1, <4 x i32> %v2) { ret <2 x i64> %a } -; CHECK-LABEL: extmul_high_u_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype extmul_high_u_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: i64x2.extmul_high_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} define <2 x i64> @extmul_high_u_v2i64(<4 x i32> %v1, <4 x i32> %v2) { +; SIMD128-LABEL: extmul_high_u_v2i64: +; SIMD128: .functype extmul_high_u_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.extmul_high_i32x4_u $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: extmul_high_u_v2i64: +; SIMD128-FAST: .functype extmul_high_u_v2i64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: i64x2.extend_high_i32x4_u $push0=, $0 +; SIMD128-FAST-NEXT: i64x2.extend_high_i32x4_u $push1=, $1 +; SIMD128-FAST-NEXT: i64x2.mul $push2=, $pop0, $pop1 +; SIMD128-FAST-NEXT: return $pop2 +; +; NO-SIMD128-LABEL: extmul_high_u_v2i64: +; NO-SIMD128: .functype extmul_high_u_v2i64 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.extend_i32_u $push1=, $4 +; NO-SIMD128-NEXT: i64.extend_i32_u $push0=, $8 +; NO-SIMD128-NEXT: i64.mul $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i64.store 8($0), $pop2 +; NO-SIMD128-NEXT: i64.extend_i32_u $push4=, $3 +; NO-SIMD128-NEXT: i64.extend_i32_u $push3=, $7 +; NO-SIMD128-NEXT: i64.mul $push5=, $pop4, $pop3 +; NO-SIMD128-NEXT: i64.store 0($0), $pop5 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: extmul_high_u_v2i64: +; NO-SIMD128-FAST: .functype extmul_high_u_v2i64 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.extend_i32_u $push1=, $3 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_u $push0=, $7 +; NO-SIMD128-FAST-NEXT: i64.mul $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_u $push4=, $4 +; NO-SIMD128-FAST-NEXT: i64.extend_i32_u $push3=, $8 +; NO-SIMD128-FAST-NEXT: i64.mul $push5=, $pop4, $pop3 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> %high2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <2 x i32> %extended1 = zext <2 x i32> %high1 to <2 x i64> @@ -1579,130 +13044,550 @@ define <2 x i64> @extmul_high_u_v2i64(<4 x i32> %v1, <4 x i32> %v2) { ; ============================================================================== ; 4 x float ; ============================================================================== -; CHECK-LABEL: neg_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype neg_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @neg_v4f32(<4 x float> %x) { ; nsz makes this semantically equivalent to flipping sign bit +; SIMD128-LABEL: neg_v4f32: +; SIMD128: .functype neg_v4f32 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.neg $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: neg_v4f32: +; SIMD128-FAST: .functype neg_v4f32 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.neg $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: neg_v4f32: +; NO-SIMD128: .functype neg_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.neg $push0=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop0 +; NO-SIMD128-NEXT: f32.neg $push1=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-NEXT: f32.neg $push2=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push3=, 12 +; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-NEXT: f32.neg $push5=, $4 +; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: neg_v4f32: +; NO-SIMD128-FAST: .functype neg_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.neg $push0=, $1 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.neg $push1=, $2 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.neg $push2=, $3 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.neg $push5=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = fsub nsz <4 x float> , %x ret <4 x float> %a } -; CHECK-LABEL: abs_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype abs_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.abs $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone define <4 x float> @abs_v4f32(<4 x float> %x) { +; SIMD128-LABEL: abs_v4f32: +; SIMD128: .functype abs_v4f32 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.abs $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: abs_v4f32: +; SIMD128-FAST: .functype abs_v4f32 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.abs $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: abs_v4f32: +; NO-SIMD128: .functype abs_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.abs $push0=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop0 +; NO-SIMD128-NEXT: f32.abs $push1=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-NEXT: f32.abs $push2=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push3=, 12 +; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-NEXT: f32.abs $push5=, $4 +; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: abs_v4f32: +; NO-SIMD128-FAST: .functype abs_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.abs $push0=, $1 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.abs $push1=, $2 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.abs $push2=, $3 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.abs $push5=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x) ret <4 x float> %a } -; CHECK-LABEL: min_unordered_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}} -; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @min_unordered_v4f32(<4 x float> %x) { +; SIMD128-LABEL: min_unordered_v4f32: +; SIMD128: .functype min_unordered_v4f32 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: f32x4.min $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: min_unordered_v4f32: +; SIMD128-FAST: .functype min_unordered_v4f32 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: f32x4.min $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_unordered_v4f32: +; NO-SIMD128: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.min $push1=, $3, $pop0 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.min $push2=, $2, $pop9 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.min $push3=, $1, $pop8 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 12 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.min $push4=, $4, $pop7 +; NO-SIMD128-NEXT: f32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_unordered_v4f32: +; NO-SIMD128-FAST: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ule <4 x float> %x, %a = select <4 x i1> %cmps, <4 x float> %x, <4 x float> ret <4 x float> %a } -; CHECK-LABEL: max_unordered_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 -; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @max_unordered_v4f32(<4 x float> %x) { +; SIMD128-LABEL: max_unordered_v4f32: +; SIMD128: .functype max_unordered_v4f32 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: f32x4.max $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: max_unordered_v4f32: +; SIMD128-FAST: .functype max_unordered_v4f32 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: f32x4.max $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_unordered_v4f32: +; NO-SIMD128: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.max $push1=, $3, $pop0 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.max $push2=, $2, $pop9 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.max $push3=, $1, $pop8 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 12 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.max $push4=, $4, $pop7 +; NO-SIMD128-NEXT: f32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_unordered_v4f32: +; NO-SIMD128-FAST: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp uge <4 x float> %x, %a = select <4 x i1> %cmps, <4 x float> %x, <4 x float> ret <4 x float> %a } -; CHECK-LABEL: min_ordered_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}} -; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @min_ordered_v4f32(<4 x float> %x) { +; SIMD128-LABEL: min_ordered_v4f32: +; SIMD128: .functype min_ordered_v4f32 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: f32x4.min $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: min_ordered_v4f32: +; SIMD128-FAST: .functype min_ordered_v4f32 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: f32x4.min $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_ordered_v4f32: +; NO-SIMD128: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.min $push1=, $3, $pop0 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.min $push2=, $2, $pop9 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.min $push3=, $1, $pop8 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 12 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.min $push4=, $4, $pop7 +; NO-SIMD128-NEXT: f32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_ordered_v4f32: +; NO-SIMD128-FAST: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ole <4 x float> , %x %a = select <4 x i1> %cmps, <4 x float> , <4 x float> %x ret <4 x float> %a } -; CHECK-LABEL: max_ordered_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}} -; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @max_ordered_v4f32(<4 x float> %x) { +; SIMD128-LABEL: max_ordered_v4f32: +; SIMD128: .functype max_ordered_v4f32 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: f32x4.max $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: max_ordered_v4f32: +; SIMD128-FAST: .functype max_ordered_v4f32 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: f32x4.max $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_ordered_v4f32: +; NO-SIMD128: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.max $push1=, $3, $pop0 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.max $push2=, $2, $pop9 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.max $push3=, $1, $pop8 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 12 +; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.max $push4=, $4, $pop7 +; NO-SIMD128-NEXT: f32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_ordered_v4f32: +; NO-SIMD128-FAST: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp oge <4 x float> , %x %a = select <4 x i1> %cmps, <4 x float> , <4 x float> %x ret <4 x float> %a } -; CHECK-LABEL: min_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype min_intrinsic_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>) define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: min_intrinsic_v4f32: +; SIMD128: .functype min_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.min $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: min_intrinsic_v4f32: +; SIMD128-FAST: .functype min_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.min $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_intrinsic_v4f32: +; NO-SIMD128: .functype min_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.min $push0=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop0 +; NO-SIMD128-NEXT: f32.min $push1=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-NEXT: f32.min $push2=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: f32.min $push3=, $4, $8 +; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_intrinsic_v4f32: +; NO-SIMD128-FAST: .functype min_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.min $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.min $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.min $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a } -; CHECK-LABEL: minnum_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: minnum_intrinsic_v4f32: +; SIMD128: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.min $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: minnum_intrinsic_v4f32: +; SIMD128-FAST: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.min $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: minnum_intrinsic_v4f32: +; NO-SIMD128: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: call $push0=, fminf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fminf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fminf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push3=, 12 +; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-NEXT: call $push5=, fminf, $4, $8 +; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: minnum_intrinsic_v4f32: +; NO-SIMD128-FAST: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: call $push0=, fminf, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a } -; CHECK-LABEL: max_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype max_intrinsic_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: max_intrinsic_v4f32: +; SIMD128: .functype max_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.max $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: max_intrinsic_v4f32: +; SIMD128-FAST: .functype max_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.max $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_intrinsic_v4f32: +; NO-SIMD128: .functype max_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.max $push0=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop0 +; NO-SIMD128-NEXT: f32.max $push1=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-NEXT: f32.max $push2=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: f32.max $push3=, $4, $8 +; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_intrinsic_v4f32: +; NO-SIMD128-FAST: .functype max_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.max $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.max $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.max $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a } -; CHECK-LABEL: maxnum_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: maxnum_intrinsic_v4f32: +; SIMD128: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.max $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32: +; SIMD128-FAST: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.max $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: maxnum_intrinsic_v4f32: +; NO-SIMD128: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: call $push0=, fmaxf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push3=, 12 +; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-NEXT: call $push5=, fmaxf, $4, $8 +; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32: +; NO-SIMD128-FAST: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: call $push0=, fmaxf, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: call $push1=, fmaxf, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a } -; CHECK-LABEL: min_const_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @min_const_intrinsic_v4f32() { +; SIMD128-LABEL: min_const_intrinsic_v4f32: +; SIMD128: .functype min_const_intrinsic_v4f32 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: min_const_intrinsic_v4f32: +; SIMD128-FAST: .functype min_const_intrinsic_v4f32 () -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_const_intrinsic_v4f32: +; NO-SIMD128: .functype min_const_intrinsic_v4f32 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 4656722015785320448 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 4656722015785320448 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_const_intrinsic_v4f32: +; NO-SIMD128-FAST: .functype min_const_intrinsic_v4f32 (i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push0=, 4656722015785320448 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.const $push1=, 4656722015785320448 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.minimum.v4f32( <4 x float> , <4 x float> @@ -1710,12 +13595,36 @@ define <4 x float> @min_const_intrinsic_v4f32() { ret <4 x float> %a } -; CHECK-LABEL: max_const_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @max_const_intrinsic_v4f32() { +; SIMD128-LABEL: max_const_intrinsic_v4f32: +; SIMD128: .functype max_const_intrinsic_v4f32 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: max_const_intrinsic_v4f32: +; SIMD128-FAST: .functype max_const_intrinsic_v4f32 () -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_const_intrinsic_v4f32: +; NO-SIMD128: .functype max_const_intrinsic_v4f32 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 4767060206681587712 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 4767060206681587712 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_const_intrinsic_v4f32: +; NO-SIMD128-FAST: .functype max_const_intrinsic_v4f32 (i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push0=, 4767060206681587712 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.const $push1=, 4767060206681587712 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.maximum.v4f32( <4 x float> , <4 x float> @@ -1723,23 +13632,127 @@ define <4 x float> @max_const_intrinsic_v4f32() { ret <4 x float> %a } -; CHECK-LABEL: pmin_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype pmin_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.pmin $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: pmin_v4f32: +; SIMD128: .functype pmin_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.pmin $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: pmin_v4f32: +; SIMD128-FAST: .functype pmin_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.pmin $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: pmin_v4f32: +; NO-SIMD128: .functype pmin_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.lt $push0=, $7, $3 +; NO-SIMD128-NEXT: f32.select $push1=, $7, $3, $pop0 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.lt $push2=, $6, $2 +; NO-SIMD128-NEXT: f32.select $push3=, $6, $2, $pop2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop3 +; NO-SIMD128-NEXT: f32.lt $push4=, $5, $1 +; NO-SIMD128-NEXT: f32.select $push5=, $5, $1, $pop4 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: f32.lt $push6=, $8, $4 +; NO-SIMD128-NEXT: f32.select $push7=, $8, $4, $pop6 +; NO-SIMD128-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: pmin_v4f32: +; NO-SIMD128-FAST: .functype pmin_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.lt $push0=, $5, $1 +; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.lt $push2=, $6, $2 +; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $7, $3 +; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $8, $4 +; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: return %c = fcmp olt <4 x float> %y, %x %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x ret <4 x float> %a } -; CHECK-LABEL: pmin_int_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype pmin_int_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.pmin $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: pmin_int_v4f32: +; SIMD128: .functype pmin_int_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.pmin $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: pmin_int_v4f32: +; SIMD128-FAST: .functype pmin_int_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.pmin $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: pmin_int_v4f32: +; NO-SIMD128: .functype pmin_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push1=, $8 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push0=, $4 +; NO-SIMD128-NEXT: f32.lt $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i32.select $push3=, $8, $4, $pop2 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push7=, $7 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push6=, $3 +; NO-SIMD128-NEXT: f32.lt $push8=, $pop7, $pop6 +; NO-SIMD128-NEXT: i32.select $push9=, $7, $3, $pop8 +; NO-SIMD128-NEXT: i32.store 8($0), $pop9 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push11=, $6 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push10=, $2 +; NO-SIMD128-NEXT: f32.lt $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.select $push13=, $6, $2, $pop12 +; NO-SIMD128-NEXT: i32.store 4($0), $pop13 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push15=, $5 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push14=, $1 +; NO-SIMD128-NEXT: f32.lt $push16=, $pop15, $pop14 +; NO-SIMD128-NEXT: i32.select $push17=, $5, $1, $pop16 +; NO-SIMD128-NEXT: i32.store 0($0), $pop17 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: pmin_int_v4f32: +; NO-SIMD128-FAST: .functype pmin_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push1=, $5 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push0=, $1 +; NO-SIMD128-FAST-NEXT: f32.lt $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.select $push3=, $5, $1, $pop2 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push5=, $6 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push4=, $2 +; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.select $push7=, $6, $2, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop7 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push9=, $7 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push8=, $3 +; NO-SIMD128-FAST-NEXT: f32.lt $push10=, $pop9, $pop8 +; NO-SIMD128-FAST-NEXT: i32.select $push11=, $7, $3, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push16=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push13=, $8 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push12=, $4 +; NO-SIMD128-FAST-NEXT: f32.lt $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.select $push15=, $8, $4, $pop14 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: return %fx = bitcast <4 x i32> %x to <4 x float> %fy = bitcast <4 x i32> %y to <4 x float> %c = fcmp olt <4 x float> %fy, %fx @@ -1747,23 +13760,127 @@ define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ret <4 x i32> %a } -; CHECK-LABEL: pmax_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype pmax_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.pmax $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: pmax_v4f32: +; SIMD128: .functype pmax_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.pmax $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: pmax_v4f32: +; SIMD128-FAST: .functype pmax_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.pmax $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: pmax_v4f32: +; NO-SIMD128: .functype pmax_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.lt $push0=, $3, $7 +; NO-SIMD128-NEXT: f32.select $push1=, $7, $3, $pop0 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.lt $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.select $push3=, $6, $2, $pop2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop3 +; NO-SIMD128-NEXT: f32.lt $push4=, $1, $5 +; NO-SIMD128-NEXT: f32.select $push5=, $5, $1, $pop4 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push8=, 12 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: f32.lt $push6=, $4, $8 +; NO-SIMD128-NEXT: f32.select $push7=, $8, $4, $pop6 +; NO-SIMD128-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: pmax_v4f32: +; NO-SIMD128-FAST: .functype pmax_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.lt $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.lt $push2=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: return %c = fcmp olt <4 x float> %x, %y %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x ret <4 x float> %a } -; CHECK-LABEL: pmax_int_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype pmax_int_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.pmax $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: pmax_int_v4f32: +; SIMD128: .functype pmax_int_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.pmax $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: pmax_int_v4f32: +; SIMD128-FAST: .functype pmax_int_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.pmax $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: pmax_int_v4f32: +; NO-SIMD128: .functype pmax_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push1=, $4 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push0=, $8 +; NO-SIMD128-NEXT: f32.lt $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i32.select $push3=, $8, $4, $pop2 +; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push7=, $3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push6=, $7 +; NO-SIMD128-NEXT: f32.lt $push8=, $pop7, $pop6 +; NO-SIMD128-NEXT: i32.select $push9=, $7, $3, $pop8 +; NO-SIMD128-NEXT: i32.store 8($0), $pop9 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push11=, $2 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push10=, $6 +; NO-SIMD128-NEXT: f32.lt $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.select $push13=, $6, $2, $pop12 +; NO-SIMD128-NEXT: i32.store 4($0), $pop13 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push15=, $1 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push14=, $5 +; NO-SIMD128-NEXT: f32.lt $push16=, $pop15, $pop14 +; NO-SIMD128-NEXT: i32.select $push17=, $5, $1, $pop16 +; NO-SIMD128-NEXT: i32.store 0($0), $pop17 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: pmax_int_v4f32: +; NO-SIMD128-FAST: .functype pmax_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push1=, $1 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push0=, $5 +; NO-SIMD128-FAST-NEXT: f32.lt $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.select $push3=, $5, $1, $pop2 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push5=, $2 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push4=, $6 +; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.select $push7=, $6, $2, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop7 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push9=, $3 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push8=, $7 +; NO-SIMD128-FAST-NEXT: f32.lt $push10=, $pop9, $pop8 +; NO-SIMD128-FAST-NEXT: i32.select $push11=, $7, $3, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push16=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push13=, $4 +; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push12=, $8 +; NO-SIMD128-FAST-NEXT: f32.lt $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.select $push15=, $8, $4, $pop14 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: return %fx = bitcast <4 x i32> %x to <4 x float> %fy = bitcast <4 x i32> %y to <4 x float> %c = fcmp olt <4 x float> %fx, %fy @@ -1771,53 +13888,233 @@ define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ret <4 x i32> %a } -; CHECK-LABEL: add_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype add_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: add_v4f32: +; SIMD128: .functype add_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.add $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: add_v4f32: +; SIMD128-FAST: .functype add_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.add $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: add_v4f32: +; NO-SIMD128: .functype add_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.add $push0=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop0 +; NO-SIMD128-NEXT: f32.add $push1=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-NEXT: f32.add $push2=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: f32.add $push3=, $4, $8 +; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: add_v4f32: +; NO-SIMD128-FAST: .functype add_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.add $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.add $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.add $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.add $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = fadd <4 x float> %x, %y ret <4 x float> %a } -; CHECK-LABEL: sub_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype sub_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: sub_v4f32: +; SIMD128: .functype sub_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.sub $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: sub_v4f32: +; SIMD128-FAST: .functype sub_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.sub $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: sub_v4f32: +; NO-SIMD128: .functype sub_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.sub $push0=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop0 +; NO-SIMD128-NEXT: f32.sub $push1=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-NEXT: f32.sub $push2=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: f32.sub $push3=, $4, $8 +; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sub_v4f32: +; NO-SIMD128-FAST: .functype sub_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.sub $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.sub $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.sub $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.sub $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = fsub <4 x float> %x, %y ret <4 x float> %a } -; CHECK-LABEL: div_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype div_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.div $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: div_v4f32: +; SIMD128: .functype div_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.div $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: div_v4f32: +; SIMD128-FAST: .functype div_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.div $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: div_v4f32: +; NO-SIMD128: .functype div_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.div $push0=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop0 +; NO-SIMD128-NEXT: f32.div $push1=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-NEXT: f32.div $push2=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: f32.div $push3=, $4, $8 +; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: div_v4f32: +; NO-SIMD128-FAST: .functype div_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.div $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.div $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.div $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.div $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = fdiv <4 x float> %x, %y ret <4 x float> %a } -; CHECK-LABEL: mul_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype mul_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: mul_v4f32: +; SIMD128: .functype mul_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.mul $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: mul_v4f32: +; SIMD128-FAST: .functype mul_v4f32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.mul $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: mul_v4f32: +; NO-SIMD128: .functype mul_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.mul $push0=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop0 +; NO-SIMD128-NEXT: f32.mul $push1=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-NEXT: f32.mul $push2=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push4=, 12 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: f32.mul $push3=, $4, $8 +; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: mul_v4f32: +; NO-SIMD128-FAST: .functype mul_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.mul $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.mul $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.mul $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.mul $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = fmul <4 x float> %x, %y ret <4 x float> %a } -; CHECK-LABEL: sqrt_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype sqrt_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.sqrt $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) define <4 x float> @sqrt_v4f32(<4 x float> %x) { +; SIMD128-LABEL: sqrt_v4f32: +; SIMD128: .functype sqrt_v4f32 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.sqrt $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: sqrt_v4f32: +; SIMD128-FAST: .functype sqrt_v4f32 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f32x4.sqrt $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: sqrt_v4f32: +; NO-SIMD128: .functype sqrt_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.sqrt $push0=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop0 +; NO-SIMD128-NEXT: f32.sqrt $push1=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-NEXT: f32.sqrt $push2=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push3=, 12 +; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-NEXT: f32.sqrt $push5=, $4 +; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sqrt_v4f32: +; NO-SIMD128-FAST: .functype sqrt_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f32.sqrt $push0=, $1 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.sqrt $push1=, $2 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.sqrt $push2=, $3 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.sqrt $push5=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) ret <4 x float> %a } @@ -1825,108 +14122,344 @@ define <4 x float> @sqrt_v4f32(<4 x float> %x) { ; ============================================================================== ; 2 x double ; ============================================================================== -; CHECK-LABEL: neg_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype neg_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @neg_v2f64(<2 x double> %x) { ; nsz makes this semantically equivalent to flipping sign bit +; SIMD128-LABEL: neg_v2f64: +; SIMD128: .functype neg_v2f64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.neg $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: neg_v2f64: +; SIMD128-FAST: .functype neg_v2f64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.neg $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: neg_v2f64: +; NO-SIMD128: .functype neg_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.neg $push0=, $2 +; NO-SIMD128-NEXT: f64.store 8($0), $pop0 +; NO-SIMD128-NEXT: f64.neg $push1=, $1 +; NO-SIMD128-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: neg_v2f64: +; NO-SIMD128-FAST: .functype neg_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.neg $push0=, $1 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f64.neg $push1=, $2 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = fsub nsz <2 x double> , %x ret <2 x double> %a } -; CHECK-LABEL: abs_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype abs_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.abs $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone define <2 x double> @abs_v2f64(<2 x double> %x) { +; SIMD128-LABEL: abs_v2f64: +; SIMD128: .functype abs_v2f64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.abs $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: abs_v2f64: +; SIMD128-FAST: .functype abs_v2f64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.abs $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: abs_v2f64: +; NO-SIMD128: .functype abs_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.abs $push0=, $2 +; NO-SIMD128-NEXT: f64.store 8($0), $pop0 +; NO-SIMD128-NEXT: f64.abs $push1=, $1 +; NO-SIMD128-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: abs_v2f64: +; NO-SIMD128-FAST: .functype abs_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.abs $push0=, $1 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f64.abs $push1=, $2 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x) ret <2 x double> %a } -; CHECK-LABEL: min_unordered_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}} -; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @min_unordered_v2f64(<2 x double> %x) { +; SIMD128-LABEL: min_unordered_v2f64: +; SIMD128: .functype min_unordered_v2f64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: f64x2.min $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: min_unordered_v2f64: +; SIMD128-FAST: .functype min_unordered_v2f64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: f64x2.min $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_unordered_v2f64: +; NO-SIMD128: .functype min_unordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.min $push1=, $2, $pop0 +; NO-SIMD128-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.min $push2=, $1, $pop3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_unordered_v2f64: +; NO-SIMD128-FAST: .functype min_unordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.min $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.min $push2=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ule <2 x double> %x, %a = select <2 x i1> %cmps, <2 x double> %x, <2 x double> ret <2 x double> %a } -; CHECK-LABEL: max_unordered_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}} -; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @max_unordered_v2f64(<2 x double> %x) { +; SIMD128-LABEL: max_unordered_v2f64: +; SIMD128: .functype max_unordered_v2f64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: f64x2.max $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: max_unordered_v2f64: +; SIMD128-FAST: .functype max_unordered_v2f64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: f64x2.max $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_unordered_v2f64: +; NO-SIMD128: .functype max_unordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.max $push1=, $2, $pop0 +; NO-SIMD128-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.max $push2=, $1, $pop3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_unordered_v2f64: +; NO-SIMD128-FAST: .functype max_unordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.max $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.max $push2=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp uge <2 x double> %x, %a = select <2 x i1> %cmps, <2 x double> %x, <2 x double> ret <2 x double> %a } -; CHECK-LABEL: min_ordered_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}} -; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @min_ordered_v2f64(<2 x double> %x) { +; SIMD128-LABEL: min_ordered_v2f64: +; SIMD128: .functype min_ordered_v2f64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: f64x2.min $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: min_ordered_v2f64: +; SIMD128-FAST: .functype min_ordered_v2f64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: f64x2.min $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_ordered_v2f64: +; NO-SIMD128: .functype min_ordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.min $push1=, $2, $pop0 +; NO-SIMD128-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.min $push2=, $1, $pop3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_ordered_v2f64: +; NO-SIMD128-FAST: .functype min_ordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.min $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.min $push2=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ole <2 x double> , %x %a = select <2 x i1> %cmps, <2 x double> , <2 x double> %x ret <2 x double> %a } -; CHECK-LABEL: max_ordered_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}} -; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @max_ordered_v2f64(<2 x double> %x) { +; SIMD128-LABEL: max_ordered_v2f64: +; SIMD128: .functype max_ordered_v2f64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: f64x2.max $push1=, $0, $pop0 +; SIMD128-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: max_ordered_v2f64: +; SIMD128-FAST: .functype max_ordered_v2f64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push1=, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: f64x2.max $push0=, $0, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_ordered_v2f64: +; NO-SIMD128: .functype max_ordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.max $push1=, $2, $pop0 +; NO-SIMD128-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD128-NEXT: f64.max $push2=, $1, $pop3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop2 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_ordered_v2f64: +; NO-SIMD128-FAST: .functype max_ordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.max $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f64.max $push2=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp oge <2 x double> , %x %a = select <2 x i1> %cmps, <2 x double> , <2 x double> %x ret <2 x double> %a } -; CHECK-LABEL: min_intrinsic_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype min_intrinsic_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>) define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-LABEL: min_intrinsic_v2f64: +; SIMD128: .functype min_intrinsic_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.min $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: min_intrinsic_v2f64: +; SIMD128-FAST: .functype min_intrinsic_v2f64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.min $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_intrinsic_v2f64: +; NO-SIMD128: .functype min_intrinsic_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.min $push0=, $2, $4 +; NO-SIMD128-NEXT: f64.store 8($0), $pop0 +; NO-SIMD128-NEXT: f64.min $push1=, $1, $3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_intrinsic_v2f64: +; NO-SIMD128-FAST: .functype min_intrinsic_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.min $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f64.min $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y) ret <2 x double> %a } -; CHECK-LABEL: max_intrinsic_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype max_intrinsic_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-LABEL: max_intrinsic_v2f64: +; SIMD128: .functype max_intrinsic_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.max $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: max_intrinsic_v2f64: +; SIMD128-FAST: .functype max_intrinsic_v2f64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.max $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_intrinsic_v2f64: +; NO-SIMD128: .functype max_intrinsic_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.max $push0=, $2, $4 +; NO-SIMD128-NEXT: f64.store 8($0), $pop0 +; NO-SIMD128-NEXT: f64.max $push1=, $1, $3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_intrinsic_v2f64: +; NO-SIMD128-FAST: .functype max_intrinsic_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.max $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f64.max $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> %y) ret <2 x double> %a } -; CHECK-LABEL: min_const_intrinsic_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype min_const_intrinsic_v2f64 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @min_const_intrinsic_v2f64() { +; SIMD128-LABEL: min_const_intrinsic_v2f64: +; SIMD128: .functype min_const_intrinsic_v2f64 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: min_const_intrinsic_v2f64: +; SIMD128-FAST: .functype min_const_intrinsic_v2f64 () -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push0=, 0x1.4p2, 0x1.4p2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_const_intrinsic_v2f64: +; NO-SIMD128: .functype min_const_intrinsic_v2f64 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 4617315517961601024 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 4617315517961601024 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_const_intrinsic_v2f64: +; NO-SIMD128-FAST: .functype min_const_intrinsic_v2f64 (i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push0=, 4617315517961601024 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.const $push1=, 4617315517961601024 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = call <2 x double> @llvm.minimum.v2f64( <2 x double> , <2 x double> @@ -1934,12 +14467,36 @@ define <2 x double> @min_const_intrinsic_v2f64() { ret <2 x double> %a } -; CHECK-LABEL: max_const_intrinsic_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.5p5, 0x1.5p5{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @max_const_intrinsic_v2f64() { +; SIMD128-LABEL: max_const_intrinsic_v2f64: +; SIMD128: .functype max_const_intrinsic_v2f64 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.5p5, 0x1.5p5 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: max_const_intrinsic_v2f64: +; SIMD128-FAST: .functype max_const_intrinsic_v2f64 () -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: v128.const $push0=, 0x1.5p5, 0x1.5p5 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_const_intrinsic_v2f64: +; NO-SIMD128: .functype max_const_intrinsic_v2f64 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 4631107791820423168 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 4631107791820423168 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_const_intrinsic_v2f64: +; NO-SIMD128-FAST: .functype max_const_intrinsic_v2f64 (i32) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: i64.const $push0=, 4631107791820423168 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-FAST-NEXT: i64.const $push1=, 4631107791820423168 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = call <2 x double> @llvm.maximum.v2f64( <2 x double> , <2 x double> @@ -1947,23 +14504,87 @@ define <2 x double> @max_const_intrinsic_v2f64() { ret <2 x double> %a } -; CHECK-LABEL: pmin_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype pmin_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.pmin $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @pmin_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-LABEL: pmin_v2f64: +; SIMD128: .functype pmin_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.pmin $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: pmin_v2f64: +; SIMD128-FAST: .functype pmin_v2f64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.pmin $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: pmin_v2f64: +; NO-SIMD128: .functype pmin_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.lt $push0=, $4, $2 +; NO-SIMD128-NEXT: f64.select $push1=, $4, $2, $pop0 +; NO-SIMD128-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-NEXT: f64.lt $push2=, $3, $1 +; NO-SIMD128-NEXT: f64.select $push3=, $3, $1, $pop2 +; NO-SIMD128-NEXT: f64.store 0($0), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: pmin_v2f64: +; NO-SIMD128-FAST: .functype pmin_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.lt $push0=, $3, $1 +; NO-SIMD128-FAST-NEXT: f64.select $push1=, $3, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f64.lt $push2=, $4, $2 +; NO-SIMD128-FAST-NEXT: f64.select $push3=, $4, $2, $pop2 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: return %c = fcmp olt <2 x double> %y, %x %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x ret <2 x double> %a } -; CHECK-LABEL: pmin_int_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype pmin_int_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.pmin $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @pmin_int_v2f64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-LABEL: pmin_int_v2f64: +; SIMD128: .functype pmin_int_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.pmin $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: pmin_int_v2f64: +; SIMD128-FAST: .functype pmin_int_v2f64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.pmin $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: pmin_int_v2f64: +; NO-SIMD128: .functype pmin_int_v2f64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.reinterpret_i64 $push1=, $4 +; NO-SIMD128-NEXT: f64.reinterpret_i64 $push0=, $2 +; NO-SIMD128-NEXT: f64.lt $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i64.select $push3=, $4, $2, $pop2 +; NO-SIMD128-NEXT: i64.store 8($0), $pop3 +; NO-SIMD128-NEXT: f64.reinterpret_i64 $push5=, $3 +; NO-SIMD128-NEXT: f64.reinterpret_i64 $push4=, $1 +; NO-SIMD128-NEXT: f64.lt $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i64.select $push7=, $3, $1, $pop6 +; NO-SIMD128-NEXT: i64.store 0($0), $pop7 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: pmin_int_v2f64: +; NO-SIMD128-FAST: .functype pmin_int_v2f64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.reinterpret_i64 $push1=, $3 +; NO-SIMD128-FAST-NEXT: f64.reinterpret_i64 $push0=, $1 +; NO-SIMD128-FAST-NEXT: f64.lt $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i64.select $push3=, $3, $1, $pop2 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: f64.reinterpret_i64 $push5=, $4 +; NO-SIMD128-FAST-NEXT: f64.reinterpret_i64 $push4=, $2 +; NO-SIMD128-FAST-NEXT: f64.lt $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i64.select $push7=, $4, $2, $pop6 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop7 +; NO-SIMD128-FAST-NEXT: return %fx = bitcast <2 x i64> %x to <2 x double> %fy = bitcast <2 x i64> %y to <2 x double> %c = fcmp olt <2 x double> %fy, %fx @@ -1971,23 +14592,87 @@ define <2 x i64> @pmin_int_v2f64(<2 x i64> %x, <2 x i64> %y) { ret <2 x i64> %a } -; CHECK-LABEL: pmax_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype pmax_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.pmax $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @pmax_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-LABEL: pmax_v2f64: +; SIMD128: .functype pmax_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.pmax $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: pmax_v2f64: +; SIMD128-FAST: .functype pmax_v2f64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.pmax $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: pmax_v2f64: +; NO-SIMD128: .functype pmax_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.lt $push0=, $2, $4 +; NO-SIMD128-NEXT: f64.select $push1=, $4, $2, $pop0 +; NO-SIMD128-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-NEXT: f64.lt $push2=, $1, $3 +; NO-SIMD128-NEXT: f64.select $push3=, $3, $1, $pop2 +; NO-SIMD128-NEXT: f64.store 0($0), $pop3 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: pmax_v2f64: +; NO-SIMD128-FAST: .functype pmax_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.lt $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: f64.select $push1=, $3, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f64.lt $push2=, $2, $4 +; NO-SIMD128-FAST-NEXT: f64.select $push3=, $4, $2, $pop2 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: return %c = fcmp olt <2 x double> %x, %y %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x ret <2 x double> %a } -; CHECK-LABEL: pmax_int_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype pmax_int_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.pmax $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @pmax_int_v2f64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-LABEL: pmax_int_v2f64: +; SIMD128: .functype pmax_int_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.pmax $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: pmax_int_v2f64: +; SIMD128-FAST: .functype pmax_int_v2f64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.pmax $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: pmax_int_v2f64: +; NO-SIMD128: .functype pmax_int_v2f64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.reinterpret_i64 $push1=, $2 +; NO-SIMD128-NEXT: f64.reinterpret_i64 $push0=, $4 +; NO-SIMD128-NEXT: f64.lt $push2=, $pop1, $pop0 +; NO-SIMD128-NEXT: i64.select $push3=, $4, $2, $pop2 +; NO-SIMD128-NEXT: i64.store 8($0), $pop3 +; NO-SIMD128-NEXT: f64.reinterpret_i64 $push5=, $1 +; NO-SIMD128-NEXT: f64.reinterpret_i64 $push4=, $3 +; NO-SIMD128-NEXT: f64.lt $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i64.select $push7=, $3, $1, $pop6 +; NO-SIMD128-NEXT: i64.store 0($0), $pop7 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: pmax_int_v2f64: +; NO-SIMD128-FAST: .functype pmax_int_v2f64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.reinterpret_i64 $push1=, $1 +; NO-SIMD128-FAST-NEXT: f64.reinterpret_i64 $push0=, $3 +; NO-SIMD128-FAST-NEXT: f64.lt $push2=, $pop1, $pop0 +; NO-SIMD128-FAST-NEXT: i64.select $push3=, $3, $1, $pop2 +; NO-SIMD128-FAST-NEXT: i64.store 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: f64.reinterpret_i64 $push5=, $2 +; NO-SIMD128-FAST-NEXT: f64.reinterpret_i64 $push4=, $4 +; NO-SIMD128-FAST-NEXT: f64.lt $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i64.select $push7=, $4, $2, $pop6 +; NO-SIMD128-FAST-NEXT: i64.store 8($0), $pop7 +; NO-SIMD128-FAST-NEXT: return %fx = bitcast <2 x i64> %x to <2 x double> %fy = bitcast <2 x i64> %y to <2 x double> %c = fcmp olt <2 x double> %fx, %fy @@ -1995,53 +14680,173 @@ define <2 x i64> @pmax_int_v2f64(<2 x i64> %x, <2 x i64> %y) { ret <2 x i64> %a } -; CHECK-LABEL: add_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype add_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-LABEL: add_v2f64: +; SIMD128: .functype add_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.add $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: add_v2f64: +; SIMD128-FAST: .functype add_v2f64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.add $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: add_v2f64: +; NO-SIMD128: .functype add_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.add $push0=, $2, $4 +; NO-SIMD128-NEXT: f64.store 8($0), $pop0 +; NO-SIMD128-NEXT: f64.add $push1=, $1, $3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: add_v2f64: +; NO-SIMD128-FAST: .functype add_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.add $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f64.add $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = fadd <2 x double> %x, %y ret <2 x double> %a } -; CHECK-LABEL: sub_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype sub_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-LABEL: sub_v2f64: +; SIMD128: .functype sub_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.sub $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: sub_v2f64: +; SIMD128-FAST: .functype sub_v2f64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.sub $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: sub_v2f64: +; NO-SIMD128: .functype sub_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.sub $push0=, $2, $4 +; NO-SIMD128-NEXT: f64.store 8($0), $pop0 +; NO-SIMD128-NEXT: f64.sub $push1=, $1, $3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sub_v2f64: +; NO-SIMD128-FAST: .functype sub_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.sub $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f64.sub $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = fsub <2 x double> %x, %y ret <2 x double> %a } -; CHECK-LABEL: div_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype div_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.div $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-LABEL: div_v2f64: +; SIMD128: .functype div_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.div $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: div_v2f64: +; SIMD128-FAST: .functype div_v2f64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.div $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: div_v2f64: +; NO-SIMD128: .functype div_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.div $push0=, $2, $4 +; NO-SIMD128-NEXT: f64.store 8($0), $pop0 +; NO-SIMD128-NEXT: f64.div $push1=, $1, $3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: div_v2f64: +; NO-SIMD128-FAST: .functype div_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.div $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f64.div $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = fdiv <2 x double> %x, %y ret <2 x double> %a } -; CHECK-LABEL: mul_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype mul_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-LABEL: mul_v2f64: +; SIMD128: .functype mul_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.mul $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: mul_v2f64: +; SIMD128-FAST: .functype mul_v2f64 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.mul $push0=, $0, $1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: mul_v2f64: +; NO-SIMD128: .functype mul_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.mul $push0=, $2, $4 +; NO-SIMD128-NEXT: f64.store 8($0), $pop0 +; NO-SIMD128-NEXT: f64.mul $push1=, $1, $3 +; NO-SIMD128-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: mul_v2f64: +; NO-SIMD128-FAST: .functype mul_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.mul $push0=, $1, $3 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f64.mul $push1=, $2, $4 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = fmul <2 x double> %x, %y ret <2 x double> %a } -; CHECK-LABEL: sqrt_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype sqrt_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.sqrt $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) define <2 x double> @sqrt_v2f64(<2 x double> %x) { +; SIMD128-LABEL: sqrt_v2f64: +; SIMD128: .functype sqrt_v2f64 (v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.sqrt $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: sqrt_v2f64: +; SIMD128-FAST: .functype sqrt_v2f64 (v128) -> (v128) +; SIMD128-FAST-NEXT: # %bb.0: +; SIMD128-FAST-NEXT: f64x2.sqrt $push0=, $0 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: sqrt_v2f64: +; NO-SIMD128: .functype sqrt_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.sqrt $push0=, $2 +; NO-SIMD128-NEXT: f64.store 8($0), $pop0 +; NO-SIMD128-NEXT: f64.sqrt $push1=, $1 +; NO-SIMD128-NEXT: f64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sqrt_v2f64: +; NO-SIMD128-FAST: .functype sqrt_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-FAST-NEXT: # %bb.0: +; NO-SIMD128-FAST-NEXT: f64.sqrt $push0=, $1 +; NO-SIMD128-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f64.sqrt $push1=, $2 +; NO-SIMD128-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD128-FAST-NEXT: return %a = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) ret <2 x double> %a } diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-pair.ll b/llvm/test/CodeGen/WebAssembly/simd-build-pair.ll index af1d80a..8ea79ca 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-build-pair.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-pair.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=simd128 | FileCheck %s --check-prefixes CHECK +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=simd128 | FileCheck %s target triple = "wasm32-unknown-unknown" @@ -13,8 +14,14 @@ target triple = "wasm32-unknown-unknown" ; t8: ch = store<(store 8 into `ptr undef`, align 1)> t3:1, t24, undef:i32, undef:i32 ; t9: ch = WebAssemblyISD::RETURN t8 -; CHECK: v128.store64_lane define void @build_pair_i32s() { +; CHECK-LABEL: build_pair_i32s: +; CHECK: .functype build_pair_i32s () -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: v128.load $push0=, 0($0) +; CHECK-NEXT: i8x16.shuffle $push1=, $pop0, $1, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK-NEXT: v128.store64_lane 0($0):p2align=0, $pop1, 0 +; CHECK-NEXT: return entry: %0 = load <4 x i32>, ptr undef, align 16 %shuffle.i184 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> diff --git a/llvm/test/CodeGen/WebAssembly/simd-illegal-signext.ll b/llvm/test/CodeGen/WebAssembly/simd-illegal-signext.ll index 9e9bcbf2..f448ae3 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-illegal-signext.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-illegal-signext.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mattr=+simd128 | FileCheck %s ; Regression test for a crash caused by @@ -9,9 +10,16 @@ target triple = "wasm32-unknown-emscripten" -; CHECK: i32.load8_s -; CHECK-NEXT: i32.store16 define void @foo() { +; CHECK-LABEL: foo: +; CHECK: .functype foo () -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.load8_s 0 +; CHECK-NEXT: i32.store16 0 +; CHECK-NEXT: # fallthrough-return entry: %0 = load ptr, ptr undef, align 4 %1 = load i32, ptr %0, align 4 diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll index 723beb41..d2a38de 100644 --- a/llvm/test/CodeGen/WebAssembly/simd.ll +++ b/llvm/test/CodeGen/WebAssembly/simd.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128,+sign-ext | FileCheck %s --check-prefixes CHECK,SIMD128 -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,NO-SIMD128 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128,+sign-ext | FileCheck %s --check-prefix=SIMD128 +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefix=NO-SIMD128 ; Test that basic SIMD128 vector manipulation operations assemble as expected. @@ -8,23 +9,74 @@ target triple = "wasm32-unknown-unknown" ; ============================================================================== ; 16 x i8 ; ============================================================================== -; CHECK-LABEL: const_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype const_v16i8 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, -; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @const_v16i8() { +; SIMD128-LABEL: const_v16i8: +; SIMD128: .functype const_v16i8 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_v16i8: +; NO-SIMD128: .functype const_v16i8 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 1084818905618843912 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 506097522914230528 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <16 x i8> } -; CHECK-LABEL: splat_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype splat_v16i8 (i32) -> (v128){{$}} -; SIMD128-NEXT: i8x16.splat $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @splat_v16i8(i8 %x) { +; SIMD128-LABEL: splat_v16i8: +; SIMD128: .functype splat_v16i8 (i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.splat $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: splat_v16i8: +; NO-SIMD128: .functype splat_v16i8 (i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 8($0), $1 +; NO-SIMD128-NEXT: i32.store8 4($0), $1 +; NO-SIMD128-NEXT: i32.store8 2($0), $1 +; NO-SIMD128-NEXT: i32.store8 1($0), $1 +; NO-SIMD128-NEXT: i32.store8 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store8 0($pop1), $1 +; NO-SIMD128-NEXT: i32.const $push2=, 14 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store8 0($pop3), $1 +; NO-SIMD128-NEXT: i32.const $push4=, 13 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store8 0($pop5), $1 +; NO-SIMD128-NEXT: i32.const $push6=, 12 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $1 +; NO-SIMD128-NEXT: i32.const $push8=, 11 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.store8 0($pop9), $1 +; NO-SIMD128-NEXT: i32.const $push10=, 10 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $1 +; NO-SIMD128-NEXT: i32.const $push12=, 9 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $1 +; NO-SIMD128-NEXT: i32.const $push14=, 7 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.store8 0($pop15), $1 +; NO-SIMD128-NEXT: i32.const $push16=, 6 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $1 +; NO-SIMD128-NEXT: i32.const $push18=, 5 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $1 +; NO-SIMD128-NEXT: i32.const $push20=, 3 +; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-NEXT: i32.store8 0($pop21), $1 +; NO-SIMD128-NEXT: return %v = insertelement <16 x i8> undef, i8 %x, i32 0 %res = shufflevector <16 x i8> %v, <16 x i8> undef, <16 x i32> @splat_v16i8(i8 %x) { ret <16 x i8> %res } -; CHECK-LABEL: const_splat_v16i8: -; SIMD128: v128.const $push0=, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42{{$}} define <16 x i8> @const_splat_v16i8() { +; SIMD128-LABEL: const_splat_v16i8: +; SIMD128: .functype const_splat_v16i8 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_splat_v16i8: +; NO-SIMD128: .functype const_splat_v16i8 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 3038287259199220266 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 3038287259199220266 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <16 x i8> } -; CHECK-LABEL: extract_v16i8_s: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype extract_v16i8_s (v128) -> (i32){{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 13{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_v16i8_s(<16 x i8> %v) { +; SIMD128-LABEL: extract_v16i8_s: +; SIMD128: .functype extract_v16i8_s (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.extract_lane_s $push0=, $0, 13 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_v16i8_s: +; NO-SIMD128: .functype extract_v16i8_s (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend8_s $push0=, $13 +; NO-SIMD128-NEXT: return $pop0 %elem = extractelement <16 x i8> %v, i8 13 %a = sext i8 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_var_v16i8_s: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype extract_var_v16i8_s (v128, i32) -> (i32){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16 -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]] -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0 -; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15 -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]] -; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $2, $pop[[L5]] -; SIMD128-NEXT: i32.load8_s $push[[R:[0-9]+]]=, 0($pop[[L6]]) -; SIMD128-NEXT: return $pop[[R]] define i32 @extract_var_v16i8_s(<16 x i8> %v, i32 %i) { +; SIMD128-LABEL: extract_var_v16i8_s: +; SIMD128: .functype extract_var_v16i8_s (v128, i32) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push4=, __stack_pointer +; SIMD128-NEXT: i32.const $push5=, 16 +; SIMD128-NEXT: i32.sub $push7=, $pop4, $pop5 +; SIMD128-NEXT: local.tee $push6=, $2=, $pop7 +; SIMD128-NEXT: v128.store 0($pop6), $0 +; SIMD128-NEXT: i32.const $push0=, 15 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.or $push2=, $2, $pop1 +; SIMD128-NEXT: i32.load8_s $push3=, 0($pop2) +; SIMD128-NEXT: return $pop3 +; +; NO-SIMD128-LABEL: extract_var_v16i8_s: +; NO-SIMD128: .functype extract_var_v16i8_s (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push4=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push5=, 16 +; NO-SIMD128-NEXT: i32.sub $push7=, $pop4, $pop5 +; NO-SIMD128-NEXT: local.tee $push6=, $17=, $pop7 +; NO-SIMD128-NEXT: i32.store8 15($pop6), $15 +; NO-SIMD128-NEXT: i32.store8 14($17), $14 +; NO-SIMD128-NEXT: i32.store8 13($17), $13 +; NO-SIMD128-NEXT: i32.store8 12($17), $12 +; NO-SIMD128-NEXT: i32.store8 11($17), $11 +; NO-SIMD128-NEXT: i32.store8 10($17), $10 +; NO-SIMD128-NEXT: i32.store8 9($17), $9 +; NO-SIMD128-NEXT: i32.store8 8($17), $8 +; NO-SIMD128-NEXT: i32.store8 7($17), $7 +; NO-SIMD128-NEXT: i32.store8 6($17), $6 +; NO-SIMD128-NEXT: i32.store8 5($17), $5 +; NO-SIMD128-NEXT: i32.store8 4($17), $4 +; NO-SIMD128-NEXT: i32.store8 3($17), $3 +; NO-SIMD128-NEXT: i32.store8 2($17), $2 +; NO-SIMD128-NEXT: i32.store8 1($17), $1 +; NO-SIMD128-NEXT: i32.store8 0($17), $0 +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.or $push2=, $17, $pop1 +; NO-SIMD128-NEXT: i32.load8_s $push3=, 0($pop2) +; NO-SIMD128-NEXT: return $pop3 %elem = extractelement <16 x i8> %v, i32 %i %a = sext i8 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_undef_v16i8_s: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype extract_undef_v16i8_s (v128) -> (i32){{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_undef_v16i8_s(<16 x i8> %v) { +; SIMD128-LABEL: extract_undef_v16i8_s: +; SIMD128: .functype extract_undef_v16i8_s (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.extract_lane_s $push0=, $0, 0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_undef_v16i8_s: +; NO-SIMD128: .functype extract_undef_v16i8_s (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend8_s $push0=, $0 +; NO-SIMD128-NEXT: return $pop0 %elem = extractelement <16 x i8> %v, i8 undef %a = sext i8 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_v16i8_u: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype extract_v16i8_u (v128) -> (i32){{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 13{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_v16i8_u(<16 x i8> %v) { +; SIMD128-LABEL: extract_v16i8_u: +; SIMD128: .functype extract_v16i8_u (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $0, 13 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_v16i8_u: +; NO-SIMD128: .functype extract_v16i8_u (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop0 +; NO-SIMD128-NEXT: return $pop1 %elem = extractelement <16 x i8> %v, i8 13 %a = zext i8 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_var_v16i8_u: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype extract_var_v16i8_u (v128, i32) -> (i32){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} -; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $2, $pop[[L5]]{{$}} -; SIMD128-NEXT: i32.load8_u $push[[R:[0-9]+]]=, 0($pop[[L6]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_var_v16i8_u(<16 x i8> %v, i32 %i) { +; SIMD128-LABEL: extract_var_v16i8_u: +; SIMD128: .functype extract_var_v16i8_u (v128, i32) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push4=, __stack_pointer +; SIMD128-NEXT: i32.const $push5=, 16 +; SIMD128-NEXT: i32.sub $push7=, $pop4, $pop5 +; SIMD128-NEXT: local.tee $push6=, $2=, $pop7 +; SIMD128-NEXT: v128.store 0($pop6), $0 +; SIMD128-NEXT: i32.const $push0=, 15 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.or $push2=, $2, $pop1 +; SIMD128-NEXT: i32.load8_u $push3=, 0($pop2) +; SIMD128-NEXT: return $pop3 +; +; NO-SIMD128-LABEL: extract_var_v16i8_u: +; NO-SIMD128: .functype extract_var_v16i8_u (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push4=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push5=, 16 +; NO-SIMD128-NEXT: i32.sub $push7=, $pop4, $pop5 +; NO-SIMD128-NEXT: local.tee $push6=, $17=, $pop7 +; NO-SIMD128-NEXT: i32.store8 15($pop6), $15 +; NO-SIMD128-NEXT: i32.store8 14($17), $14 +; NO-SIMD128-NEXT: i32.store8 13($17), $13 +; NO-SIMD128-NEXT: i32.store8 12($17), $12 +; NO-SIMD128-NEXT: i32.store8 11($17), $11 +; NO-SIMD128-NEXT: i32.store8 10($17), $10 +; NO-SIMD128-NEXT: i32.store8 9($17), $9 +; NO-SIMD128-NEXT: i32.store8 8($17), $8 +; NO-SIMD128-NEXT: i32.store8 7($17), $7 +; NO-SIMD128-NEXT: i32.store8 6($17), $6 +; NO-SIMD128-NEXT: i32.store8 5($17), $5 +; NO-SIMD128-NEXT: i32.store8 4($17), $4 +; NO-SIMD128-NEXT: i32.store8 3($17), $3 +; NO-SIMD128-NEXT: i32.store8 2($17), $2 +; NO-SIMD128-NEXT: i32.store8 1($17), $1 +; NO-SIMD128-NEXT: i32.store8 0($17), $0 +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.or $push2=, $17, $pop1 +; NO-SIMD128-NEXT: i32.load8_u $push3=, 0($pop2) +; NO-SIMD128-NEXT: return $pop3 %elem = extractelement <16 x i8> %v, i32 %i %a = zext i8 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_undef_v16i8_u: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype extract_undef_v16i8_u (v128) -> (i32){{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_undef_v16i8_u(<16 x i8> %v) { +; SIMD128-LABEL: extract_undef_v16i8_u: +; SIMD128: .functype extract_undef_v16i8_u (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $0, 0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_undef_v16i8_u: +; NO-SIMD128: .functype extract_undef_v16i8_u (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $0, $pop0 +; NO-SIMD128-NEXT: return $pop1 %elem = extractelement <16 x i8> %v, i8 undef %a = zext i8 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype extract_v16i8 (v128) -> (i32){{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 13{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i8 @extract_v16i8(<16 x i8> %v) { +; SIMD128-LABEL: extract_v16i8: +; SIMD128: .functype extract_v16i8 (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $0, 13 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_v16i8: +; NO-SIMD128: .functype extract_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $13 %elem = extractelement <16 x i8> %v, i8 13 ret i8 %elem } -; CHECK-LABEL: extract_var_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype extract_var_v16i8 (v128, i32) -> (i32){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} -; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $2, $pop[[L5]]{{$}} -; SIMD128-NEXT: i32.load8_u $push[[R:[0-9]+]]=, 0($pop[[L6]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i8 @extract_var_v16i8(<16 x i8> %v, i32 %i) { +; SIMD128-LABEL: extract_var_v16i8: +; SIMD128: .functype extract_var_v16i8 (v128, i32) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push4=, __stack_pointer +; SIMD128-NEXT: i32.const $push5=, 16 +; SIMD128-NEXT: i32.sub $push7=, $pop4, $pop5 +; SIMD128-NEXT: local.tee $push6=, $2=, $pop7 +; SIMD128-NEXT: v128.store 0($pop6), $0 +; SIMD128-NEXT: i32.const $push0=, 15 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.or $push2=, $2, $pop1 +; SIMD128-NEXT: i32.load8_u $push3=, 0($pop2) +; SIMD128-NEXT: return $pop3 +; +; NO-SIMD128-LABEL: extract_var_v16i8: +; NO-SIMD128: .functype extract_var_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push4=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push5=, 16 +; NO-SIMD128-NEXT: i32.sub $push7=, $pop4, $pop5 +; NO-SIMD128-NEXT: local.tee $push6=, $17=, $pop7 +; NO-SIMD128-NEXT: i32.store8 15($pop6), $15 +; NO-SIMD128-NEXT: i32.store8 14($17), $14 +; NO-SIMD128-NEXT: i32.store8 13($17), $13 +; NO-SIMD128-NEXT: i32.store8 12($17), $12 +; NO-SIMD128-NEXT: i32.store8 11($17), $11 +; NO-SIMD128-NEXT: i32.store8 10($17), $10 +; NO-SIMD128-NEXT: i32.store8 9($17), $9 +; NO-SIMD128-NEXT: i32.store8 8($17), $8 +; NO-SIMD128-NEXT: i32.store8 7($17), $7 +; NO-SIMD128-NEXT: i32.store8 6($17), $6 +; NO-SIMD128-NEXT: i32.store8 5($17), $5 +; NO-SIMD128-NEXT: i32.store8 4($17), $4 +; NO-SIMD128-NEXT: i32.store8 3($17), $3 +; NO-SIMD128-NEXT: i32.store8 2($17), $2 +; NO-SIMD128-NEXT: i32.store8 1($17), $1 +; NO-SIMD128-NEXT: i32.store8 0($17), $0 +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.or $push2=, $17, $pop1 +; NO-SIMD128-NEXT: i32.load8_u $push3=, 0($pop2) +; NO-SIMD128-NEXT: return $pop3 %elem = extractelement <16 x i8> %v, i32 %i ret i8 %elem } -; CHECK-LABEL: extract_undef_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype extract_undef_v16i8 (v128) -> (i32){{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i8 @extract_undef_v16i8(<16 x i8> %v) { +; SIMD128-LABEL: extract_undef_v16i8: +; SIMD128: .functype extract_undef_v16i8 (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $0, 0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_undef_v16i8: +; NO-SIMD128: .functype extract_undef_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $0 %elem = extractelement <16 x i8> %v, i8 undef ret i8 %elem } -; CHECK-LABEL: replace_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype replace_v16i8 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $0, 11, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @replace_v16i8(<16 x i8> %v, i8 %x) { +; SIMD128-LABEL: replace_v16i8: +; SIMD128: .functype replace_v16i8 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.replace_lane $push0=, $0, 11, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_v16i8: +; NO-SIMD128: .functype replace_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 2($0), $3 +; NO-SIMD128-NEXT: i32.store8 1($0), $2 +; NO-SIMD128-NEXT: i32.store8 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 +; NO-SIMD128-NEXT: i32.const $push2=, 14 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 +; NO-SIMD128-NEXT: i32.const $push4=, 13 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store8 0($pop5), $14 +; NO-SIMD128-NEXT: i32.const $push6=, 12 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 +; NO-SIMD128-NEXT: i32.const $push8=, 11 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.store8 0($pop9), $17 +; NO-SIMD128-NEXT: i32.const $push10=, 10 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 +; NO-SIMD128-NEXT: i32.const $push12=, 9 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 +; NO-SIMD128-NEXT: i32.const $push14=, 7 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 +; NO-SIMD128-NEXT: i32.const $push16=, 6 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 +; NO-SIMD128-NEXT: i32.const $push18=, 5 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 +; NO-SIMD128-NEXT: i32.const $push20=, 3 +; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 +; NO-SIMD128-NEXT: return %res = insertelement <16 x i8> %v, i8 %x, i32 11 ret <16 x i8> %res } -; CHECK-LABEL: replace_var_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype replace_var_v16i8 (v128, i32, i32) -> (v128){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} -; SIMD128-NEXT: i32.or $push[[L6:[0-9]+]]=, $3, $pop[[L5]]{{$}} -; SIMD128-NEXT: i32.store8 0($pop[[L6]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @replace_var_v16i8(<16 x i8> %v, i32 %i, i8 %x) { +; SIMD128-LABEL: replace_var_v16i8: +; SIMD128: .functype replace_var_v16i8 (v128, i32, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push4=, __stack_pointer +; SIMD128-NEXT: i32.const $push5=, 16 +; SIMD128-NEXT: i32.sub $push7=, $pop4, $pop5 +; SIMD128-NEXT: local.tee $push6=, $3=, $pop7 +; SIMD128-NEXT: v128.store 0($pop6), $0 +; SIMD128-NEXT: i32.const $push0=, 15 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.or $push2=, $3, $pop1 +; SIMD128-NEXT: i32.store8 0($pop2), $2 +; SIMD128-NEXT: v128.load $push3=, 0($3) +; SIMD128-NEXT: return $pop3 +; +; NO-SIMD128-LABEL: replace_var_v16i8: +; NO-SIMD128: .functype replace_var_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push5=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push6=, 16 +; NO-SIMD128-NEXT: i32.sub $push8=, $pop5, $pop6 +; NO-SIMD128-NEXT: local.tee $push7=, $19=, $pop8 +; NO-SIMD128-NEXT: i32.store8 15($pop7), $16 +; NO-SIMD128-NEXT: i32.store8 14($19), $15 +; NO-SIMD128-NEXT: i32.store8 13($19), $14 +; NO-SIMD128-NEXT: i32.store8 12($19), $13 +; NO-SIMD128-NEXT: i32.store8 11($19), $12 +; NO-SIMD128-NEXT: i32.store8 10($19), $11 +; NO-SIMD128-NEXT: i32.store8 9($19), $10 +; NO-SIMD128-NEXT: i32.store8 8($19), $9 +; NO-SIMD128-NEXT: i32.store8 7($19), $8 +; NO-SIMD128-NEXT: i32.store8 6($19), $7 +; NO-SIMD128-NEXT: i32.store8 5($19), $6 +; NO-SIMD128-NEXT: i32.store8 4($19), $5 +; NO-SIMD128-NEXT: i32.store8 3($19), $4 +; NO-SIMD128-NEXT: i32.store8 2($19), $3 +; NO-SIMD128-NEXT: i32.store8 1($19), $2 +; NO-SIMD128-NEXT: i32.store8 0($19), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.and $push1=, $17, $pop0 +; NO-SIMD128-NEXT: i32.or $push2=, $19, $pop1 +; NO-SIMD128-NEXT: i32.store8 0($pop2), $18 +; NO-SIMD128-NEXT: i64.load $push3=, 8($19) +; NO-SIMD128-NEXT: i64.store 8($0), $pop3 +; NO-SIMD128-NEXT: i64.load $push4=, 0($19) +; NO-SIMD128-NEXT: i64.store 0($0), $pop4 +; NO-SIMD128-NEXT: return %res = insertelement <16 x i8> %v, i8 %x, i32 %i ret <16 x i8> %res } -; CHECK-LABEL: replace_zero_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype replace_zero_v16i8 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) { +; SIMD128-LABEL: replace_zero_v16i8: +; SIMD128: .functype replace_zero_v16i8 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.replace_lane $push0=, $0, 0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_zero_v16i8: +; NO-SIMD128: .functype replace_zero_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 2($0), $3 +; NO-SIMD128-NEXT: i32.store8 1($0), $2 +; NO-SIMD128-NEXT: i32.store8 0($0), $17 +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 +; NO-SIMD128-NEXT: i32.const $push2=, 14 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 +; NO-SIMD128-NEXT: i32.const $push4=, 13 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store8 0($pop5), $14 +; NO-SIMD128-NEXT: i32.const $push6=, 12 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 +; NO-SIMD128-NEXT: i32.const $push8=, 11 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.store8 0($pop9), $12 +; NO-SIMD128-NEXT: i32.const $push10=, 10 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 +; NO-SIMD128-NEXT: i32.const $push12=, 9 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 +; NO-SIMD128-NEXT: i32.const $push14=, 7 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 +; NO-SIMD128-NEXT: i32.const $push16=, 6 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 +; NO-SIMD128-NEXT: i32.const $push18=, 5 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 +; NO-SIMD128-NEXT: i32.const $push20=, 3 +; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 +; NO-SIMD128-NEXT: return %res = insertelement <16 x i8> %v, i8 %x, i32 0 ret <16 x i8> %res } -; CHECK-LABEL: shuffle_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, -; SIMD128-SAME: 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: shuffle_v16i8: +; SIMD128: .functype shuffle_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $1, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_v16i8: +; NO-SIMD128: .functype shuffle_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 2($0), $3 +; NO-SIMD128-NEXT: i32.store8 1($0), $18 +; NO-SIMD128-NEXT: i32.store8 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store8 0($pop1), $32 +; NO-SIMD128-NEXT: i32.const $push2=, 14 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 +; NO-SIMD128-NEXT: i32.const $push4=, 13 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store8 0($pop5), $30 +; NO-SIMD128-NEXT: i32.const $push6=, 12 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 +; NO-SIMD128-NEXT: i32.const $push8=, 11 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.store8 0($pop9), $28 +; NO-SIMD128-NEXT: i32.const $push10=, 10 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 +; NO-SIMD128-NEXT: i32.const $push12=, 9 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $26 +; NO-SIMD128-NEXT: i32.const $push14=, 7 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.store8 0($pop15), $24 +; NO-SIMD128-NEXT: i32.const $push16=, 6 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 +; NO-SIMD128-NEXT: i32.const $push18=, 5 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $22 +; NO-SIMD128-NEXT: i32.const $push20=, 3 +; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-NEXT: i32.store8 0($pop21), $20 +; NO-SIMD128-NEXT: return %res = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> ret <16 x i8> %res } -; CHECK-LABEL: shuffle_undef_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_undef_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, -; SIMD128-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: shuffle_undef_v16i8: +; SIMD128: .functype shuffle_undef_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_undef_v16i8: +; NO-SIMD128: .functype shuffle_undef_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 8($0), $2 +; NO-SIMD128-NEXT: i32.store8 4($0), $2 +; NO-SIMD128-NEXT: i32.store8 2($0), $2 +; NO-SIMD128-NEXT: i32.store8 1($0), $2 +; NO-SIMD128-NEXT: i32.store8 0($0), $2 +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store8 0($pop1), $2 +; NO-SIMD128-NEXT: i32.const $push2=, 14 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store8 0($pop3), $2 +; NO-SIMD128-NEXT: i32.const $push4=, 13 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store8 0($pop5), $2 +; NO-SIMD128-NEXT: i32.const $push6=, 12 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $2 +; NO-SIMD128-NEXT: i32.const $push8=, 11 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.store8 0($pop9), $2 +; NO-SIMD128-NEXT: i32.const $push10=, 10 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $2 +; NO-SIMD128-NEXT: i32.const $push12=, 9 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $2 +; NO-SIMD128-NEXT: i32.const $push14=, 7 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.store8 0($pop15), $2 +; NO-SIMD128-NEXT: i32.const $push16=, 6 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $2 +; NO-SIMD128-NEXT: i32.const $push18=, 5 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $2 +; NO-SIMD128-NEXT: i32.const $push20=, 3 +; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-NEXT: i32.store8 0($pop21), $2 +; NO-SIMD128-NEXT: return %res = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { ret <16 x i8> %res } -; CHECK-LABEL: build_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype build_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (v128){{$}} -; SIMD128-NEXT: i8x16.splat $push[[L0:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 1, $1{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $2{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L3:[0-9]+]]=, $pop[[L2]], 3, $3{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L4:[0-9]+]]=, $pop[[L3]], 4, $4{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L5:[0-9]+]]=, $pop[[L4]], 5, $5{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L6:[0-9]+]]=, $pop[[L5]], 6, $6{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L7:[0-9]+]]=, $pop[[L6]], 7, $7{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L8:[0-9]+]]=, $pop[[L7]], 8, $8{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L9:[0-9]+]]=, $pop[[L8]], 9, $9{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L10:[0-9]+]]=, $pop[[L9]], 10, $10{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L11:[0-9]+]]=, $pop[[L10]], 11, $11{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L12:[0-9]+]]=, $pop[[L11]], 12, $12{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L13:[0-9]+]]=, $pop[[L12]], 13, $13{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[L14:[0-9]+]]=, $pop[[L13]], 14, $14{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L14]], 15, $15{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @build_v16i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3, +; SIMD128-LABEL: build_v16i8: +; SIMD128: .functype build_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.splat $push0=, $0 +; SIMD128-NEXT: i8x16.replace_lane $push1=, $pop0, 1, $1 +; SIMD128-NEXT: i8x16.replace_lane $push2=, $pop1, 2, $2 +; SIMD128-NEXT: i8x16.replace_lane $push3=, $pop2, 3, $3 +; SIMD128-NEXT: i8x16.replace_lane $push4=, $pop3, 4, $4 +; SIMD128-NEXT: i8x16.replace_lane $push5=, $pop4, 5, $5 +; SIMD128-NEXT: i8x16.replace_lane $push6=, $pop5, 6, $6 +; SIMD128-NEXT: i8x16.replace_lane $push7=, $pop6, 7, $7 +; SIMD128-NEXT: i8x16.replace_lane $push8=, $pop7, 8, $8 +; SIMD128-NEXT: i8x16.replace_lane $push9=, $pop8, 9, $9 +; SIMD128-NEXT: i8x16.replace_lane $push10=, $pop9, 10, $10 +; SIMD128-NEXT: i8x16.replace_lane $push11=, $pop10, 11, $11 +; SIMD128-NEXT: i8x16.replace_lane $push12=, $pop11, 12, $12 +; SIMD128-NEXT: i8x16.replace_lane $push13=, $pop12, 13, $13 +; SIMD128-NEXT: i8x16.replace_lane $push14=, $pop13, 14, $14 +; SIMD128-NEXT: i8x16.replace_lane $push15=, $pop14, 15, $15 +; SIMD128-NEXT: return $pop15 +; +; NO-SIMD128-LABEL: build_v16i8: +; NO-SIMD128: .functype build_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 2($0), $3 +; NO-SIMD128-NEXT: i32.store8 1($0), $2 +; NO-SIMD128-NEXT: i32.store8 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 15 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 +; NO-SIMD128-NEXT: i32.const $push2=, 14 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 +; NO-SIMD128-NEXT: i32.const $push4=, 13 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store8 0($pop5), $14 +; NO-SIMD128-NEXT: i32.const $push6=, 12 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 +; NO-SIMD128-NEXT: i32.const $push8=, 11 +; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-NEXT: i32.store8 0($pop9), $12 +; NO-SIMD128-NEXT: i32.const $push10=, 10 +; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 +; NO-SIMD128-NEXT: i32.const $push12=, 9 +; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 +; NO-SIMD128-NEXT: i32.const $push14=, 7 +; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 +; NO-SIMD128-NEXT: i32.const $push16=, 6 +; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 +; NO-SIMD128-NEXT: i32.const $push18=, 5 +; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 +; NO-SIMD128-NEXT: i32.const $push20=, 3 +; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 +; NO-SIMD128-NEXT: return i8 %x4, i8 %x5, i8 %x6, i8 %x7, i8 %x8, i8 %x9, i8 %x10, i8 %x11, i8 %x12, i8 %x13, i8 %x14, i8 %x15) { @@ -272,239 +705,516 @@ define <16 x i8> @build_v16i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3, ; ============================================================================== ; 8 x i16 ; ============================================================================== -; CHECK-LABEL: const_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype const_v8i16 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 256, 770, 1284, 1798, 2312, 2826, 3340, 3854{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @const_v8i16() { +; SIMD128-LABEL: const_v8i16: +; SIMD128: .functype const_v8i16 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 256, 770, 1284, 1798, 2312, 2826, 3340, 3854 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_v8i16: +; NO-SIMD128: .functype const_v8i16 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 1084818905618843912 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 506097522914230528 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <8 x i16> } -; CHECK-LABEL: splat_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype splat_v8i16 (i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.splat $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @splat_v8i16(i16 %x) { +; SIMD128-LABEL: splat_v8i16: +; SIMD128: .functype splat_v8i16 (i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.splat $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: splat_v8i16: +; NO-SIMD128: .functype splat_v8i16 (i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 8($0), $1 +; NO-SIMD128-NEXT: i32.store16 4($0), $1 +; NO-SIMD128-NEXT: i32.store16 2($0), $1 +; NO-SIMD128-NEXT: i32.store16 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 14 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store16 0($pop1), $1 +; NO-SIMD128-NEXT: i32.const $push2=, 12 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store16 0($pop3), $1 +; NO-SIMD128-NEXT: i32.const $push4=, 10 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store16 0($pop5), $1 +; NO-SIMD128-NEXT: i32.const $push6=, 6 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store16 0($pop7), $1 +; NO-SIMD128-NEXT: return %v = insertelement <8 x i16> undef, i16 %x, i32 0 %res = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } -; CHECK-LABEL: const_splat_v8i16: -; SIMD128: v128.const $push0=, 42, 42, 42, 42, 42, 42, 42, 42{{$}} define <8 x i16> @const_splat_v8i16() { +; SIMD128-LABEL: const_splat_v8i16: +; SIMD128: .functype const_splat_v8i16 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 42, 42, 42, 42, 42, 42, 42, 42 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_splat_v8i16: +; NO-SIMD128: .functype const_splat_v8i16 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 11822129413226538 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 11822129413226538 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <8 x i16> } -; CHECK-LABEL: extract_v8i16_s: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extract_v8i16_s (v128) -> (i32){{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 5{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_v8i16_s(<8 x i16> %v) { +; SIMD128-LABEL: extract_v8i16_s: +; SIMD128: .functype extract_v8i16_s (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extract_lane_s $push0=, $0, 5 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_v8i16_s: +; NO-SIMD128: .functype extract_v8i16_s (i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $5 +; NO-SIMD128-NEXT: return $pop0 %elem = extractelement <8 x i16> %v, i16 5 %a = sext i16 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_var_v8i16_s: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extract_var_v8i16_s (v128, i32) -> (i32){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L8:[0-9]+]]=, $2, $pop[[L7]]{{$}} -; SIMD128-NEXT: i32.load16_s $push[[R:[0-9]+]]=, 0($pop[[L8]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_var_v8i16_s(<8 x i16> %v, i32 %i) { +; SIMD128-LABEL: extract_var_v8i16_s: +; SIMD128: .functype extract_var_v8i16_s (v128, i32) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $2=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 7 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 1 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $2, $pop3 +; SIMD128-NEXT: i32.load16_s $push5=, 0($pop4) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: extract_var_v8i16_s: +; NO-SIMD128: .functype extract_var_v8i16_s (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push6=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push7=, 16 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; NO-SIMD128-NEXT: local.tee $push8=, $9=, $pop9 +; NO-SIMD128-NEXT: i32.store16 14($pop8), $7 +; NO-SIMD128-NEXT: i32.store16 12($9), $6 +; NO-SIMD128-NEXT: i32.store16 10($9), $5 +; NO-SIMD128-NEXT: i32.store16 8($9), $4 +; NO-SIMD128-NEXT: i32.store16 6($9), $3 +; NO-SIMD128-NEXT: i32.store16 4($9), $2 +; NO-SIMD128-NEXT: i32.store16 2($9), $1 +; NO-SIMD128-NEXT: i32.store16 0($9), $0 +; NO-SIMD128-NEXT: i32.const $push0=, 7 +; NO-SIMD128-NEXT: i32.and $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 1 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $9, $pop3 +; NO-SIMD128-NEXT: i32.load16_s $push5=, 0($pop4) +; NO-SIMD128-NEXT: return $pop5 %elem = extractelement <8 x i16> %v, i32 %i %a = sext i16 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_undef_v8i16_s: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extract_undef_v8i16_s (v128) -> (i32){{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_undef_v8i16_s(<8 x i16> %v) { +; SIMD128-LABEL: extract_undef_v8i16_s: +; SIMD128: .functype extract_undef_v8i16_s (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extract_lane_s $push0=, $0, 0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_undef_v8i16_s: +; NO-SIMD128: .functype extract_undef_v8i16_s (i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $0 +; NO-SIMD128-NEXT: return $pop0 %elem = extractelement <8 x i16> %v, i16 undef %a = sext i16 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_v8i16_u: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extract_v8i16_u (v128) -> (i32){{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 5{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_v8i16_u(<8 x i16> %v) { +; SIMD128-LABEL: extract_v8i16_u: +; SIMD128: .functype extract_v8i16_u (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extract_lane_u $push0=, $0, 5 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_v8i16_u: +; NO-SIMD128: .functype extract_v8i16_u (i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $5, $pop0 +; NO-SIMD128-NEXT: return $pop1 %elem = extractelement <8 x i16> %v, i16 5 %a = zext i16 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_var_v8i16_u: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extract_var_v8i16_u (v128, i32) -> (i32){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L8:[0-9]+]]=, $2, $pop[[L7]]{{$}} -; SIMD128-NEXT: i32.load16_u $push[[R:[0-9]+]]=, 0($pop[[L8]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_var_v8i16_u(<8 x i16> %v, i32 %i) { +; SIMD128-LABEL: extract_var_v8i16_u: +; SIMD128: .functype extract_var_v8i16_u (v128, i32) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $2=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 7 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 1 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $2, $pop3 +; SIMD128-NEXT: i32.load16_u $push5=, 0($pop4) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: extract_var_v8i16_u: +; NO-SIMD128: .functype extract_var_v8i16_u (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push6=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push7=, 16 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; NO-SIMD128-NEXT: local.tee $push8=, $9=, $pop9 +; NO-SIMD128-NEXT: i32.store16 14($pop8), $7 +; NO-SIMD128-NEXT: i32.store16 12($9), $6 +; NO-SIMD128-NEXT: i32.store16 10($9), $5 +; NO-SIMD128-NEXT: i32.store16 8($9), $4 +; NO-SIMD128-NEXT: i32.store16 6($9), $3 +; NO-SIMD128-NEXT: i32.store16 4($9), $2 +; NO-SIMD128-NEXT: i32.store16 2($9), $1 +; NO-SIMD128-NEXT: i32.store16 0($9), $0 +; NO-SIMD128-NEXT: i32.const $push0=, 7 +; NO-SIMD128-NEXT: i32.and $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 1 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $9, $pop3 +; NO-SIMD128-NEXT: i32.load16_u $push5=, 0($pop4) +; NO-SIMD128-NEXT: return $pop5 %elem = extractelement <8 x i16> %v, i32 %i %a = zext i16 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_undef_v8i16_u: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extract_undef_v8i16_u (v128) -> (i32){{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_undef_v8i16_u(<8 x i16> %v) { +; SIMD128-LABEL: extract_undef_v8i16_u: +; SIMD128: .functype extract_undef_v8i16_u (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extract_lane_u $push0=, $0, 0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_undef_v8i16_u: +; NO-SIMD128: .functype extract_undef_v8i16_u (i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $0, $pop0 +; NO-SIMD128-NEXT: return $pop1 %elem = extractelement <8 x i16> %v, i16 undef %a = zext i16 %elem to i32 ret i32 %a } -; CHECK-LABEL: extract_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extract_v8i16 (v128) -> (i32){{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 5{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i16 @extract_v8i16(<8 x i16> %v) { +; SIMD128-LABEL: extract_v8i16: +; SIMD128: .functype extract_v8i16 (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extract_lane_u $push0=, $0, 5 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_v8i16: +; NO-SIMD128: .functype extract_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $5 %elem = extractelement <8 x i16> %v, i16 5 ret i16 %elem } -; CHECK-LABEL: extract_var_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extract_var_v8i16 (v128, i32) -> (i32){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L8:[0-9]+]]=, $2, $pop[[L7]]{{$}} -; SIMD128-NEXT: i32.load16_u $push[[R:[0-9]+]]=, 0($pop[[L8]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i16 @extract_var_v8i16(<8 x i16> %v, i32 %i) { +; SIMD128-LABEL: extract_var_v8i16: +; SIMD128: .functype extract_var_v8i16 (v128, i32) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $2=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 7 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 1 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $2, $pop3 +; SIMD128-NEXT: i32.load16_u $push5=, 0($pop4) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: extract_var_v8i16: +; NO-SIMD128: .functype extract_var_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push6=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push7=, 16 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; NO-SIMD128-NEXT: local.tee $push8=, $9=, $pop9 +; NO-SIMD128-NEXT: i32.store16 14($pop8), $7 +; NO-SIMD128-NEXT: i32.store16 12($9), $6 +; NO-SIMD128-NEXT: i32.store16 10($9), $5 +; NO-SIMD128-NEXT: i32.store16 8($9), $4 +; NO-SIMD128-NEXT: i32.store16 6($9), $3 +; NO-SIMD128-NEXT: i32.store16 4($9), $2 +; NO-SIMD128-NEXT: i32.store16 2($9), $1 +; NO-SIMD128-NEXT: i32.store16 0($9), $0 +; NO-SIMD128-NEXT: i32.const $push0=, 7 +; NO-SIMD128-NEXT: i32.and $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 1 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $9, $pop3 +; NO-SIMD128-NEXT: i32.load16_u $push5=, 0($pop4) +; NO-SIMD128-NEXT: return $pop5 %elem = extractelement <8 x i16> %v, i32 %i ret i16 %elem } -; CHECK-LABEL: extract_undef_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype extract_undef_v8i16 (v128) -> (i32){{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i16 @extract_undef_v8i16(<8 x i16> %v) { +; SIMD128-LABEL: extract_undef_v8i16: +; SIMD128: .functype extract_undef_v8i16 (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.extract_lane_u $push0=, $0, 0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_undef_v8i16: +; NO-SIMD128: .functype extract_undef_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $0 %elem = extractelement <8 x i16> %v, i16 undef ret i16 %elem } -; CHECK-LABEL: replace_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype replace_v8i16 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $0, 7, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @replace_v8i16(<8 x i16> %v, i16 %x) { +; SIMD128-LABEL: replace_v8i16: +; SIMD128: .functype replace_v8i16 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.replace_lane $push0=, $0, 7, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_v8i16: +; NO-SIMD128: .functype replace_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 4($0), $3 +; NO-SIMD128-NEXT: i32.store16 2($0), $2 +; NO-SIMD128-NEXT: i32.store16 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 14 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store16 0($pop1), $9 +; NO-SIMD128-NEXT: i32.const $push2=, 12 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 +; NO-SIMD128-NEXT: i32.const $push4=, 10 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 +; NO-SIMD128-NEXT: i32.const $push6=, 6 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 +; NO-SIMD128-NEXT: return %res = insertelement <8 x i16> %v, i16 %x, i32 7 ret <8 x i16> %res } -; CHECK-LABEL: replace_var_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype replace_var_v8i16 (v128, i32, i32) -> (v128){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 1{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L8:[0-9]+]]=, $3, $pop[[L7]]{{$}} -; SIMD128-NEXT: i32.store16 0($pop[[L8]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @replace_var_v8i16(<8 x i16> %v, i32 %i, i16 %x) { +; SIMD128-LABEL: replace_var_v8i16: +; SIMD128: .functype replace_var_v8i16 (v128, i32, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $3=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 7 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 1 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $3, $pop3 +; SIMD128-NEXT: i32.store16 0($pop4), $2 +; SIMD128-NEXT: v128.load $push5=, 0($3) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: replace_var_v8i16: +; NO-SIMD128: .functype replace_var_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push7=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push8=, 16 +; NO-SIMD128-NEXT: i32.sub $push10=, $pop7, $pop8 +; NO-SIMD128-NEXT: local.tee $push9=, $11=, $pop10 +; NO-SIMD128-NEXT: i32.store16 14($pop9), $8 +; NO-SIMD128-NEXT: i32.store16 12($11), $7 +; NO-SIMD128-NEXT: i32.store16 10($11), $6 +; NO-SIMD128-NEXT: i32.store16 8($11), $5 +; NO-SIMD128-NEXT: i32.store16 6($11), $4 +; NO-SIMD128-NEXT: i32.store16 4($11), $3 +; NO-SIMD128-NEXT: i32.store16 2($11), $2 +; NO-SIMD128-NEXT: i32.store16 0($11), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 7 +; NO-SIMD128-NEXT: i32.and $push1=, $9, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 1 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $11, $pop3 +; NO-SIMD128-NEXT: i32.store16 0($pop4), $10 +; NO-SIMD128-NEXT: i64.load $push5=, 8($11) +; NO-SIMD128-NEXT: i64.store 8($0), $pop5 +; NO-SIMD128-NEXT: i64.load $push6=, 0($11) +; NO-SIMD128-NEXT: i64.store 0($0), $pop6 +; NO-SIMD128-NEXT: return %res = insertelement <8 x i16> %v, i16 %x, i32 %i ret <8 x i16> %res } -; CHECK-LABEL: replace_zero_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype replace_zero_v8i16 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) { +; SIMD128-LABEL: replace_zero_v8i16: +; SIMD128: .functype replace_zero_v8i16 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.replace_lane $push0=, $0, 0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_zero_v8i16: +; NO-SIMD128: .functype replace_zero_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 4($0), $3 +; NO-SIMD128-NEXT: i32.store16 2($0), $2 +; NO-SIMD128-NEXT: i32.store16 0($0), $9 +; NO-SIMD128-NEXT: i32.const $push0=, 14 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store16 0($pop1), $8 +; NO-SIMD128-NEXT: i32.const $push2=, 12 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 +; NO-SIMD128-NEXT: i32.const $push4=, 10 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 +; NO-SIMD128-NEXT: i32.const $push6=, 6 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 +; NO-SIMD128-NEXT: return %res = insertelement <8 x i16> %v, i16 %x, i32 0 ret <8 x i16> %res } -; CHECK-LABEL: shuffle_v8i16: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, -; SIMD128-SAME: 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: shuffle_v8i16: +; SIMD128: .functype shuffle_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $1, 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_v8i16: +; NO-SIMD128: .functype shuffle_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 4($0), $3 +; NO-SIMD128-NEXT: i32.store16 2($0), $10 +; NO-SIMD128-NEXT: i32.store16 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 14 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store16 0($pop1), $16 +; NO-SIMD128-NEXT: i32.const $push2=, 12 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 +; NO-SIMD128-NEXT: i32.const $push4=, 10 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store16 0($pop5), $14 +; NO-SIMD128-NEXT: i32.const $push6=, 6 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store16 0($pop7), $12 +; NO-SIMD128-NEXT: return %res = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> ret <8 x i16> %res } -; CHECK-LABEL: shuffle_undef_v8i16: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_undef_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, -; SIMD128-SAME: 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: shuffle_undef_v8i16: +; SIMD128: .functype shuffle_undef_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_undef_v8i16: +; NO-SIMD128: .functype shuffle_undef_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 8($0), $2 +; NO-SIMD128-NEXT: i32.store16 4($0), $2 +; NO-SIMD128-NEXT: i32.store16 2($0), $2 +; NO-SIMD128-NEXT: i32.store16 0($0), $2 +; NO-SIMD128-NEXT: i32.const $push0=, 14 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store16 0($pop1), $2 +; NO-SIMD128-NEXT: i32.const $push2=, 12 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store16 0($pop3), $2 +; NO-SIMD128-NEXT: i32.const $push4=, 10 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store16 0($pop5), $2 +; NO-SIMD128-NEXT: i32.const $push6=, 6 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store16 0($pop7), $2 +; NO-SIMD128-NEXT: return %res = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> ret <8 x i16> %res } -; CHECK-LABEL: build_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype build_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.splat $push[[L0:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 1, $1{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $2{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[L3:[0-9]+]]=, $pop[[L2]], 3, $3{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[L4:[0-9]+]]=, $pop[[L3]], 4, $4{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[L5:[0-9]+]]=, $pop[[L4]], 5, $5{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[L6:[0-9]+]]=, $pop[[L5]], 6, $6{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L6]], 7, $7{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @build_v8i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3, +; SIMD128-LABEL: build_v8i16: +; SIMD128: .functype build_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i16x8.splat $push0=, $0 +; SIMD128-NEXT: i16x8.replace_lane $push1=, $pop0, 1, $1 +; SIMD128-NEXT: i16x8.replace_lane $push2=, $pop1, 2, $2 +; SIMD128-NEXT: i16x8.replace_lane $push3=, $pop2, 3, $3 +; SIMD128-NEXT: i16x8.replace_lane $push4=, $pop3, 4, $4 +; SIMD128-NEXT: i16x8.replace_lane $push5=, $pop4, 5, $5 +; SIMD128-NEXT: i16x8.replace_lane $push6=, $pop5, 6, $6 +; SIMD128-NEXT: i16x8.replace_lane $push7=, $pop6, 7, $7 +; SIMD128-NEXT: return $pop7 +; +; NO-SIMD128-LABEL: build_v8i16: +; NO-SIMD128: .functype build_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 4($0), $3 +; NO-SIMD128-NEXT: i32.store16 2($0), $2 +; NO-SIMD128-NEXT: i32.store16 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 14 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store16 0($pop1), $8 +; NO-SIMD128-NEXT: i32.const $push2=, 12 +; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 +; NO-SIMD128-NEXT: i32.const $push4=, 10 +; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 +; NO-SIMD128-NEXT: i32.const $push6=, 6 +; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 +; NO-SIMD128-NEXT: return i16 %x4, i16 %x5, i16 %x6, i16 %x7) { %t0 = insertelement <8 x i16> undef, i16 %x0, i32 0 %t1 = insertelement <8 x i16> %t0, i16 %x1, i32 1 @@ -520,147 +1230,284 @@ define <8 x i16> @build_v8i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3, ; ============================================================================== ; 4 x i32 ; ============================================================================== -; CHECK-LABEL: const_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype const_v4i32 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 50462976, 117835012, 185207048, 252579084{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @const_v4i32() { +; SIMD128-LABEL: const_v4i32: +; SIMD128: .functype const_v4i32 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 50462976, 117835012, 185207048, 252579084 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_v4i32: +; NO-SIMD128: .functype const_v4i32 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 1084818905618843912 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 506097522914230528 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <4 x i32> } -; CHECK-LABEL: splat_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype splat_v4i32 (i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.splat $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @splat_v4i32(i32 %x) { +; SIMD128-LABEL: splat_v4i32: +; SIMD128: .functype splat_v4i32 (i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.splat $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: splat_v4i32: +; NO-SIMD128: .functype splat_v4i32 (i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 8($0), $1 +; NO-SIMD128-NEXT: i32.store 4($0), $1 +; NO-SIMD128-NEXT: i32.store 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store 0($pop1), $1 +; NO-SIMD128-NEXT: return %v = insertelement <4 x i32> undef, i32 %x, i32 0 %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> ret <4 x i32> %res } -; CHECK-LABEL: const_splat_v4i32: -; SIMD128: v128.const $push0=, 42, 42, 42, 42{{$}} define <4 x i32> @const_splat_v4i32() { +; SIMD128-LABEL: const_splat_v4i32: +; SIMD128: .functype const_splat_v4i32 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 42, 42, 42, 42 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_splat_v4i32: +; NO-SIMD128: .functype const_splat_v4i32 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 180388626474 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 180388626474 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <4 x i32> } -; CHECK-LABEL: extract_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype extract_v4i32 (v128) -> (i32){{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[R:[0-9]+]]=, $0, 3{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_v4i32(<4 x i32> %v) { +; SIMD128-LABEL: extract_v4i32: +; SIMD128: .functype extract_v4i32 (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.extract_lane $push0=, $0, 3 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_v4i32: +; NO-SIMD128: .functype extract_v4i32 (i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $3 %elem = extractelement <4 x i32> %v, i32 3 ret i32 %elem } -; CHECK-LABEL: extract_var_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype extract_var_v4i32 (v128, i32) -> (i32){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 3{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 2{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L4:[0-9]+]]=, $2, $pop[[L7]]{{$}} -; SIMD128-NEXT: i32.load $push[[R:[0-9]+]]=, 0($pop[[L4]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_var_v4i32(<4 x i32> %v, i32 %i) { +; SIMD128-LABEL: extract_var_v4i32: +; SIMD128: .functype extract_var_v4i32 (v128, i32) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $2=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 3 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 2 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $2, $pop3 +; SIMD128-NEXT: i32.load $push5=, 0($pop4) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: extract_var_v4i32: +; NO-SIMD128: .functype extract_var_v4i32 (i32, i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push6=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push7=, 16 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; NO-SIMD128-NEXT: local.tee $push8=, $5=, $pop9 +; NO-SIMD128-NEXT: i32.store 12($pop8), $3 +; NO-SIMD128-NEXT: i32.store 8($5), $2 +; NO-SIMD128-NEXT: i32.store 4($5), $1 +; NO-SIMD128-NEXT: i32.store 0($5), $0 +; NO-SIMD128-NEXT: i32.const $push0=, 3 +; NO-SIMD128-NEXT: i32.and $push1=, $4, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 2 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $5, $pop3 +; NO-SIMD128-NEXT: i32.load $push5=, 0($pop4) +; NO-SIMD128-NEXT: return $pop5 %elem = extractelement <4 x i32> %v, i32 %i ret i32 %elem } -; CHECK-LABEL: extract_zero_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype extract_zero_v4i32 (v128) -> (i32){{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[R:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i32 @extract_zero_v4i32(<4 x i32> %v) { +; SIMD128-LABEL: extract_zero_v4i32: +; SIMD128: .functype extract_zero_v4i32 (v128) -> (i32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.extract_lane $push0=, $0, 0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_zero_v4i32: +; NO-SIMD128: .functype extract_zero_v4i32 (i32, i32, i32, i32) -> (i32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $0 %elem = extractelement <4 x i32> %v, i32 0 ret i32 %elem } -; CHECK-LABEL: replace_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype replace_v4i32 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $0, 2, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @replace_v4i32(<4 x i32> %v, i32 %x) { +; SIMD128-LABEL: replace_v4i32: +; SIMD128: .functype replace_v4i32 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.replace_lane $push0=, $0, 2, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_v4i32: +; NO-SIMD128: .functype replace_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 8($0), $5 +; NO-SIMD128-NEXT: i32.store 4($0), $2 +; NO-SIMD128-NEXT: i32.store 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store 0($pop1), $4 +; NO-SIMD128-NEXT: return %res = insertelement <4 x i32> %v, i32 %x, i32 2 ret <4 x i32> %res } -; CHECK-LABEL: replace_var_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype replace_var_v4i32 (v128, i32, i32) -> (v128){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L4:[0-9]+]]=, 3{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L4]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 2{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L4:[0-9]+]]=, $3, $pop[[L7]]{{$}} -; SIMD128-NEXT: i32.store 0($pop[[L4]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @replace_var_v4i32(<4 x i32> %v, i32 %i, i32 %x) { +; SIMD128-LABEL: replace_var_v4i32: +; SIMD128: .functype replace_var_v4i32 (v128, i32, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $3=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 3 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 2 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $3, $pop3 +; SIMD128-NEXT: i32.store 0($pop4), $2 +; SIMD128-NEXT: v128.load $push5=, 0($3) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: replace_var_v4i32: +; NO-SIMD128: .functype replace_var_v4i32 (i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push7=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push8=, 16 +; NO-SIMD128-NEXT: i32.sub $push10=, $pop7, $pop8 +; NO-SIMD128-NEXT: local.tee $push9=, $7=, $pop10 +; NO-SIMD128-NEXT: i32.store 12($pop9), $4 +; NO-SIMD128-NEXT: i32.store 8($7), $3 +; NO-SIMD128-NEXT: i32.store 4($7), $2 +; NO-SIMD128-NEXT: i32.store 0($7), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 3 +; NO-SIMD128-NEXT: i32.and $push1=, $5, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 2 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $7, $pop3 +; NO-SIMD128-NEXT: i32.store 0($pop4), $6 +; NO-SIMD128-NEXT: i64.load $push5=, 8($7) +; NO-SIMD128-NEXT: i64.store 8($0), $pop5 +; NO-SIMD128-NEXT: i64.load $push6=, 0($7) +; NO-SIMD128-NEXT: i64.store 0($0), $pop6 +; NO-SIMD128-NEXT: return %res = insertelement <4 x i32> %v, i32 %x, i32 %i ret <4 x i32> %res } -; CHECK-LABEL: replace_zero_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype replace_zero_v4i32 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) { +; SIMD128-LABEL: replace_zero_v4i32: +; SIMD128: .functype replace_zero_v4i32 (v128, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.replace_lane $push0=, $0, 0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_zero_v4i32: +; NO-SIMD128: .functype replace_zero_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 8($0), $3 +; NO-SIMD128-NEXT: i32.store 4($0), $2 +; NO-SIMD128-NEXT: i32.store 0($0), $5 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store 0($pop1), $4 +; NO-SIMD128-NEXT: return %res = insertelement <4 x i32> %v, i32 %x, i32 0 ret <4 x i32> %res } -; CHECK-LABEL: shuffle_v4i32: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, -; SIMD128-SAME: 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: shuffle_v4i32: +; SIMD128: .functype shuffle_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $1, 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_v4i32: +; NO-SIMD128: .functype shuffle_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 8($0), $3 +; NO-SIMD128-NEXT: i32.store 4($0), $6 +; NO-SIMD128-NEXT: i32.store 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store 0($pop1), $8 +; NO-SIMD128-NEXT: return %res = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> ret <4 x i32> %res } -; CHECK-LABEL: shuffle_undef_v4i32: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_undef_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, -; SIMD128-SAME: 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: shuffle_undef_v4i32: +; SIMD128: .functype shuffle_undef_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_undef_v4i32: +; NO-SIMD128: .functype shuffle_undef_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 8($0), $2 +; NO-SIMD128-NEXT: i32.store 4($0), $2 +; NO-SIMD128-NEXT: i32.store 0($0), $2 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store 0($pop1), $2 +; NO-SIMD128-NEXT: return %res = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> ret <4 x i32> %res } -; CHECK-LABEL: build_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype build_v4i32 (i32, i32, i32, i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.splat $push[[L0:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: i32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 1, $1{{$}} -; SIMD128-NEXT: i32x4.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $2{{$}} -; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L2]], 3, $3{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @build_v4i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { +; SIMD128-LABEL: build_v4i32: +; SIMD128: .functype build_v4i32 (i32, i32, i32, i32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i32x4.splat $push0=, $0 +; SIMD128-NEXT: i32x4.replace_lane $push1=, $pop0, 1, $1 +; SIMD128-NEXT: i32x4.replace_lane $push2=, $pop1, 2, $2 +; SIMD128-NEXT: i32x4.replace_lane $push3=, $pop2, 3, $3 +; SIMD128-NEXT: return $pop3 +; +; NO-SIMD128-LABEL: build_v4i32: +; NO-SIMD128: .functype build_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 8($0), $3 +; NO-SIMD128-NEXT: i32.store 4($0), $2 +; NO-SIMD128-NEXT: i32.store 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: i32.store 0($pop1), $4 +; NO-SIMD128-NEXT: return %t0 = insertelement <4 x i32> undef, i32 %x0, i32 0 %t1 = insertelement <4 x i32> %t0, i32 %x1, i32 1 %t2 = insertelement <4 x i32> %t1, i32 %x2, i32 2 @@ -671,143 +1518,252 @@ define <4 x i32> @build_v4i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; ============================================================================== ; 2 x i64 ; ============================================================================== -; CHECK-LABEL: const_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype const_v2i64 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 506097522914230528, 1084818905618843912{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @const_v2i64() { +; SIMD128-LABEL: const_v2i64: +; SIMD128: .functype const_v2i64 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 506097522914230528, 1084818905618843912 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_v2i64: +; NO-SIMD128: .functype const_v2i64 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 1084818905618843912 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 506097522914230528 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <2 x i64> } -; CHECK-LABEL: splat_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype splat_v2i64 (i64) -> (v128){{$}} -; SIMD128-NEXT: i64x2.splat $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @splat_v2i64(i64 %x) { +; SIMD128-LABEL: splat_v2i64: +; SIMD128: .functype splat_v2i64 (i64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.splat $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: splat_v2i64: +; NO-SIMD128: .functype splat_v2i64 (i32, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.store 8($0), $1 +; NO-SIMD128-NEXT: i64.store 0($0), $1 +; NO-SIMD128-NEXT: return %t1 = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0 %res = insertelement <2 x i64> %t1, i64 %x, i32 1 ret <2 x i64> %res } -; CHECK-LABEL: const_splat_v2i64: -; SIMD128: v128.const $push0=, 42, 42{{$}} define <2 x i64> @const_splat_v2i64() { +; SIMD128-LABEL: const_splat_v2i64: +; SIMD128: .functype const_splat_v2i64 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 42, 42 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_splat_v2i64: +; NO-SIMD128: .functype const_splat_v2i64 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 42 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 42 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <2 x i64> } -; CHECK-LABEL: extract_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype extract_v2i64 (v128) -> (i64){{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[R:[0-9]+]]=, $0, 1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i64 @extract_v2i64(<2 x i64> %v) { +; SIMD128-LABEL: extract_v2i64: +; SIMD128: .functype extract_v2i64 (v128) -> (i64) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.extract_lane $push0=, $0, 1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_v2i64: +; NO-SIMD128: .functype extract_v2i64 (i64, i64) -> (i64) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $1 %elem = extractelement <2 x i64> %v, i64 1 ret i64 %elem } -; CHECK-LABEL: extract_var_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype extract_var_v2i64 (v128, i32) -> (i64){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 3{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $2, $pop[[L7]]{{$}} -; SIMD128-NEXT: i64.load $push[[R:[0-9]+]]=, 0($pop[[L2]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i64 @extract_var_v2i64(<2 x i64> %v, i32 %i) { +; SIMD128-LABEL: extract_var_v2i64: +; SIMD128: .functype extract_var_v2i64 (v128, i32) -> (i64) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $2=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 1 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 3 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $2, $pop3 +; SIMD128-NEXT: i64.load $push5=, 0($pop4) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: extract_var_v2i64: +; NO-SIMD128: .functype extract_var_v2i64 (i64, i64, i32) -> (i64) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push6=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push7=, 16 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; NO-SIMD128-NEXT: local.tee $push8=, $3=, $pop9 +; NO-SIMD128-NEXT: i64.store 8($pop8), $1 +; NO-SIMD128-NEXT: i64.store 0($3), $0 +; NO-SIMD128-NEXT: i32.const $push0=, 1 +; NO-SIMD128-NEXT: i32.and $push1=, $2, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 3 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $3, $pop3 +; NO-SIMD128-NEXT: i64.load $push5=, 0($pop4) +; NO-SIMD128-NEXT: return $pop5 %elem = extractelement <2 x i64> %v, i32 %i ret i64 %elem } -; CHECK-LABEL: extract_zero_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype extract_zero_v2i64 (v128) -> (i64){{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[R:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define i64 @extract_zero_v2i64(<2 x i64> %v) { +; SIMD128-LABEL: extract_zero_v2i64: +; SIMD128: .functype extract_zero_v2i64 (v128) -> (i64) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.extract_lane $push0=, $0, 0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_zero_v2i64: +; NO-SIMD128: .functype extract_zero_v2i64 (i64, i64) -> (i64) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $0 %elem = extractelement <2 x i64> %v, i64 0 ret i64 %elem } -; CHECK-LABEL: replace_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype replace_v2i64 (v128, i64) -> (v128){{$}} -; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @replace_v2i64(<2 x i64> %v, i64 %x) { +; SIMD128-LABEL: replace_v2i64: +; SIMD128: .functype replace_v2i64 (v128, i64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.replace_lane $push0=, $0, 0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_v2i64: +; NO-SIMD128: .functype replace_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.store 8($0), $2 +; NO-SIMD128-NEXT: i64.store 0($0), $3 +; NO-SIMD128-NEXT: return %res = insertelement <2 x i64> %v, i64 %x, i32 0 ret <2 x i64> %res } -; CHECK-LABEL: replace_var_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype replace_var_v2i64 (v128, i32, i64) -> (v128){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 3{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $3, $pop[[L7]]{{$}} -; SIMD128-NEXT: i64.store 0($pop[[L2]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @replace_var_v2i64(<2 x i64> %v, i32 %i, i64 %x) { +; SIMD128-LABEL: replace_var_v2i64: +; SIMD128: .functype replace_var_v2i64 (v128, i32, i64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $3=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 1 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 3 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $3, $pop3 +; SIMD128-NEXT: i64.store 0($pop4), $2 +; SIMD128-NEXT: v128.load $push5=, 0($3) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: replace_var_v2i64: +; NO-SIMD128: .functype replace_var_v2i64 (i32, i64, i64, i32, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push7=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push8=, 16 +; NO-SIMD128-NEXT: i32.sub $push10=, $pop7, $pop8 +; NO-SIMD128-NEXT: local.tee $push9=, $5=, $pop10 +; NO-SIMD128-NEXT: i64.store 8($pop9), $2 +; NO-SIMD128-NEXT: i64.store 0($5), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 1 +; NO-SIMD128-NEXT: i32.and $push1=, $3, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 3 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $5, $pop3 +; NO-SIMD128-NEXT: i64.store 0($pop4), $4 +; NO-SIMD128-NEXT: i64.load $push5=, 8($5) +; NO-SIMD128-NEXT: i64.store 8($0), $pop5 +; NO-SIMD128-NEXT: i64.load $push6=, 0($5) +; NO-SIMD128-NEXT: i64.store 0($0), $pop6 +; NO-SIMD128-NEXT: return %res = insertelement <2 x i64> %v, i64 %x, i32 %i ret <2 x i64> %res } -; CHECK-LABEL: replace_zero_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype replace_zero_v2i64 (v128, i64) -> (v128){{$}} -; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @replace_zero_v2i64(<2 x i64> %v, i64 %x) { +; SIMD128-LABEL: replace_zero_v2i64: +; SIMD128: .functype replace_zero_v2i64 (v128, i64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.replace_lane $push0=, $0, 0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_zero_v2i64: +; NO-SIMD128: .functype replace_zero_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.store 8($0), $2 +; NO-SIMD128-NEXT: i64.store 0($0), $3 +; NO-SIMD128-NEXT: return %res = insertelement <2 x i64> %v, i64 %x, i32 0 ret <2 x i64> %res } -; CHECK-LABEL: shuffle_v2i64: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, -; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shuffle_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-LABEL: shuffle_v2i64: +; SIMD128: .functype shuffle_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $1, 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_v2i64: +; NO-SIMD128: .functype shuffle_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.store 8($0), $4 +; NO-SIMD128-NEXT: i64.store 0($0), $1 +; NO-SIMD128-NEXT: return %res = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> ret <2 x i64> %res } -; CHECK-LABEL: shuffle_undef_v2i64: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_undef_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, -; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shuffle_undef_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-LABEL: shuffle_undef_v2i64: +; SIMD128: .functype shuffle_undef_v2i64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_undef_v2i64: +; NO-SIMD128: .functype shuffle_undef_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.store 8($0), $2 +; NO-SIMD128-NEXT: i64.store 0($0), $2 +; NO-SIMD128-NEXT: return %res = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> ret <2 x i64> %res } -; CHECK-LABEL: build_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype build_v2i64 (i64, i64) -> (v128){{$}} -; SIMD128-NEXT: i64x2.splat $push[[L0:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L0]], 1, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @build_v2i64(i64 %x0, i64 %x1) { +; SIMD128-LABEL: build_v2i64: +; SIMD128: .functype build_v2i64 (i64, i64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i64x2.splat $push0=, $0 +; SIMD128-NEXT: i64x2.replace_lane $push1=, $pop0, 1, $1 +; SIMD128-NEXT: return $pop1 +; +; NO-SIMD128-LABEL: build_v2i64: +; NO-SIMD128: .functype build_v2i64 (i32, i64, i64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.store 8($0), $2 +; NO-SIMD128-NEXT: i64.store 0($0), $1 +; NO-SIMD128-NEXT: return %t0 = insertelement <2 x i64> undef, i64 %x0, i32 0 %res = insertelement <2 x i64> %t0, i64 %x1, i32 1 ret <2 x i64> %res @@ -816,149 +1772,285 @@ define <2 x i64> @build_v2i64(i64 %x0, i64 %x1) { ; ============================================================================== ; 4 x f32 ; ============================================================================== -; CHECK-LABEL: const_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype const_v4f32 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, -; SIMD128-SAME: 0x1.0402p-121, 0x1.0c0a08p-113, 0x1.14121p-105, 0x1.1c1a18p-97{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @const_v4f32() { +; SIMD128-LABEL: const_v4f32: +; SIMD128: .functype const_v4f32 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.0402p-121, 0x1.0c0a08p-113, 0x1.14121p-105, 0x1.1c1a18p-97 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_v4f32: +; NO-SIMD128: .functype const_v4f32 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 1084818905618843912 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 506097522914230528 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <4 x float> } -; CHECK-LABEL: splat_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype splat_v4f32 (f32) -> (v128){{$}} -; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @splat_v4f32(float %x) { +; SIMD128-LABEL: splat_v4f32: +; SIMD128: .functype splat_v4f32 (f32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.splat $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: splat_v4f32: +; NO-SIMD128: .functype splat_v4f32 (i32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 8($0), $1 +; NO-SIMD128-NEXT: f32.store 4($0), $1 +; NO-SIMD128-NEXT: f32.store 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: f32.store 0($pop1), $1 +; NO-SIMD128-NEXT: return %v = insertelement <4 x float> undef, float %x, i32 0 %res = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> ret <4 x float> %res } -; CHECK-LABEL: const_splat_v4f32 -; SIMD128: v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5{{$}} define <4 x float> @const_splat_v4f32() { +; SIMD128-LABEL: const_splat_v4f32: +; SIMD128: .functype const_splat_v4f32 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_splat_v4f32: +; NO-SIMD128: .functype const_splat_v4f32 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 4767060206681587712 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 4767060206681587712 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <4 x float> } -; CHECK-LABEL: extract_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype extract_v4f32 (v128) -> (f32){{$}} -; SIMD128-NEXT: f32x4.extract_lane $push[[R:[0-9]+]]=, $0, 3{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define float @extract_v4f32(<4 x float> %v) { +; SIMD128-LABEL: extract_v4f32: +; SIMD128: .functype extract_v4f32 (v128) -> (f32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 3 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_v4f32: +; NO-SIMD128: .functype extract_v4f32 (f32, f32, f32, f32) -> (f32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $3 %elem = extractelement <4 x float> %v, i32 3 ret float %elem } -; CHECK-LABEL: extract_var_v4f32: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype extract_var_v4f32 (v128, i32) -> (f32){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 3{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 2{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $2, $pop[[L7]]{{$}} -; SIMD128-NEXT: f32.load $push[[R:[0-9]+]]=, 0($pop[[L2]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define float @extract_var_v4f32(<4 x float> %v, i32 %i) { +; SIMD128-LABEL: extract_var_v4f32: +; SIMD128: .functype extract_var_v4f32 (v128, i32) -> (f32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $2=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 3 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 2 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $2, $pop3 +; SIMD128-NEXT: f32.load $push5=, 0($pop4) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: extract_var_v4f32: +; NO-SIMD128: .functype extract_var_v4f32 (f32, f32, f32, f32, i32) -> (f32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push6=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push7=, 16 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; NO-SIMD128-NEXT: local.tee $push8=, $5=, $pop9 +; NO-SIMD128-NEXT: f32.store 12($pop8), $3 +; NO-SIMD128-NEXT: f32.store 8($5), $2 +; NO-SIMD128-NEXT: f32.store 4($5), $1 +; NO-SIMD128-NEXT: f32.store 0($5), $0 +; NO-SIMD128-NEXT: i32.const $push0=, 3 +; NO-SIMD128-NEXT: i32.and $push1=, $4, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 2 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $5, $pop3 +; NO-SIMD128-NEXT: f32.load $push5=, 0($pop4) +; NO-SIMD128-NEXT: return $pop5 %elem = extractelement <4 x float> %v, i32 %i ret float %elem } -; CHECK-LABEL: extract_zero_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype extract_zero_v4f32 (v128) -> (f32){{$}} -; SIMD128-NEXT: f32x4.extract_lane $push[[R:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define float @extract_zero_v4f32(<4 x float> %v) { +; SIMD128-LABEL: extract_zero_v4f32: +; SIMD128: .functype extract_zero_v4f32 (v128) -> (f32) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_zero_v4f32: +; NO-SIMD128: .functype extract_zero_v4f32 (f32, f32, f32, f32) -> (f32) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $0 %elem = extractelement <4 x float> %v, i32 0 ret float %elem } -; CHECK-LABEL: replace_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype replace_v4f32 (v128, f32) -> (v128){{$}} -; SIMD128-NEXT: f32x4.replace_lane $push[[R:[0-9]+]]=, $0, 2, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @replace_v4f32(<4 x float> %v, float %x) { +; SIMD128-LABEL: replace_v4f32: +; SIMD128: .functype replace_v4f32 (v128, f32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.replace_lane $push0=, $0, 2, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_v4f32: +; NO-SIMD128: .functype replace_v4f32 (i32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 8($0), $5 +; NO-SIMD128-NEXT: f32.store 4($0), $2 +; NO-SIMD128-NEXT: f32.store 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: f32.store 0($pop1), $4 +; NO-SIMD128-NEXT: return %res = insertelement <4 x float> %v, float %x, i32 2 ret <4 x float> %res } -; CHECK-LABEL: replace_var_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype replace_var_v4f32 (v128, i32, f32) -> (v128){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 3{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 2{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $3, $pop[[L7]]{{$}} -; SIMD128-NEXT: f32.store 0($pop[[L2]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @replace_var_v4f32(<4 x float> %v, i32 %i, float %x) { +; SIMD128-LABEL: replace_var_v4f32: +; SIMD128: .functype replace_var_v4f32 (v128, i32, f32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $3=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 3 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 2 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $3, $pop3 +; SIMD128-NEXT: f32.store 0($pop4), $2 +; SIMD128-NEXT: v128.load $push5=, 0($3) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: replace_var_v4f32: +; NO-SIMD128: .functype replace_var_v4f32 (i32, f32, f32, f32, f32, i32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push7=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push8=, 16 +; NO-SIMD128-NEXT: i32.sub $push10=, $pop7, $pop8 +; NO-SIMD128-NEXT: local.tee $push9=, $7=, $pop10 +; NO-SIMD128-NEXT: f32.store 12($pop9), $4 +; NO-SIMD128-NEXT: f32.store 8($7), $3 +; NO-SIMD128-NEXT: f32.store 4($7), $2 +; NO-SIMD128-NEXT: f32.store 0($7), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 3 +; NO-SIMD128-NEXT: i32.and $push1=, $5, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 2 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $7, $pop3 +; NO-SIMD128-NEXT: f32.store 0($pop4), $6 +; NO-SIMD128-NEXT: i64.load $push5=, 8($7) +; NO-SIMD128-NEXT: i64.store 8($0), $pop5 +; NO-SIMD128-NEXT: i64.load $push6=, 0($7) +; NO-SIMD128-NEXT: i64.store 0($0), $pop6 +; NO-SIMD128-NEXT: return %res = insertelement <4 x float> %v, float %x, i32 %i ret <4 x float> %res } -; CHECK-LABEL: replace_zero_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype replace_zero_v4f32 (v128, f32) -> (v128){{$}} -; SIMD128-NEXT: f32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) { +; SIMD128-LABEL: replace_zero_v4f32: +; SIMD128: .functype replace_zero_v4f32 (v128, f32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.replace_lane $push0=, $0, 0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_zero_v4f32: +; NO-SIMD128: .functype replace_zero_v4f32 (i32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 8($0), $3 +; NO-SIMD128-NEXT: f32.store 4($0), $2 +; NO-SIMD128-NEXT: f32.store 0($0), $5 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: f32.store 0($pop1), $4 +; NO-SIMD128-NEXT: return %res = insertelement <4 x float> %v, float %x, i32 0 ret <4 x float> %res } -; CHECK-LABEL: shuffle_v4f32: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, -; SIMD128-SAME: 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: shuffle_v4f32: +; SIMD128: .functype shuffle_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $1, 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_v4f32: +; NO-SIMD128: .functype shuffle_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 8($0), $3 +; NO-SIMD128-NEXT: f32.store 4($0), $6 +; NO-SIMD128-NEXT: f32.store 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: f32.store 0($pop1), $8 +; NO-SIMD128-NEXT: return %res = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> ret <4 x float> %res } -; CHECK-LABEL: shuffle_undef_v4f32: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_undef_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, -; SIMD128-SAME: 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: shuffle_undef_v4f32: +; SIMD128: .functype shuffle_undef_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_undef_v4f32: +; NO-SIMD128: .functype shuffle_undef_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 8($0), $2 +; NO-SIMD128-NEXT: f32.store 4($0), $2 +; NO-SIMD128-NEXT: f32.store 0($0), $2 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: f32.store 0($pop1), $2 +; NO-SIMD128-NEXT: return %res = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> ret <4 x float> %res } -; CHECK-LABEL: build_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype build_v4f32 (f32, f32, f32, f32) -> (v128){{$}} -; SIMD128-NEXT: f32x4.splat $push[[L0:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 1, $1{{$}} -; SIMD128-NEXT: f32x4.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $2{{$}} -; SIMD128-NEXT: f32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L2]], 3, $3{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @build_v4f32(float %x0, float %x1, float %x2, float %x3) { +; SIMD128-LABEL: build_v4f32: +; SIMD128: .functype build_v4f32 (f32, f32, f32, f32) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f32x4.splat $push0=, $0 +; SIMD128-NEXT: f32x4.replace_lane $push1=, $pop0, 1, $1 +; SIMD128-NEXT: f32x4.replace_lane $push2=, $pop1, 2, $2 +; SIMD128-NEXT: f32x4.replace_lane $push3=, $pop2, 3, $3 +; SIMD128-NEXT: return $pop3 +; +; NO-SIMD128-LABEL: build_v4f32: +; NO-SIMD128: .functype build_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 8($0), $3 +; NO-SIMD128-NEXT: f32.store 4($0), $2 +; NO-SIMD128-NEXT: f32.store 0($0), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 12 +; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-NEXT: f32.store 0($pop1), $4 +; NO-SIMD128-NEXT: return %t0 = insertelement <4 x float> undef, float %x0, i32 0 %t1 = insertelement <4 x float> %t0, float %x1, i32 1 %t2 = insertelement <4 x float> %t1, float %x2, i32 2 @@ -969,144 +2061,253 @@ define <4 x float> @build_v4f32(float %x0, float %x1, float %x2, float %x3) { ; ============================================================================== ; 2 x f64 ; ============================================================================== -; CHECK-LABEL: const_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype const_v2f64 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.60504030201p-911, 0x1.e0d0c0b0a0908p-783{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @const_v2f64() { +; SIMD128-LABEL: const_v2f64: +; SIMD128: .functype const_v2f64 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.60504030201p-911, 0x1.e0d0c0b0a0908p-783 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_v2f64: +; NO-SIMD128: .functype const_v2f64 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 1084818905618843912 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 506097522914230528 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <2 x double> } -; CHECK-LABEL: splat_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype splat_v2f64 (f64) -> (v128){{$}} -; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @splat_v2f64(double %x) { +; SIMD128-LABEL: splat_v2f64: +; SIMD128: .functype splat_v2f64 (f64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.splat $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: splat_v2f64: +; NO-SIMD128: .functype splat_v2f64 (i32, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.store 8($0), $1 +; NO-SIMD128-NEXT: f64.store 0($0), $1 +; NO-SIMD128-NEXT: return %t1 = insertelement <2 x double> zeroinitializer, double %x, i3 0 %res = insertelement <2 x double> %t1, double %x, i32 1 ret <2 x double> %res } -; CHECK-LABEL: const_splat_v2f64: -; SIMD128: v128.const $push0=, 0x1.5p5, 0x1.5p5{{$}} define <2 x double> @const_splat_v2f64() { +; SIMD128-LABEL: const_splat_v2f64: +; SIMD128: .functype const_splat_v2f64 () -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: v128.const $push0=, 0x1.5p5, 0x1.5p5 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: const_splat_v2f64: +; NO-SIMD128: .functype const_splat_v2f64 (i32) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i64.const $push0=, 4631107791820423168 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 4631107791820423168 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return ret <2 x double> } -; CHECK-LABEL: extract_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype extract_v2f64 (v128) -> (f64){{$}} -; SIMD128-NEXT: f64x2.extract_lane $push[[R:[0-9]+]]=, $0, 1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define double @extract_v2f64(<2 x double> %v) { +; SIMD128-LABEL: extract_v2f64: +; SIMD128: .functype extract_v2f64 (v128) -> (f64) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_v2f64: +; NO-SIMD128: .functype extract_v2f64 (f64, f64) -> (f64) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $1 %elem = extractelement <2 x double> %v, i32 1 ret double %elem } -; CHECK-LABEL: extract_var_v2f64: -; NO-SIMD128-NOT: i62x2 -; SIMD128-NEXT: .functype extract_var_v2f64 (v128, i32) -> (f64){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $2=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 3{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $2, $pop[[L7]]{{$}} -; SIMD128-NEXT: f64.load $push[[R:[0-9]+]]=, 0($pop[[L2]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define double @extract_var_v2f64(<2 x double> %v, i32 %i) { +; SIMD128-LABEL: extract_var_v2f64: +; SIMD128: .functype extract_var_v2f64 (v128, i32) -> (f64) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $2=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 1 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 3 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $2, $pop3 +; SIMD128-NEXT: f64.load $push5=, 0($pop4) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: extract_var_v2f64: +; NO-SIMD128: .functype extract_var_v2f64 (f64, f64, i32) -> (f64) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push6=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push7=, 16 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; NO-SIMD128-NEXT: local.tee $push8=, $3=, $pop9 +; NO-SIMD128-NEXT: f64.store 8($pop8), $1 +; NO-SIMD128-NEXT: f64.store 0($3), $0 +; NO-SIMD128-NEXT: i32.const $push0=, 1 +; NO-SIMD128-NEXT: i32.and $push1=, $2, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 3 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $3, $pop3 +; NO-SIMD128-NEXT: f64.load $push5=, 0($pop4) +; NO-SIMD128-NEXT: return $pop5 %elem = extractelement <2 x double> %v, i32 %i ret double %elem } -; CHECK-LABEL: extract_zero_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype extract_zero_v2f64 (v128) -> (f64){{$}} -; SIMD128-NEXT: f64x2.extract_lane $push[[R:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define double @extract_zero_v2f64(<2 x double> %v) { +; SIMD128-LABEL: extract_zero_v2f64: +; SIMD128: .functype extract_zero_v2f64 (v128) -> (f64) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.extract_lane $push0=, $0, 0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: extract_zero_v2f64: +; NO-SIMD128: .functype extract_zero_v2f64 (f64, f64) -> (f64) +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: return $0 %elem = extractelement <2 x double> %v, i32 0 ret double %elem } -; CHECK-LABEL: replace_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype replace_v2f64 (v128, f64) -> (v128){{$}} -; SIMD128-NEXT: f64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @replace_v2f64(<2 x double> %v, double %x) { +; SIMD128-LABEL: replace_v2f64: +; SIMD128: .functype replace_v2f64 (v128, f64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.replace_lane $push0=, $0, 0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_v2f64: +; NO-SIMD128: .functype replace_v2f64 (i32, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.store 8($0), $2 +; NO-SIMD128-NEXT: f64.store 0($0), $3 +; NO-SIMD128-NEXT: return %res = insertelement <2 x double> %v, double %x, i32 0 ret <2 x double> %res } -; CHECK-LABEL: replace_var_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype replace_var_v2f64 (v128, i32, f64) -> (v128){{$}} -; SIMD128-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer{{$}} -; SIMD128-NEXT: i32.const $push[[L1:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: local.tee $push[[L3:[0-9]+]]=, $3=, $pop[[L2]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[L3]]), $0{{$}} -; SIMD128-NEXT: i32.const $push[[L2:[0-9]+]]=, 1{{$}} -; SIMD128-NEXT: i32.and $push[[L5:[0-9]+]]=, $1, $pop[[L2]]{{$}} -; SIMD128-NEXT: i32.const $push[[L6:[0-9]+]]=, 3{{$}} -; SIMD128-NEXT: i32.shl $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}} -; SIMD128-NEXT: i32.or $push[[L2:[0-9]+]]=, $3, $pop[[L7]]{{$}} -; SIMD128-NEXT: f64.store 0($pop[[L2]]), $2{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($3){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @replace_var_v2f64(<2 x double> %v, i32 %i, double %x) { +; SIMD128-LABEL: replace_var_v2f64: +; SIMD128: .functype replace_var_v2f64 (v128, i32, f64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: global.get $push6=, __stack_pointer +; SIMD128-NEXT: i32.const $push7=, 16 +; SIMD128-NEXT: i32.sub $push9=, $pop6, $pop7 +; SIMD128-NEXT: local.tee $push8=, $3=, $pop9 +; SIMD128-NEXT: v128.store 0($pop8), $0 +; SIMD128-NEXT: i32.const $push0=, 1 +; SIMD128-NEXT: i32.and $push1=, $1, $pop0 +; SIMD128-NEXT: i32.const $push2=, 3 +; SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; SIMD128-NEXT: i32.or $push4=, $3, $pop3 +; SIMD128-NEXT: f64.store 0($pop4), $2 +; SIMD128-NEXT: v128.load $push5=, 0($3) +; SIMD128-NEXT: return $pop5 +; +; NO-SIMD128-LABEL: replace_var_v2f64: +; NO-SIMD128: .functype replace_var_v2f64 (i32, f64, f64, i32, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: global.get $push7=, __stack_pointer +; NO-SIMD128-NEXT: i32.const $push8=, 16 +; NO-SIMD128-NEXT: i32.sub $push10=, $pop7, $pop8 +; NO-SIMD128-NEXT: local.tee $push9=, $5=, $pop10 +; NO-SIMD128-NEXT: f64.store 8($pop9), $2 +; NO-SIMD128-NEXT: f64.store 0($5), $1 +; NO-SIMD128-NEXT: i32.const $push0=, 1 +; NO-SIMD128-NEXT: i32.and $push1=, $3, $pop0 +; NO-SIMD128-NEXT: i32.const $push2=, 3 +; NO-SIMD128-NEXT: i32.shl $push3=, $pop1, $pop2 +; NO-SIMD128-NEXT: i32.or $push4=, $5, $pop3 +; NO-SIMD128-NEXT: f64.store 0($pop4), $4 +; NO-SIMD128-NEXT: f64.load $push5=, 8($5) +; NO-SIMD128-NEXT: f64.store 8($0), $pop5 +; NO-SIMD128-NEXT: f64.load $push6=, 0($5) +; NO-SIMD128-NEXT: f64.store 0($0), $pop6 +; NO-SIMD128-NEXT: return %res = insertelement <2 x double> %v, double %x, i32 %i ret <2 x double> %res } -; CHECK-LABEL: replace_zero_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype replace_zero_v2f64 (v128, f64) -> (v128){{$}} -; SIMD128-NEXT: f64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @replace_zero_v2f64(<2 x double> %v, double %x) { +; SIMD128-LABEL: replace_zero_v2f64: +; SIMD128: .functype replace_zero_v2f64 (v128, f64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.replace_lane $push0=, $0, 0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: replace_zero_v2f64: +; NO-SIMD128: .functype replace_zero_v2f64 (i32, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.store 8($0), $2 +; NO-SIMD128-NEXT: f64.store 0($0), $3 +; NO-SIMD128-NEXT: return %res = insertelement <2 x double> %v, double %x, i32 0 ret <2 x double> %res } -; CHECK-LABEL: shuffle_v2f64: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $1, -; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @shuffle_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-LABEL: shuffle_v2f64: +; SIMD128: .functype shuffle_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $1, 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_v2f64: +; NO-SIMD128: .functype shuffle_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.store 8($0), $4 +; NO-SIMD128-NEXT: f64.store 0($0), $1 +; NO-SIMD128-NEXT: return %res = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> ret <2 x double> %res } -; CHECK-LABEL: shuffle_undef_v2f64: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shuffle_undef_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0, -; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @shuffle_undef_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-LABEL: shuffle_undef_v2f64: +; SIMD128: .functype shuffle_undef_v2f64 (v128, v128) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: shuffle_undef_v2f64: +; NO-SIMD128: .functype shuffle_undef_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.store 8($0), $2 +; NO-SIMD128-NEXT: f64.store 0($0), $2 +; NO-SIMD128-NEXT: return %res = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> ret <2 x double> %res } -; CHECK-LABEL: build_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype build_v2f64 (f64, f64) -> (v128){{$}} -; SIMD128-NEXT: f64x2.splat $push[[L0:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: f64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L0]], 1, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @build_v2f64(double %x0, double %x1) { +; SIMD128-LABEL: build_v2f64: +; SIMD128: .functype build_v2f64 (f64, f64) -> (v128) +; SIMD128-NEXT: # %bb.0: +; SIMD128-NEXT: f64x2.splat $push0=, $0 +; SIMD128-NEXT: f64x2.replace_lane $push1=, $pop0, 1, $1 +; SIMD128-NEXT: return $pop1 +; +; NO-SIMD128-LABEL: build_v2f64: +; NO-SIMD128: .functype build_v2f64 (i32, f64, f64) -> () +; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f64.store 8($0), $2 +; NO-SIMD128-NEXT: f64.store 0($0), $1 +; NO-SIMD128-NEXT: return %t0 = insertelement <2 x double> undef, double %x0, i32 0 %res = insertelement <2 x double> %t0, double %x1, i32 1 ret <2 x double> %res diff --git a/llvm/test/CodeGen/WebAssembly/stack-protector.ll b/llvm/test/CodeGen/WebAssembly/stack-protector.ll index 3a97849..1b36b61 100644 --- a/llvm/test/CodeGen/WebAssembly/stack-protector.ll +++ b/llvm/test/CodeGen/WebAssembly/stack-protector.ll @@ -1,14 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=wasm32-unknown-unknown < %s | FileCheck -check-prefix=WASM32 %s @"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; [#uses=1] -; WASM32-LABEL: test: -; WASM32: i32.load 28 -; WASM32: br_if 0 -; WASM32: call __stack_chk_fail -; WASM32-NEXT: unreachable - define void @test(ptr %a) nounwind ssp { +; WASM32-LABEL: test: +; WASM32: .functype test (i32) -> () +; WASM32-NEXT: .local i32 +; WASM32-NEXT: # %bb.0: # %entry +; WASM32-NEXT: global.get __stack_pointer +; WASM32-NEXT: i32.const 32 +; WASM32-NEXT: i32.sub +; WASM32-NEXT: local.tee 1 +; WASM32-NEXT: global.set __stack_pointer +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32.store 16 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 0 +; WASM32-NEXT: i32.load __stack_chk_guard +; WASM32-NEXT: i32.store 28 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 20 +; WASM32-NEXT: i32.add +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: call strcpy +; WASM32-NEXT: drop +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 20 +; WASM32-NEXT: i32.add +; WASM32-NEXT: i32.store 0 +; WASM32-NEXT: i32.const LC +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: call printf +; WASM32-NEXT: drop +; WASM32-NEXT: block +; WASM32-NEXT: i32.const 0 +; WASM32-NEXT: i32.load __stack_chk_guard +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.load 28 +; WASM32-NEXT: i32.eq +; WASM32-NEXT: br_if 0 # 0: down to label0 +; WASM32-NEXT: # %bb.1: # %return +; WASM32-NEXT: call __stack_chk_fail +; WASM32-NEXT: unreachable +; WASM32-NEXT: .LBB0_2: # %return +; WASM32-NEXT: end_block # label0: +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 32 +; WASM32-NEXT: i32.add +; WASM32-NEXT: global.set __stack_pointer +; WASM32-NEXT: # fallthrough-return entry: %a_addr = alloca ptr ; [#uses=2] %buf = alloca [8 x i8] ; [#uses=2] @@ -23,11 +66,56 @@ return: ; preds = %entry ret void } -; WASM32-LABEL: test_return_i32: -; WASM32: call __stack_chk_fail -; WASM32-NEXT: unreachable - define i32 @test_return_i32(ptr %a) nounwind ssp { +; WASM32-LABEL: test_return_i32: +; WASM32: .functype test_return_i32 (i32) -> (i32) +; WASM32-NEXT: .local i32 +; WASM32-NEXT: # %bb.0: # %entry +; WASM32-NEXT: global.get __stack_pointer +; WASM32-NEXT: i32.const 32 +; WASM32-NEXT: i32.sub +; WASM32-NEXT: local.tee 1 +; WASM32-NEXT: global.set __stack_pointer +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: i32.store 16 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 0 +; WASM32-NEXT: i32.load __stack_chk_guard +; WASM32-NEXT: i32.store 28 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 20 +; WASM32-NEXT: i32.add +; WASM32-NEXT: local.get 0 +; WASM32-NEXT: call strcpy +; WASM32-NEXT: drop +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 20 +; WASM32-NEXT: i32.add +; WASM32-NEXT: i32.store 0 +; WASM32-NEXT: i32.const LC +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: call printf +; WASM32-NEXT: drop +; WASM32-NEXT: block +; WASM32-NEXT: i32.const 0 +; WASM32-NEXT: i32.load __stack_chk_guard +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.load 28 +; WASM32-NEXT: i32.eq +; WASM32-NEXT: br_if 0 # 0: down to label1 +; WASM32-NEXT: # %bb.1: # %return +; WASM32-NEXT: call __stack_chk_fail +; WASM32-NEXT: unreachable +; WASM32-NEXT: .LBB1_2: # %return +; WASM32-NEXT: end_block # label1: +; WASM32-NEXT: local.get 1 +; WASM32-NEXT: i32.const 32 +; WASM32-NEXT: i32.add +; WASM32-NEXT: global.set __stack_pointer +; WASM32-NEXT: i32.const 0 +; WASM32-NEXT: # fallthrough-return entry: %a_addr = alloca ptr ; [#uses=2] %buf = alloca [8 x i8] ; [#uses=2] diff --git a/llvm/test/CodeGen/WebAssembly/umulo-i64.ll b/llvm/test/CodeGen/WebAssembly/umulo-i64.ll index dabe643..85b6bd2 100644 --- a/llvm/test/CodeGen/WebAssembly/umulo-i64.ll +++ b/llvm/test/CodeGen/WebAssembly/umulo-i64.ll @@ -1,11 +1,24 @@ -; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s ; Test that UMULO works correctly on 64-bit operands. target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: _ZN4core3num21_$LT$impl$u20$u64$GT$15overflowing_mul17h07be88b4cbac028fE: -; CHECK: __multi3 ; Function Attrs: inlinehint define void @"_ZN4core3num21_$LT$impl$u20$u64$GT$15overflowing_mul17h07be88b4cbac028fE"(i64, i64) unnamed_addr #0 { +; CHECK-LABEL: _ZN4core3num21_$LT$impl$u20$u64$GT$15overflowing_mul17h07be88b4cbac028fE: +; CHECK: .functype _ZN4core3num21_$LT$impl$u20$u64$GT$15overflowing_mul17h07be88b4cbac028fE (i64, i64) -> () +; CHECK-NEXT: # %bb.0: # %start +; CHECK-NEXT: global.get $push2=, __stack_pointer +; CHECK-NEXT: i32.const $push3=, 16 +; CHECK-NEXT: i32.sub $push6=, $pop2, $pop3 +; CHECK-NEXT: local.tee $push5=, $2=, $pop6 +; CHECK-NEXT: global.set __stack_pointer, $pop5 +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.const $push4=, 0 +; CHECK-NEXT: call __multi3, $2, $0, $pop0, $1, $pop4 +; CHECK-NEXT: i64.load $push1=, 0($2) +; CHECK-NEXT: i64.store 0($2), $pop1 +; CHECK-NEXT: unreachable start: %2 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %0, i64 %1) %3 = extractvalue { i64, i1 } %2, 0 @@ -19,10 +32,25 @@ declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1 attributes #0 = { inlinehint } attributes #1 = { nounwind readnone speculatable } -; CHECK-LABEL: wut: -; CHECK: call __multi3, $2, $0, $pop0, $1, $pop7 -; CHECK: i64.load $1=, 8($2) define i1 @wut(i64, i64) { +; CHECK-LABEL: wut: +; CHECK: .functype wut (i64, i64) -> (i32) +; CHECK-NEXT: # %bb.0: # %start +; CHECK-NEXT: global.get $push2=, __stack_pointer +; CHECK-NEXT: i32.const $push3=, 16 +; CHECK-NEXT: i32.sub $push9=, $pop2, $pop3 +; CHECK-NEXT: local.tee $push8=, $2=, $pop9 +; CHECK-NEXT: global.set __stack_pointer, $pop8 +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.const $push7=, 0 +; CHECK-NEXT: call __multi3, $2, $0, $pop0, $1, $pop7 +; CHECK-NEXT: i64.load $1=, 8($2) +; CHECK-NEXT: i32.const $push4=, 16 +; CHECK-NEXT: i32.add $push5=, $2, $pop4 +; CHECK-NEXT: global.set __stack_pointer, $pop5 +; CHECK-NEXT: i64.const $push6=, 0 +; CHECK-NEXT: i64.ne $push1=, $1, $pop6 +; CHECK-NEXT: # fallthrough-return start: %2 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %0, i64 %1) %3 = extractvalue { i64, i1 } %2, 1 diff --git a/llvm/test/CodeGen/WebAssembly/userstack.ll b/llvm/test/CodeGen/WebAssembly/userstack.ll index 61706db..98218db 100644 --- a/llvm/test/CodeGen/WebAssembly/userstack.ll +++ b/llvm/test/CodeGen/WebAssembly/userstack.ll @@ -1,287 +1,669 @@ -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=32 %s -; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=64 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=32 %s --check-prefix=CHECK-32 +; RUN: llc < %s --mtriple=wasm64-unknown-unknown -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=64 %s --check-prefix=CHECK-64 declare void @ext_func(ptr %ptr) declare void @ext_func_i32(ptr %ptr) -; CHECK: .globaltype __stack_pointer, i[[PTR]]{{$}} - -; CHECK-LABEL: alloca32: ; Check that there is an extra local for the stack pointer. -; CHECK: .local i[[PTR]]{{$}} define void @alloca32() noredzone { - ; CHECK-NEXT: global.get $push[[L2:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i[[PTR]].const $push[[L3:.+]]=, 16 - ; CHECK-NEXT: i[[PTR]].sub $push[[L9:.+]]=, $pop[[L2]], $pop[[L3]] - ; CHECK-NEXT: local.tee $push[[L8:.+]]=, [[SP:.+]], $pop[[L9]]{{$}} - ; CHECK-NEXT: global.set __stack_pointer, $pop[[L8]]{{$}} +; CHECK-32-LABEL: alloca32: +; CHECK-32: .functype alloca32 () -> () +; CHECK-32-NEXT: .local i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push1=, __stack_pointer +; CHECK-32-NEXT: i32.const $push2=, 16 +; CHECK-32-NEXT: i32.sub $push6=, $pop1, $pop2 +; CHECK-32-NEXT: local.tee $push5=, 0, $pop6 +; CHECK-32-NEXT: global.set __stack_pointer, $pop5 +; CHECK-32-NEXT: local.get $push7=, 0 +; CHECK-32-NEXT: i32.const $push0=, 0 +; CHECK-32-NEXT: i32.store 12($pop7), $pop0 +; CHECK-32-NEXT: local.get $push8=, 0 +; CHECK-32-NEXT: i32.const $push3=, 16 +; CHECK-32-NEXT: i32.add $push4=, $pop8, $pop3 +; CHECK-32-NEXT: global.set __stack_pointer, $pop4 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: alloca32: +; CHECK-64: .functype alloca32 () -> () +; CHECK-64-NEXT: .local i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push1=, __stack_pointer +; CHECK-64-NEXT: i64.const $push2=, 16 +; CHECK-64-NEXT: i64.sub $push6=, $pop1, $pop2 +; CHECK-64-NEXT: local.tee $push5=, 0, $pop6 +; CHECK-64-NEXT: global.set __stack_pointer, $pop5 +; CHECK-64-NEXT: local.get $push7=, 0 +; CHECK-64-NEXT: i32.const $push0=, 0 +; CHECK-64-NEXT: i32.store 12($pop7), $pop0 +; CHECK-64-NEXT: local.get $push8=, 0 +; CHECK-64-NEXT: i64.const $push3=, 16 +; CHECK-64-NEXT: i64.add $push4=, $pop8, $pop3 +; CHECK-64-NEXT: global.set __stack_pointer, $pop4 +; CHECK-64-NEXT: return %retval = alloca i32 - ; CHECK: local.get $push[[L4:.+]]=, [[SP]]{{$}} - ; CHECK: i32.const $push[[L0:.+]]=, 0 - ; CHECK: i32.store 12($pop[[L4]]), $pop[[L0]] store i32 0, ptr %retval - ; CHECK: local.get $push[[L6:.+]]=, [[SP]]{{$}} - ; CHECK-NEXT: i[[PTR]].const $push[[L5:.+]]=, 16 - ; CHECK-NEXT: i[[PTR]].add $push[[L7:.+]]=, $pop[[L6]], $pop[[L5]] - ; CHECK-NEXT: global.set __stack_pointer, $pop[[L7]] ret void } -; CHECK-LABEL: alloca3264: -; CHECK: .local i[[PTR]]{{$}} define void @alloca3264() { - ; CHECK: global.get $push[[L3:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i[[PTR]].const $push[[L4:.+]]=, 16 - ; CHECK-NEXT: i[[PTR]].sub $push[[L6:.+]]=, $pop[[L3]], $pop[[L4]] - ; CHECK-NEXT: local.tee $push[[L5:.+]]=, [[SP:.+]], $pop[[L6]] +; CHECK-32-LABEL: alloca3264: +; CHECK-32: .functype alloca3264 () -> () +; CHECK-32-NEXT: .local i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push2=, __stack_pointer +; CHECK-32-NEXT: i32.const $push3=, 16 +; CHECK-32-NEXT: i32.sub $push5=, $pop2, $pop3 +; CHECK-32-NEXT: local.tee $push4=, 0, $pop5 +; CHECK-32-NEXT: i64.const $push0=, 0 +; CHECK-32-NEXT: i64.store 0($pop4), $pop0 +; CHECK-32-NEXT: local.get $push6=, 0 +; CHECK-32-NEXT: i32.const $push1=, 0 +; CHECK-32-NEXT: i32.store 12($pop6), $pop1 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: alloca3264: +; CHECK-64: .functype alloca3264 () -> () +; CHECK-64-NEXT: .local i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push2=, __stack_pointer +; CHECK-64-NEXT: i64.const $push3=, 16 +; CHECK-64-NEXT: i64.sub $push5=, $pop2, $pop3 +; CHECK-64-NEXT: local.tee $push4=, 0, $pop5 +; CHECK-64-NEXT: i64.const $push0=, 0 +; CHECK-64-NEXT: i64.store 0($pop4), $pop0 +; CHECK-64-NEXT: local.get $push6=, 0 +; CHECK-64-NEXT: i32.const $push1=, 0 +; CHECK-64-NEXT: i32.store 12($pop6), $pop1 +; CHECK-64-NEXT: return %r1 = alloca i32 %r2 = alloca double store i32 0, ptr %r1 store double 0.0, ptr %r2 - ; CHECK-NEXT: i64.const $push[[L1:.+]]=, 0 - ; CHECK-NEXT: i64.store 0($pop[[L5]]), $pop[[L1]] - ; CHECK-NEXT: local.get $push[[L2:.+]]=, [[SP]]{{$}} - ; CHECK-NEXT: i32.const $push[[L0:.+]]=, 0 - ; CHECK-NEXT: i32.store 12($pop[[L2]]), $pop[[L0]] - ; CHECK-NEXT: return ret void } -; CHECK-LABEL: allocarray: -; CHECK: .local i[[PTR]]{{$}} define void @allocarray() { - ; CHECK-NEXT: global.get $push[[L4:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i[[PTR]].const $push[[L5:.+]]=, 144{{$}} - ; CHECK-NEXT: i[[PTR]].sub $push[[L12:.+]]=, $pop[[L4]], $pop[[L5]] - ; CHECK-NEXT: local.tee $push[[L11:.+]]=, 0, $pop[[L12]] - ; CHECK-NEXT: global.set __stack_pointer, $pop[[L11]] +; CHECK-32-LABEL: allocarray: +; CHECK-32: .functype allocarray () -> () +; CHECK-32-NEXT: .local i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push3=, __stack_pointer +; CHECK-32-NEXT: i32.const $push4=, 144 +; CHECK-32-NEXT: i32.sub $push9=, $pop3, $pop4 +; CHECK-32-NEXT: local.tee $push8=, 0, $pop9 +; CHECK-32-NEXT: global.set __stack_pointer, $pop8 +; CHECK-32-NEXT: local.get $push10=, 0 +; CHECK-32-NEXT: i32.const $push0=, 24 +; CHECK-32-NEXT: i32.add $push1=, $pop10, $pop0 +; CHECK-32-NEXT: i32.const $push2=, 1 +; CHECK-32-NEXT: i32.store 0($pop1), $pop2 +; CHECK-32-NEXT: local.get $push11=, 0 +; CHECK-32-NEXT: i32.const $push7=, 1 +; CHECK-32-NEXT: i32.store 12($pop11), $pop7 +; CHECK-32-NEXT: local.get $push12=, 0 +; CHECK-32-NEXT: i32.const $push5=, 144 +; CHECK-32-NEXT: i32.add $push6=, $pop12, $pop5 +; CHECK-32-NEXT: global.set __stack_pointer, $pop6 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: allocarray: +; CHECK-64: .functype allocarray () -> () +; CHECK-64-NEXT: .local i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push3=, __stack_pointer +; CHECK-64-NEXT: i64.const $push4=, 144 +; CHECK-64-NEXT: i64.sub $push9=, $pop3, $pop4 +; CHECK-64-NEXT: local.tee $push8=, 0, $pop9 +; CHECK-64-NEXT: global.set __stack_pointer, $pop8 +; CHECK-64-NEXT: local.get $push10=, 0 +; CHECK-64-NEXT: i64.const $push0=, 24 +; CHECK-64-NEXT: i64.add $push1=, $pop10, $pop0 +; CHECK-64-NEXT: i32.const $push2=, 1 +; CHECK-64-NEXT: i32.store 0($pop1), $pop2 +; CHECK-64-NEXT: local.get $push11=, 0 +; CHECK-64-NEXT: i32.const $push7=, 1 +; CHECK-64-NEXT: i32.store 12($pop11), $pop7 +; CHECK-64-NEXT: local.get $push12=, 0 +; CHECK-64-NEXT: i64.const $push5=, 144 +; CHECK-64-NEXT: i64.add $push6=, $pop12, $pop5 +; CHECK-64-NEXT: global.set __stack_pointer, $pop6 +; CHECK-64-NEXT: return %r = alloca [33 x i32] - - ; CHECK: i[[PTR]].const $push{{.+}}=, 24 - ; CHECK-NEXT: i[[PTR]].add $push[[L3:.+]]=, $pop{{.+}}, $pop{{.+}} - ; CHECK-NEXT: i32.const $push[[L1:.+]]=, 1{{$}} - ; CHECK-NEXT: i32.store 0($pop[[L3]]), $pop[[L1]]{{$}} - ; CHECK-NEXT: local.get $push[[L4:.+]]=, 0{{$}} - ; CHECK-NEXT: i32.const $push[[L10:.+]]=, 1{{$}} - ; CHECK-NEXT: i32.store 12($pop[[L4]]), $pop[[L10]]{{$}} store i32 1, ptr %r %p2 = getelementptr [33 x i32], ptr %r, i32 0, i32 3 store i32 1, ptr %p2 - - ; CHECK-NEXT: local.get $push[[L2:.+]]=, [[SP]]{{$}} - ; CHECK-NEXT: i[[PTR]].const $push[[L7:.+]]=, 144 - ; CHECK-NEXT: i[[PTR]].add $push[[L8:.+]]=, $pop[[L2]], $pop[[L7]] - ; CHECK-NEXT: global.set __stack_pointer, $pop[[L8]] ret void } -; CHECK-LABEL: non_mem_use define void @non_mem_use(ptr %addr) { - ; CHECK: i[[PTR]].const $push[[L2:.+]]=, 48 - ; CHECK-NEXT: i[[PTR]].sub $push[[L12:.+]]=, {{.+}}, $pop[[L2]] - ; CHECK-NEXT: local.tee $push[[L11:.+]]=, [[SP:.+]], $pop[[L12]] - ; CHECK-NEXT: global.set __stack_pointer, $pop[[L11]] +; CHECK-32-LABEL: non_mem_use: +; CHECK-32: .functype non_mem_use (i32) -> () +; CHECK-32-NEXT: .local i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push0=, __stack_pointer +; CHECK-32-NEXT: i32.const $push1=, 48 +; CHECK-32-NEXT: i32.sub $push9=, $pop0, $pop1 +; CHECK-32-NEXT: local.tee $push8=, 1, $pop9 +; CHECK-32-NEXT: global.set __stack_pointer, $pop8 +; CHECK-32-NEXT: local.get $push10=, 1 +; CHECK-32-NEXT: i32.const $push4=, 8 +; CHECK-32-NEXT: i32.add $push5=, $pop10, $pop4 +; CHECK-32-NEXT: call ext_func, $pop5 +; CHECK-32-NEXT: local.get $push11=, 1 +; CHECK-32-NEXT: call ext_func, $pop11 +; CHECK-32-NEXT: local.get $push13=, 0 +; CHECK-32-NEXT: local.get $push12=, 1 +; CHECK-32-NEXT: i32.const $push6=, 16 +; CHECK-32-NEXT: i32.add $push7=, $pop12, $pop6 +; CHECK-32-NEXT: i32.store 0($pop13), $pop7 +; CHECK-32-NEXT: local.get $push14=, 1 +; CHECK-32-NEXT: i32.const $push2=, 48 +; CHECK-32-NEXT: i32.add $push3=, $pop14, $pop2 +; CHECK-32-NEXT: global.set __stack_pointer, $pop3 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: non_mem_use: +; CHECK-64: .functype non_mem_use (i64) -> () +; CHECK-64-NEXT: .local i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push0=, __stack_pointer +; CHECK-64-NEXT: i64.const $push1=, 48 +; CHECK-64-NEXT: i64.sub $push9=, $pop0, $pop1 +; CHECK-64-NEXT: local.tee $push8=, 1, $pop9 +; CHECK-64-NEXT: global.set __stack_pointer, $pop8 +; CHECK-64-NEXT: local.get $push10=, 1 +; CHECK-64-NEXT: i64.const $push4=, 8 +; CHECK-64-NEXT: i64.add $push5=, $pop10, $pop4 +; CHECK-64-NEXT: call ext_func, $pop5 +; CHECK-64-NEXT: local.get $push11=, 1 +; CHECK-64-NEXT: call ext_func, $pop11 +; CHECK-64-NEXT: local.get $push13=, 0 +; CHECK-64-NEXT: local.get $push12=, 1 +; CHECK-64-NEXT: i64.const $push6=, 16 +; CHECK-64-NEXT: i64.add $push7=, $pop12, $pop6 +; CHECK-64-NEXT: i64.store 0($pop13), $pop7 +; CHECK-64-NEXT: local.get $push14=, 1 +; CHECK-64-NEXT: i64.const $push2=, 48 +; CHECK-64-NEXT: i64.add $push3=, $pop14, $pop2 +; CHECK-64-NEXT: global.set __stack_pointer, $pop3 +; CHECK-64-NEXT: return %buf = alloca [27 x i8], align 16 %r = alloca i64 %r2 = alloca i64 ; %r is at SP+8 - ; CHECK: local.get $push[[L3:.+]]=, [[SP]] - ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 8 - ; CHECK-NEXT: i[[PTR]].add $push[[ARG1:.+]]=, $pop[[L3]], $pop[[OFF]] - ; CHECK-NEXT: call ext_func, $pop[[ARG1]] call void @ext_func(ptr %r) ; %r2 is at SP+0, no add needed - ; CHECK: local.get $push[[L4:.+]]=, [[SP]] - ; CHECK-NEXT: call ext_func, $pop[[L4]] call void @ext_func(ptr %r2) ; Use as a value, but in a store ; %buf is at SP+16 - ; CHECK: local.get $push[[L5:.+]]=, [[SP]] - ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 16 - ; CHECK-NEXT: i[[PTR]].add $push[[VAL:.+]]=, $pop[[L5]], $pop[[OFF]] - ; CHECK-NEXT: i[[PTR]].store 0($pop{{.+}}), $pop[[VAL]] store ptr %buf, ptr %addr ret void } -; CHECK-LABEL: allocarray_inbounds: -; CHECK: .local i[[PTR]]{{$}} define void @allocarray_inbounds() { - ; CHECK: global.get $push[[L3:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i[[PTR]].const $push[[L4:.+]]=, 32{{$}} - ; CHECK-NEXT: i[[PTR]].sub $push[[L11:.+]]=, $pop[[L3]], $pop[[L4]] - ; CHECK-NEXT: local.tee $push[[L10:.+]]=, [[SP:.+]], $pop[[L11]] - ; CHECK-NEXT: global.set __stack_pointer, $pop[[L10]]{{$}} +; CHECK-32-LABEL: allocarray_inbounds: +; CHECK-32: .functype allocarray_inbounds () -> () +; CHECK-32-NEXT: .local i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push2=, __stack_pointer +; CHECK-32-NEXT: i32.const $push3=, 32 +; CHECK-32-NEXT: i32.sub $push8=, $pop2, $pop3 +; CHECK-32-NEXT: local.tee $push7=, 0, $pop8 +; CHECK-32-NEXT: global.set __stack_pointer, $pop7 +; CHECK-32-NEXT: local.get $push9=, 0 +; CHECK-32-NEXT: i32.const $push0=, 1 +; CHECK-32-NEXT: i32.store 24($pop9), $pop0 +; CHECK-32-NEXT: local.get $push10=, 0 +; CHECK-32-NEXT: i32.const $push6=, 1 +; CHECK-32-NEXT: i32.store 12($pop10), $pop6 +; CHECK-32-NEXT: i32.const $push1=, 0 +; CHECK-32-NEXT: call ext_func, $pop1 +; CHECK-32-NEXT: local.get $push11=, 0 +; CHECK-32-NEXT: i32.const $push4=, 32 +; CHECK-32-NEXT: i32.add $push5=, $pop11, $pop4 +; CHECK-32-NEXT: global.set __stack_pointer, $pop5 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: allocarray_inbounds: +; CHECK-64: .functype allocarray_inbounds () -> () +; CHECK-64-NEXT: .local i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push2=, __stack_pointer +; CHECK-64-NEXT: i64.const $push3=, 32 +; CHECK-64-NEXT: i64.sub $push8=, $pop2, $pop3 +; CHECK-64-NEXT: local.tee $push7=, 0, $pop8 +; CHECK-64-NEXT: global.set __stack_pointer, $pop7 +; CHECK-64-NEXT: local.get $push9=, 0 +; CHECK-64-NEXT: i32.const $push0=, 1 +; CHECK-64-NEXT: i32.store 24($pop9), $pop0 +; CHECK-64-NEXT: local.get $push10=, 0 +; CHECK-64-NEXT: i32.const $push6=, 1 +; CHECK-64-NEXT: i32.store 12($pop10), $pop6 +; CHECK-64-NEXT: i64.const $push1=, 0 +; CHECK-64-NEXT: call ext_func, $pop1 +; CHECK-64-NEXT: local.get $push11=, 0 +; CHECK-64-NEXT: i64.const $push4=, 32 +; CHECK-64-NEXT: i64.add $push5=, $pop11, $pop4 +; CHECK-64-NEXT: global.set __stack_pointer, $pop5 +; CHECK-64-NEXT: return %r = alloca [5 x i32] - ; CHECK: i32.const $push[[L3:.+]]=, 1 - ; CHECK-DAG: i32.store 24(${{.+}}), $pop[[L3]] store i32 1, ptr %r ; This store should have both the GEP and the FI folded into it. - ; CHECK-DAG: i32.store 12(${{.+}}), $pop %p2 = getelementptr inbounds [5 x i32], ptr %r, i32 0, i32 3 store i32 1, ptr %p2 call void @ext_func(ptr null); - ; CHECK: call ext_func - ; CHECK: i[[PTR]].const $push[[L5:.+]]=, 32{{$}} - ; CHECK-NEXT: i[[PTR]].add $push[[L7:.+]]=, ${{.+}}, $pop[[L5]] - ; CHECK-NEXT: global.set __stack_pointer, $pop[[L7]] ret void } -; CHECK-LABEL: dynamic_alloca: define void @dynamic_alloca(i32 %alloc) { - ; CHECK: global.get $push[[L13:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: local.tee $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}} ; Target independent codegen bumps the stack pointer. - ; CHECK: i[[PTR]].sub ; Check that SP is written back to memory after decrement - ; CHECK: global.set __stack_pointer, +; CHECK-32-LABEL: dynamic_alloca: +; CHECK-32: .functype dynamic_alloca (i32) -> () +; CHECK-32-NEXT: .local i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push10=, __stack_pointer +; CHECK-32-NEXT: local.tee $push9=, 1, $pop10 +; CHECK-32-NEXT: local.get $push11=, 0 +; CHECK-32-NEXT: i32.const $push0=, 2 +; CHECK-32-NEXT: i32.shl $push1=, $pop11, $pop0 +; CHECK-32-NEXT: i32.const $push2=, 15 +; CHECK-32-NEXT: i32.add $push3=, $pop1, $pop2 +; CHECK-32-NEXT: i32.const $push4=, -16 +; CHECK-32-NEXT: i32.and $push5=, $pop3, $pop4 +; CHECK-32-NEXT: i32.sub $push8=, $pop9, $pop5 +; CHECK-32-NEXT: local.tee $push7=, 0, $pop8 +; CHECK-32-NEXT: global.set __stack_pointer, $pop7 +; CHECK-32-NEXT: local.get $push12=, 0 +; CHECK-32-NEXT: call ext_func_i32, $pop12 +; CHECK-32-NEXT: local.get $push6=, 1 +; CHECK-32-NEXT: global.set __stack_pointer, $pop6 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: dynamic_alloca: +; CHECK-64: .functype dynamic_alloca (i32) -> () +; CHECK-64-NEXT: .local i64, i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push11=, __stack_pointer +; CHECK-64-NEXT: local.tee $push10=, 1, $pop11 +; CHECK-64-NEXT: local.get $push12=, 0 +; CHECK-64-NEXT: i64.extend_i32_u $push0=, $pop12 +; CHECK-64-NEXT: i64.const $push1=, 2 +; CHECK-64-NEXT: i64.shl $push2=, $pop0, $pop1 +; CHECK-64-NEXT: i64.const $push3=, 15 +; CHECK-64-NEXT: i64.add $push4=, $pop2, $pop3 +; CHECK-64-NEXT: i64.const $push5=, 34359738352 +; CHECK-64-NEXT: i64.and $push6=, $pop4, $pop5 +; CHECK-64-NEXT: i64.sub $push9=, $pop10, $pop6 +; CHECK-64-NEXT: local.tee $push8=, 2, $pop9 +; CHECK-64-NEXT: global.set __stack_pointer, $pop8 +; CHECK-64-NEXT: local.get $push13=, 2 +; CHECK-64-NEXT: call ext_func_i32, $pop13 +; CHECK-64-NEXT: local.get $push7=, 1 +; CHECK-64-NEXT: global.set __stack_pointer, $pop7 +; CHECK-64-NEXT: return %r = alloca i32, i32 %alloc ; Target-independent codegen also calculates the store addr - ; CHECK: call ext_func_i32 call void @ext_func_i32(ptr %r) - ; CHECK: global.set __stack_pointer, $pop{{.+}} ret void } -; CHECK-LABEL: dynamic_alloca_redzone: define void @dynamic_alloca_redzone(i32 %alloc) { - ; CHECK: global.get $push[[L13:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: local.tee $push[[L12:.+]]=, [[SP:.+]], $pop[[L13]]{{$}} ; Target independent codegen bumps the stack pointer - ; CHECK: i[[PTR]].sub +; CHECK-32-LABEL: dynamic_alloca_redzone: +; CHECK-32: .functype dynamic_alloca_redzone (i32) -> () +; CHECK-32-NEXT: .local i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push8=, __stack_pointer +; CHECK-32-NEXT: local.tee $push9=, 1, $pop8 +; CHECK-32-NEXT: drop $pop9 +; CHECK-32-NEXT: local.get $push11=, 1 +; CHECK-32-NEXT: local.get $push10=, 0 +; CHECK-32-NEXT: i32.const $push0=, 2 +; CHECK-32-NEXT: i32.shl $push1=, $pop10, $pop0 +; CHECK-32-NEXT: i32.const $push2=, 15 +; CHECK-32-NEXT: i32.add $push3=, $pop1, $pop2 +; CHECK-32-NEXT: i32.const $push4=, -16 +; CHECK-32-NEXT: i32.and $push5=, $pop3, $pop4 +; CHECK-32-NEXT: i32.sub $push7=, $pop11, $pop5 +; CHECK-32-NEXT: local.tee $push12=, 0, $pop7 +; CHECK-32-NEXT: drop $pop12 +; CHECK-32-NEXT: local.get $push13=, 0 +; CHECK-32-NEXT: i32.const $push6=, 0 +; CHECK-32-NEXT: i32.store 0($pop13), $pop6 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: dynamic_alloca_redzone: +; CHECK-64: .functype dynamic_alloca_redzone (i32) -> () +; CHECK-64-NEXT: .local i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push9=, __stack_pointer +; CHECK-64-NEXT: local.tee $push10=, 1, $pop9 +; CHECK-64-NEXT: drop $pop10 +; CHECK-64-NEXT: local.get $push12=, 1 +; CHECK-64-NEXT: local.get $push11=, 0 +; CHECK-64-NEXT: i64.extend_i32_u $push0=, $pop11 +; CHECK-64-NEXT: i64.const $push1=, 2 +; CHECK-64-NEXT: i64.shl $push2=, $pop0, $pop1 +; CHECK-64-NEXT: i64.const $push3=, 15 +; CHECK-64-NEXT: i64.add $push4=, $pop2, $pop3 +; CHECK-64-NEXT: i64.const $push5=, 34359738352 +; CHECK-64-NEXT: i64.and $push6=, $pop4, $pop5 +; CHECK-64-NEXT: i64.sub $push8=, $pop12, $pop6 +; CHECK-64-NEXT: local.tee $push13=, 1, $pop8 +; CHECK-64-NEXT: drop $pop13 +; CHECK-64-NEXT: local.get $push14=, 1 +; CHECK-64-NEXT: i32.const $push7=, 0 +; CHECK-64-NEXT: i32.store 0($pop14), $pop7 +; CHECK-64-NEXT: return %r = alloca i32, i32 %alloc - ; CHECK-NEXT: local.tee $push[[L8:.+]]=, [[SP2:.+]], $pop - ; CHECK: local.get $push[[L7:.+]]=, [[SP2]]{{$}} - ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 0{{$}} - ; CHECK-NEXT: i32.store 0($pop[[L7]]), $pop[[L6]]{{$}} store i32 0, ptr %r - ; CHECK-NEXT: return ret void } -; CHECK-LABEL: dynamic_static_alloca: define void @dynamic_static_alloca(i32 %alloc) noredzone { ; Decrement SP in the prolog by the static amount and writeback to memory. - ; CHECK: global.get $push[[L11:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: i[[PTR]].const $push[[L12:.+]]=, 16 - ; CHECK-NEXT: i[[PTR]].sub $push[[L23:.+]]=, $pop[[L11]], $pop[[L12]] - ; CHECK-NEXT: local.tee $push[[L22:.+]]=, [[SP:.+]], $pop[[L23]] - ; CHECK-NEXT: global.set __stack_pointer, $pop[[L22]] - ; Alloc and write to a static alloca - ; CHECK: local.get $push[[L21:.+]]=, [[SP:.+]] - ; CHECK-NEXT: local.tee $push[[pushedFP:.+]]=, [[FP:.+]], $pop[[L21]] - ; CHECK-NEXT: i32.const $push[[L0:.+]]=, 101 - ; CHECK-NEXT: i32.store [[static_offset:.+]]($pop[[pushedFP]]), $pop[[L0]] +; CHECK-32-LABEL: dynamic_static_alloca: +; CHECK-32: .functype dynamic_static_alloca (i32) -> () +; CHECK-32-NEXT: .local i32, i32, i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push11=, __stack_pointer +; CHECK-32-NEXT: i32.const $push12=, 16 +; CHECK-32-NEXT: i32.sub $push25=, $pop11, $pop12 +; CHECK-32-NEXT: local.tee $push24=, 1, $pop25 +; CHECK-32-NEXT: global.set __stack_pointer, $pop24 +; CHECK-32-NEXT: local.get $push23=, 1 +; CHECK-32-NEXT: local.tee $push22=, 2, $pop23 +; CHECK-32-NEXT: i32.const $push0=, 101 +; CHECK-32-NEXT: i32.store 12($pop22), $pop0 +; CHECK-32-NEXT: local.get $push27=, 1 +; CHECK-32-NEXT: local.get $push26=, 0 +; CHECK-32-NEXT: i32.const $push1=, 2 +; CHECK-32-NEXT: i32.shl $push2=, $pop26, $pop1 +; CHECK-32-NEXT: i32.const $push3=, 15 +; CHECK-32-NEXT: i32.add $push4=, $pop2, $pop3 +; CHECK-32-NEXT: i32.const $push5=, -16 +; CHECK-32-NEXT: i32.and $push21=, $pop4, $pop5 +; CHECK-32-NEXT: local.tee $push20=, 0, $pop21 +; CHECK-32-NEXT: i32.sub $push19=, $pop27, $pop20 +; CHECK-32-NEXT: local.tee $push18=, 1, $pop19 +; CHECK-32-NEXT: local.tee $push17=, 3, $pop18 +; CHECK-32-NEXT: global.set __stack_pointer, $pop17 +; CHECK-32-NEXT: local.get $push28=, 2 +; CHECK-32-NEXT: i32.const $push6=, 102 +; CHECK-32-NEXT: i32.store 12($pop28), $pop6 +; CHECK-32-NEXT: local.get $push29=, 1 +; CHECK-32-NEXT: i32.const $push7=, 103 +; CHECK-32-NEXT: i32.store 0($pop29), $pop7 +; CHECK-32-NEXT: local.get $push31=, 3 +; CHECK-32-NEXT: local.get $push30=, 0 +; CHECK-32-NEXT: i32.sub $push16=, $pop31, $pop30 +; CHECK-32-NEXT: local.tee $push15=, 0, $pop16 +; CHECK-32-NEXT: global.set __stack_pointer, $pop15 +; CHECK-32-NEXT: local.get $push32=, 2 +; CHECK-32-NEXT: i32.const $push8=, 104 +; CHECK-32-NEXT: i32.store 12($pop32), $pop8 +; CHECK-32-NEXT: local.get $push33=, 1 +; CHECK-32-NEXT: i32.const $push9=, 105 +; CHECK-32-NEXT: i32.store 0($pop33), $pop9 +; CHECK-32-NEXT: local.get $push34=, 0 +; CHECK-32-NEXT: i32.const $push10=, 106 +; CHECK-32-NEXT: i32.store 0($pop34), $pop10 +; CHECK-32-NEXT: local.get $push35=, 2 +; CHECK-32-NEXT: i32.const $push13=, 16 +; CHECK-32-NEXT: i32.add $push14=, $pop35, $pop13 +; CHECK-32-NEXT: global.set __stack_pointer, $pop14 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: dynamic_static_alloca: +; CHECK-64: .functype dynamic_static_alloca (i32) -> () +; CHECK-64-NEXT: .local i64, i64, i64, i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push12=, __stack_pointer +; CHECK-64-NEXT: i64.const $push13=, 16 +; CHECK-64-NEXT: i64.sub $push26=, $pop12, $pop13 +; CHECK-64-NEXT: local.tee $push25=, 1, $pop26 +; CHECK-64-NEXT: global.set __stack_pointer, $pop25 +; CHECK-64-NEXT: local.get $push24=, 1 +; CHECK-64-NEXT: local.tee $push23=, 2, $pop24 +; CHECK-64-NEXT: i32.const $push0=, 101 +; CHECK-64-NEXT: i32.store 12($pop23), $pop0 +; CHECK-64-NEXT: local.get $push28=, 1 +; CHECK-64-NEXT: local.get $push27=, 0 +; CHECK-64-NEXT: i64.extend_i32_u $push1=, $pop27 +; CHECK-64-NEXT: i64.const $push2=, 2 +; CHECK-64-NEXT: i64.shl $push3=, $pop1, $pop2 +; CHECK-64-NEXT: i64.const $push4=, 15 +; CHECK-64-NEXT: i64.add $push5=, $pop3, $pop4 +; CHECK-64-NEXT: i64.const $push6=, 34359738352 +; CHECK-64-NEXT: i64.and $push22=, $pop5, $pop6 +; CHECK-64-NEXT: local.tee $push21=, 3, $pop22 +; CHECK-64-NEXT: i64.sub $push20=, $pop28, $pop21 +; CHECK-64-NEXT: local.tee $push19=, 1, $pop20 +; CHECK-64-NEXT: local.tee $push18=, 4, $pop19 +; CHECK-64-NEXT: global.set __stack_pointer, $pop18 +; CHECK-64-NEXT: local.get $push29=, 2 +; CHECK-64-NEXT: i32.const $push7=, 102 +; CHECK-64-NEXT: i32.store 12($pop29), $pop7 +; CHECK-64-NEXT: local.get $push30=, 1 +; CHECK-64-NEXT: i32.const $push8=, 103 +; CHECK-64-NEXT: i32.store 0($pop30), $pop8 +; CHECK-64-NEXT: local.get $push32=, 4 +; CHECK-64-NEXT: local.get $push31=, 3 +; CHECK-64-NEXT: i64.sub $push17=, $pop32, $pop31 +; CHECK-64-NEXT: local.tee $push16=, 3, $pop17 +; CHECK-64-NEXT: global.set __stack_pointer, $pop16 +; CHECK-64-NEXT: local.get $push33=, 2 +; CHECK-64-NEXT: i32.const $push9=, 104 +; CHECK-64-NEXT: i32.store 12($pop33), $pop9 +; CHECK-64-NEXT: local.get $push34=, 1 +; CHECK-64-NEXT: i32.const $push10=, 105 +; CHECK-64-NEXT: i32.store 0($pop34), $pop10 +; CHECK-64-NEXT: local.get $push35=, 3 +; CHECK-64-NEXT: i32.const $push11=, 106 +; CHECK-64-NEXT: i32.store 0($pop35), $pop11 +; CHECK-64-NEXT: local.get $push36=, 2 +; CHECK-64-NEXT: i64.const $push14=, 16 +; CHECK-64-NEXT: i64.add $push15=, $pop36, $pop14 +; CHECK-64-NEXT: global.set __stack_pointer, $pop15 +; CHECK-64-NEXT: return %static = alloca i32 store volatile i32 101, ptr %static - ; Decrement SP in the body by the dynamic amount. - ; CHECK: i[[PTR]].sub - ; CHECK: local.tee $push[[L16:.+]]=, [[dynamic_local:.+]], $pop{{.+}} - ; CHECK: local.tee $push[[L15:.+]]=, [[other:.+]], $pop[[L16]]{{$}} - ; CHECK: global.set __stack_pointer, $pop[[L15]]{{$}} %dynamic = alloca i32, i32 %alloc - ; Ensure we don't modify the frame pointer after assigning it. - ; CHECK-NOT: $[[FP]]= - ; Ensure the static address doesn't change after modifying the stack pointer. - ; CHECK: local.get $push[[L17:.+]]=, [[FP]] - ; CHECK: i32.const $push[[L7:.+]]=, 102 - ; CHECK-NEXT: i32.store [[static_offset]]($pop[[L17]]), $pop[[L7]] - ; CHECK-NEXT: local.get $push[[L9:.+]]=, [[dynamic_local]]{{$}} - ; CHECK-NEXT: i32.const $push[[L8:.+]]=, 103 - ; CHECK-NEXT: i32.store 0($pop[[L9]]), $pop[[L8]] store volatile i32 102, ptr %static store volatile i32 103, ptr %dynamic - ; Decrement SP in the body by the dynamic amount. - ; CHECK: i[[PTR]].sub - ; CHECK: local.tee $push{{.+}}=, [[dynamic2_local:.+]], $pop{{.+}} %dynamic.2 = alloca i32, i32 %alloc - - ; CHECK-NOT: $[[FP]]= - ; Ensure neither the static nor dynamic address changes after the second ; modification of the stack pointer. - ; CHECK: local.get $push[[L22:.+]]=, [[FP]] - ; CHECK: i32.const $push[[L9:.+]]=, 104 - ; CHECK-NEXT: i32.store [[static_offset]]($pop[[L22]]), $pop[[L9]] - ; CHECK-NEXT: local.get $push[[L23:.+]]=, [[dynamic_local]] - ; CHECK-NEXT: i32.const $push[[L10:.+]]=, 105 - ; CHECK-NEXT: i32.store 0($pop[[L23]]), $pop[[L10]] - ; CHECK-NEXT: local.get $push[[L23:.+]]=, [[dynamic2_local]] - ; CHECK-NEXT: i32.const $push[[L11:.+]]=, 106 - ; CHECK-NEXT: i32.store 0($pop[[L23]]), $pop[[L11]] store volatile i32 104, ptr %static store volatile i32 105, ptr %dynamic store volatile i32 106, ptr %dynamic.2 - ; Writeback to memory. - ; CHECK: local.get $push[[L24:.+]]=, [[FP]]{{$}} - ; CHECK: i[[PTR]].const $push[[L18:.+]]=, 16 - ; CHECK-NEXT: i[[PTR]].add $push[[L19:.+]]=, $pop[[L24]], $pop[[L18]] - ; CHECK-NEXT: global.set __stack_pointer, $pop[[L19]] ret void } declare ptr @llvm.stacksave() declare void @llvm.stackrestore(ptr) -; CHECK-LABEL: llvm_stack_builtins: define void @llvm_stack_builtins(i32 %alloc) noredzone { - ; CHECK: global.get $push[[L11:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: local.tee $push[[L10:.+]]=, {{.+}}, $pop[[L11]] - ; CHECK-NEXT: local.set [[STACK:.+]], $pop[[L10]] +; CHECK-32-LABEL: llvm_stack_builtins: +; CHECK-32: .functype llvm_stack_builtins (i32) -> () +; CHECK-32-NEXT: .local i32, i32, i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push7=, __stack_pointer +; CHECK-32-NEXT: local.tee $push8=, 1, $pop7 +; CHECK-32-NEXT: local.set 2, $pop8 +; CHECK-32-NEXT: local.get $push9=, 1 +; CHECK-32-NEXT: local.set 3, $pop9 +; CHECK-32-NEXT: local.get $push11=, 1 +; CHECK-32-NEXT: local.get $push10=, 0 +; CHECK-32-NEXT: i32.const $push0=, 2 +; CHECK-32-NEXT: i32.shl $push1=, $pop10, $pop0 +; CHECK-32-NEXT: i32.const $push2=, 15 +; CHECK-32-NEXT: i32.add $push3=, $pop1, $pop2 +; CHECK-32-NEXT: i32.const $push4=, -16 +; CHECK-32-NEXT: i32.and $push5=, $pop3, $pop4 +; CHECK-32-NEXT: i32.sub $push6=, $pop11, $pop5 +; CHECK-32-NEXT: global.set __stack_pointer, $pop6 +; CHECK-32-NEXT: local.get $push12=, 3 +; CHECK-32-NEXT: drop $pop12 +; CHECK-32-NEXT: local.get $push13=, 2 +; CHECK-32-NEXT: global.set __stack_pointer, $pop13 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: llvm_stack_builtins: +; CHECK-64: .functype llvm_stack_builtins (i32) -> () +; CHECK-64-NEXT: .local i64, i64, i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push8=, __stack_pointer +; CHECK-64-NEXT: local.tee $push9=, 1, $pop8 +; CHECK-64-NEXT: local.set 2, $pop9 +; CHECK-64-NEXT: local.get $push10=, 1 +; CHECK-64-NEXT: local.set 3, $pop10 +; CHECK-64-NEXT: local.get $push12=, 1 +; CHECK-64-NEXT: local.get $push11=, 0 +; CHECK-64-NEXT: i64.extend_i32_u $push0=, $pop11 +; CHECK-64-NEXT: i64.const $push1=, 2 +; CHECK-64-NEXT: i64.shl $push2=, $pop0, $pop1 +; CHECK-64-NEXT: i64.const $push3=, 15 +; CHECK-64-NEXT: i64.add $push4=, $pop2, $pop3 +; CHECK-64-NEXT: i64.const $push5=, 34359738352 +; CHECK-64-NEXT: i64.and $push6=, $pop4, $pop5 +; CHECK-64-NEXT: i64.sub $push7=, $pop12, $pop6 +; CHECK-64-NEXT: global.set __stack_pointer, $pop7 +; CHECK-64-NEXT: local.get $push13=, 3 +; CHECK-64-NEXT: drop $pop13 +; CHECK-64-NEXT: local.get $push14=, 2 +; CHECK-64-NEXT: global.set __stack_pointer, $pop14 +; CHECK-64-NEXT: return %stack = call ptr @llvm.stacksave() - ; Ensure we don't reassign the stacksave local - ; CHECK-NOT: local.set [[STACK]], %dynamic = alloca i32, i32 %alloc - - ; CHECK: local.get $push[[L12:.+]]=, [[STACK]] - ; CHECK-NEXT: global.set __stack_pointer, $pop[[L12]] call void @llvm.stackrestore(ptr %stack) - ret void } ; Not actually using the alloca'd variables exposed an issue with register ; stackification, where copying the stack pointer into the frame pointer was ; moved after the stack pointer was updated for the dynamic alloca. -; CHECK-LABEL: dynamic_alloca_nouse: define void @dynamic_alloca_nouse(i32 %alloc) noredzone { - ; CHECK: global.get $push[[L11:.+]]=, __stack_pointer{{$}} - ; CHECK-NEXT: local.tee $push[[L10:.+]]=, {{.+}}, $pop[[L11]] - ; CHECK-NEXT: local.set [[FP:.+]], $pop[[L10]] +; CHECK-32-LABEL: dynamic_alloca_nouse: +; CHECK-32: .functype dynamic_alloca_nouse (i32) -> () +; CHECK-32-NEXT: .local i32, i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push7=, __stack_pointer +; CHECK-32-NEXT: local.tee $push8=, 1, $pop7 +; CHECK-32-NEXT: local.set 2, $pop8 +; CHECK-32-NEXT: local.get $push10=, 1 +; CHECK-32-NEXT: local.get $push9=, 0 +; CHECK-32-NEXT: i32.const $push0=, 2 +; CHECK-32-NEXT: i32.shl $push1=, $pop9, $pop0 +; CHECK-32-NEXT: i32.const $push2=, 15 +; CHECK-32-NEXT: i32.add $push3=, $pop1, $pop2 +; CHECK-32-NEXT: i32.const $push4=, -16 +; CHECK-32-NEXT: i32.and $push5=, $pop3, $pop4 +; CHECK-32-NEXT: i32.sub $push6=, $pop10, $pop5 +; CHECK-32-NEXT: global.set __stack_pointer, $pop6 +; CHECK-32-NEXT: local.get $push11=, 2 +; CHECK-32-NEXT: global.set __stack_pointer, $pop11 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: dynamic_alloca_nouse: +; CHECK-64: .functype dynamic_alloca_nouse (i32) -> () +; CHECK-64-NEXT: .local i64, i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push8=, __stack_pointer +; CHECK-64-NEXT: local.tee $push9=, 1, $pop8 +; CHECK-64-NEXT: local.set 2, $pop9 +; CHECK-64-NEXT: local.get $push11=, 1 +; CHECK-64-NEXT: local.get $push10=, 0 +; CHECK-64-NEXT: i64.extend_i32_u $push0=, $pop10 +; CHECK-64-NEXT: i64.const $push1=, 2 +; CHECK-64-NEXT: i64.shl $push2=, $pop0, $pop1 +; CHECK-64-NEXT: i64.const $push3=, 15 +; CHECK-64-NEXT: i64.add $push4=, $pop2, $pop3 +; CHECK-64-NEXT: i64.const $push5=, 34359738352 +; CHECK-64-NEXT: i64.and $push6=, $pop4, $pop5 +; CHECK-64-NEXT: i64.sub $push7=, $pop11, $pop6 +; CHECK-64-NEXT: global.set __stack_pointer, $pop7 +; CHECK-64-NEXT: local.get $push12=, 2 +; CHECK-64-NEXT: global.set __stack_pointer, $pop12 +; CHECK-64-NEXT: return %dynamic = alloca i32, i32 %alloc - - ; CHECK-NOT: local.set [[FP]], - - ; CHECK: local.get $push[[L12:.+]]=, [[FP]] - ; CHECK-NEXT: global.set __stack_pointer, $pop[[L12]] ret void } ; The use of the alloca in a phi causes a CopyToReg DAG node to be generated, ; which has to have special handling because CopyToReg can't have a FI operand -; CHECK-LABEL: copytoreg_fi: define void @copytoreg_fi(i1 %cond, ptr %b) { +; CHECK-32-LABEL: copytoreg_fi: +; CHECK-32: .functype copytoreg_fi (i32, i32) -> () +; CHECK-32-NEXT: .local i32 +; CHECK-32-NEXT: # %bb.0: # %entry +; CHECK-32-NEXT: global.get $push0=, __stack_pointer +; CHECK-32-NEXT: i32.const $push1=, 16 +; CHECK-32-NEXT: i32.sub $push3=, $pop0, $pop1 +; CHECK-32-NEXT: i32.const $push2=, 12 +; CHECK-32-NEXT: i32.add $push6=, $pop3, $pop2 +; CHECK-32-NEXT: local.set 2, $pop6 +; CHECK-32-NEXT: local.get $push8=, 0 +; CHECK-32-NEXT: i32.const $push4=, 1 +; CHECK-32-NEXT: i32.and $push7=, $pop8, $pop4 +; CHECK-32-NEXT: local.set 0, $pop7 +; CHECK-32-NEXT: .LBB10_1: # %body +; CHECK-32-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-32-NEXT: loop # label0: +; CHECK-32-NEXT: local.get $push9=, 2 +; CHECK-32-NEXT: i32.const $push5=, 1 +; CHECK-32-NEXT: i32.store 0($pop9), $pop5 +; CHECK-32-NEXT: local.get $push10=, 1 +; CHECK-32-NEXT: local.set 2, $pop10 +; CHECK-32-NEXT: local.get $push11=, 0 +; CHECK-32-NEXT: br_if 0, $pop11 # 0: up to label0 +; CHECK-32-NEXT: # %bb.2: # %exit +; CHECK-32-NEXT: end_loop +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: copytoreg_fi: +; CHECK-64: .functype copytoreg_fi (i32, i64) -> () +; CHECK-64-NEXT: .local i64 +; CHECK-64-NEXT: # %bb.0: # %entry +; CHECK-64-NEXT: global.get $push0=, __stack_pointer +; CHECK-64-NEXT: i64.const $push1=, 16 +; CHECK-64-NEXT: i64.sub $push3=, $pop0, $pop1 +; CHECK-64-NEXT: i64.const $push2=, 12 +; CHECK-64-NEXT: i64.add $push6=, $pop3, $pop2 +; CHECK-64-NEXT: local.set 2, $pop6 +; CHECK-64-NEXT: local.get $push8=, 0 +; CHECK-64-NEXT: i32.const $push4=, 1 +; CHECK-64-NEXT: i32.and $push7=, $pop8, $pop4 +; CHECK-64-NEXT: local.set 0, $pop7 +; CHECK-64-NEXT: .LBB10_1: # %body +; CHECK-64-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-64-NEXT: loop # label0: +; CHECK-64-NEXT: local.get $push9=, 2 +; CHECK-64-NEXT: i32.const $push5=, 1 +; CHECK-64-NEXT: i32.store 0($pop9), $pop5 +; CHECK-64-NEXT: local.get $push10=, 1 +; CHECK-64-NEXT: local.set 2, $pop10 +; CHECK-64-NEXT: local.get $push11=, 0 +; CHECK-64-NEXT: br_if 0, $pop11 # 0: up to label0 +; CHECK-64-NEXT: # %bb.2: # %exit +; CHECK-64-NEXT: end_loop +; CHECK-64-NEXT: return entry: - ; CHECK: i[[PTR]].const $push[[L1:.+]]=, 16 - ; CHECK-NEXT: i[[PTR]].sub $push[[L3:.+]]=, {{.+}}, $pop[[L1]] %addr = alloca i32 - ; CHECK: i[[PTR]].const $push[[OFF:.+]]=, 12 - ; CHECK-NEXT: i[[PTR]].add $push[[ADDR:.+]]=, $pop[[L3]], $pop[[OFF]] - ; CHECK-NEXT: local.set [[COPY:.+]], $pop[[ADDR]] br label %body body: %a = phi ptr [%addr, %entry], [%b, %body] store i32 1, ptr %a - ; CHECK: local.get $push[[L12:.+]]=, [[COPY]] - ; CHECK: i32.store 0($pop[[L12]]), br i1 %cond, label %body, label %exit exit: ret void @@ -291,37 +673,84 @@ declare void @use_i8_star(ptr) declare ptr @llvm.frameaddress(i32) ; Test __builtin_frame_address(0). -; CHECK-LABEL: frameaddress_0: -; CHECK: global.get $push[[L3:.+]]=, __stack_pointer{{$}} -; CHECK-NEXT: local.tee $push[[L2:.+]]=, [[FP:.+]], $pop[[L3]]{{$}} -; CHECK-NEXT: call use_i8_star, $pop[[L2]] -; CHECK-NEXT: local.get $push[[L5:.+]]=, [[FP]] -; CHECK-NEXT: global.set __stack_pointer, $pop[[L5]] define void @frameaddress_0() { +; CHECK-32-LABEL: frameaddress_0: +; CHECK-32: .functype frameaddress_0 () -> () +; CHECK-32-NEXT: .local i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push1=, __stack_pointer +; CHECK-32-NEXT: local.tee $push0=, 0, $pop1 +; CHECK-32-NEXT: call use_i8_star, $pop0 +; CHECK-32-NEXT: local.get $push2=, 0 +; CHECK-32-NEXT: global.set __stack_pointer, $pop2 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: frameaddress_0: +; CHECK-64: .functype frameaddress_0 () -> () +; CHECK-64-NEXT: .local i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push1=, __stack_pointer +; CHECK-64-NEXT: local.tee $push0=, 0, $pop1 +; CHECK-64-NEXT: call use_i8_star, $pop0 +; CHECK-64-NEXT: local.get $push2=, 0 +; CHECK-64-NEXT: global.set __stack_pointer, $pop2 +; CHECK-64-NEXT: return %t = call ptr @llvm.frameaddress(i32 0) call void @use_i8_star(ptr %t) ret void } ; Test __builtin_frame_address(1). - -; CHECK-LABEL: frameaddress_1: -; CHECK: i[[PTR]].const $push0=, 0{{$}} -; CHECK-NEXT: call use_i8_star, $pop0{{$}} -; CHECK-NEXT: return{{$}} define void @frameaddress_1() { +; CHECK-32-LABEL: frameaddress_1: +; CHECK-32: .functype frameaddress_1 () -> () +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: i32.const $push0=, 0 +; CHECK-32-NEXT: call use_i8_star, $pop0 +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: frameaddress_1: +; CHECK-64: .functype frameaddress_1 () -> () +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: i64.const $push0=, 0 +; CHECK-64-NEXT: call use_i8_star, $pop0 +; CHECK-64-NEXT: return %t = call ptr @llvm.frameaddress(i32 1) call void @use_i8_star(ptr %t) ret void } ; Test a stack address passed to an inline asm. -; CHECK-LABEL: inline_asm: -; CHECK: global.get {{.+}}, __stack_pointer{{$}} -; CHECK: #APP -; CHECK-NEXT: # %{{[0-9]+}}{{$}} -; CHECK-NEXT: #NO_APP define void @inline_asm() { +; CHECK-32-LABEL: inline_asm: +; CHECK-32: .functype inline_asm () -> () +; CHECK-32-NEXT: .local i32 +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: global.get $push0=, __stack_pointer +; CHECK-32-NEXT: i32.const $push1=, 16 +; CHECK-32-NEXT: i32.sub $push3=, $pop0, $pop1 +; CHECK-32-NEXT: i32.const $push2=, 15 +; CHECK-32-NEXT: i32.add $push4=, $pop3, $pop2 +; CHECK-32-NEXT: local.set 0, $pop4 +; CHECK-32-NEXT: #APP +; CHECK-32-NEXT: # %0 +; CHECK-32-NEXT: #NO_APP +; CHECK-32-NEXT: return +; +; CHECK-64-LABEL: inline_asm: +; CHECK-64: .functype inline_asm () -> () +; CHECK-64-NEXT: .local i64 +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: global.get $push0=, __stack_pointer +; CHECK-64-NEXT: i64.const $push1=, 16 +; CHECK-64-NEXT: i64.sub $push3=, $pop0, $pop1 +; CHECK-64-NEXT: i64.const $push2=, 15 +; CHECK-64-NEXT: i64.add $push4=, $pop3, $pop2 +; CHECK-64-NEXT: local.set 0, $pop4 +; CHECK-64-NEXT: #APP +; CHECK-64-NEXT: # %0 +; CHECK-64-NEXT: #NO_APP +; CHECK-64-NEXT: return %tmp = alloca i8 call void asm sideeffect "# %0", "r"(ptr %tmp) ret void @@ -330,10 +759,38 @@ define void @inline_asm() { ; We optimize the format of "frame offset + operand" by folding it, but this is ; only possible when that operand is an immediate. In this example it is a ; global address, so we should not fold it. -; CHECK-LABEL: frame_offset_with_global_address -; CHECK: i[[PTR]].const ${{.*}}=, str @str = local_unnamed_addr global [3 x i8] c"abc", align 16 define i8 @frame_offset_with_global_address() { +; CHECK-32-LABEL: frame_offset_with_global_address: +; CHECK-32: .functype frame_offset_with_global_address () -> (i32) +; CHECK-32-NEXT: # %bb.0: +; CHECK-32-NEXT: i32.const $push0=, str +; CHECK-32-NEXT: global.get $push5=, __stack_pointer +; CHECK-32-NEXT: i32.const $push6=, 16 +; CHECK-32-NEXT: i32.sub $push9=, $pop5, $pop6 +; CHECK-32-NEXT: i32.const $push7=, 12 +; CHECK-32-NEXT: i32.add $push8=, $pop9, $pop7 +; CHECK-32-NEXT: i32.add $push1=, $pop0, $pop8 +; CHECK-32-NEXT: i32.load8_u $push2=, 0($pop1) +; CHECK-32-NEXT: i32.const $push3=, 67 +; CHECK-32-NEXT: i32.and $push4=, $pop2, $pop3 +; CHECK-32-NEXT: return $pop4 +; +; CHECK-64-LABEL: frame_offset_with_global_address: +; CHECK-64: .functype frame_offset_with_global_address () -> (i32) +; CHECK-64-NEXT: # %bb.0: +; CHECK-64-NEXT: i64.const $push1=, str +; CHECK-64-NEXT: global.get $push6=, __stack_pointer +; CHECK-64-NEXT: i64.const $push7=, 16 +; CHECK-64-NEXT: i64.sub $push10=, $pop6, $pop7 +; CHECK-64-NEXT: i64.const $push8=, 12 +; CHECK-64-NEXT: i64.add $push9=, $pop10, $pop8 +; CHECK-64-NEXT: i64.extend32_s $push0=, $pop9 +; CHECK-64-NEXT: i64.add $push2=, $pop1, $pop0 +; CHECK-64-NEXT: i32.load8_u $push3=, 0($pop2) +; CHECK-64-NEXT: i32.const $push4=, 67 +; CHECK-64-NEXT: i32.and $push5=, $pop3, $pop4 +; CHECK-64-NEXT: return $pop5 %1 = alloca i8, align 4 %2 = ptrtoint ptr %1 to i32 ;; Here @str is a global address and not an immediate, so cannot be folded