From 51cd326f99bb328cb2c9ca1f6ccc28c11e73a3c1 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 22 Jul 2020 10:12:26 -0700 Subject: [PATCH] [WebAssembly] Autogenerate checks in simd-offset.ll Implementing new functionality tested in this file requires adding new tests for many IR addressing patterns, which can be a large maintenance burden. This patch makes adding tests easier by switching to using autogenerated checks. This patch also removes the testing mode that has simd128 disabled because it would produce very large checks and is not particularly interesting. Differential Revision: https://reviews.llvm.org/D84288 --- llvm/test/CodeGen/WebAssembly/simd-offset.ll | 2757 ++++++++++++++------------ 1 file changed, 1508 insertions(+), 1249 deletions(-) diff --git a/llvm/test/CodeGen/WebAssembly/simd-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-offset.ll index 7ece5b7..9338972 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-offset.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-offset.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-keep-registers -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s ; Test SIMD loads and stores @@ -9,34 +9,37 @@ target triple = "wasm32-unknown-unknown" ; ============================================================================== ; 16 x i8 ; ============================================================================== -; CHECK-LABEL: load_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v16i8 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 
0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8(<16 x i8>* %p) { +; CHECK-LABEL: load_v16i8: +; CHECK: .functype load_v16i8 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %v = load <16 x i8>, <16 x i8>* %p ret <16 x i8> %v } -; CHECK-LABEL: load_splat_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v16i8 (i32) -> (v128){{$}} -; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8(i8* %p) { +; CHECK-LABEL: load_splat_v16i8: +; CHECK: .functype load_splat_v16i8 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: # fallthrough-return %e = load i8, i8* %p %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer ret <16 x i8> %v2 } -; CHECK-LABEL: load_v16i8_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v16i8_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) { +; CHECK-LABEL: load_v16i8_with_folded_offset: +; CHECK: .functype load_v16i8_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <16 x i8>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <16 x i8>* @@ -44,12 +47,13 @@ define <16 x i8> @load_v16i8_with_folded_offset(<16 x i8>* %p) { ret <16 x i8> %v } -; CHECK-LABEL: load_splat_v16i8_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define 
<16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) { +; CHECK-LABEL: load_splat_v16i8_with_folded_offset: +; CHECK: .functype load_splat_v16i8_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v8x16.load_splat 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint i8* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to i8* @@ -59,23 +63,25 @@ define <16 x i8> @load_splat_v16i8_with_folded_offset(i8* %p) { ret <16 x i8> %v2 } -; CHECK-LABEL: load_v16i8_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v16i8_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_with_folded_gep_offset(<16 x i8>* %p) { +; CHECK-LABEL: load_v16i8_with_folded_gep_offset: +; CHECK: .functype load_v16i8_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1 %v = load <16 x i8>, <16 x i8>* %s ret <16 x i8> %v } -; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 1($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) { +; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset: +; CHECK: .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v8x16.load_splat 1 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i8, i8* %p, i32 1 %e = load i8, i8* %s %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 @@ -83,27 +89,29 @@ define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(i8* %p) { ret <16 x i8> %v2 } -; CHECK-LABEL: 
load_v16i8_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(<16 x i8>* %p) { +; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset: +; CHECK: .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1 %v = load <16 x i8>, <16 x i8>* %s ret <16 x i8> %v } -; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -1{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) { +; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset: +; CHECK: .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -1 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i8, i8* %p, i32 -1 %e = load i8, i8* %s %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 @@ -111,14 +119,15 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(i8* %p) { ret <16 x i8> %v2 } -; CHECK-LABEL: 
load_v16i8_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v16i8_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) { +; CHECK-LABEL: load_v16i8_with_unfolded_offset: +; CHECK: .functype load_v16i8_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <16 x i8>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <16 x i8>* @@ -126,14 +135,15 @@ define <16 x i8> @load_v16i8_with_unfolded_offset(<16 x i8>* %p) { ret <16 x i8> %v } -; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) { +; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset: +; CHECK: .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint i8* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to i8* @@ -143,27 +153,29 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_offset(i8* %p) { ret <16 x i8> %v2 } -; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype 
load_v16i8_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_with_unfolded_gep_offset(<16 x i8>* %p) { +; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset: +; CHECK: .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1 %v = load <16 x i8>, <16 x i8>* %s ret <16 x i8> %v } -; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 1{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) { +; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset: +; CHECK: .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 1 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v8x16.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr i8, i8* %p, i32 1 %e = load i8, i8* %s %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 @@ -171,25 +183,25 @@ define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(i8* %p) { ret <16 x i8> %v2 } -; CHECK-LABEL: load_v16i8_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v16i8_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load 
$push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_v16i8_from_numeric_address() { +; CHECK-LABEL: load_v16i8_from_numeric_address: +; CHECK: .functype load_v16i8_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <16 x i8>* %v = load <16 x i8>, <16 x i8>* %s ret <16 x i8> %v } -; CHECK-LABEL: load_splat_v16i8_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v16i8_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @load_splat_v16i8_from_numeric_address() { +; CHECK-LABEL: load_splat_v16i8_from_numeric_address: +; CHECK: .functype load_splat_v16i8_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v8x16.load_splat 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i8* %e = load i8, i8* %s %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 @@ -197,46 +209,52 @@ define <16 x i8> @load_splat_v16i8_from_numeric_address() { ret <16 x i8> %v2 } -; CHECK-LABEL: load_v16i8_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v16i8_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v16i8($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42> define <16 x i8> @load_v16i8_from_global_address() { +; CHECK-LABEL: load_v16i8_from_global_address: +; CHECK: .functype load_v16i8_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load gv_v16i8 +; CHECK-NEXT: # fallthrough-return %v = load <16 x i8>, <16 x i8>* @gv_v16i8 ret <16 x i8> %v } -; CHECK-LABEL: 
load_splat_v16i8_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v16i8_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v8x16.load_splat $push[[R:[0-9]+]]=, gv_i8($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_i8 = global i8 42 define <16 x i8> @load_splat_v16i8_from_global_address() { +; CHECK-LABEL: load_splat_v16i8_from_global_address: +; CHECK: .functype load_splat_v16i8_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v8x16.load_splat gv_i8 +; CHECK-NEXT: # fallthrough-return %e = load i8, i8* @gv_i8 %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer ret <16 x i8> %v2 } -; CHECK-LABEL: store_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v16i8 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} define void @store_v16i8(<16 x i8> %v, <16 x i8>* %p) { +; CHECK-LABEL: store_v16i8: +; CHECK: .functype store_v16i8 (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return store <16 x i8> %v , <16 x i8>* %p ret void } -; CHECK-LABEL: store_v16i8_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v16i8_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void @store_v16i8_with_folded_offset(<16 x i8> %v, <16 x i8>* %p) { +; CHECK-LABEL: store_v16i8_with_folded_offset: +; CHECK: .functype store_v16i8_with_folded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <16 x i8>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <16 x i8>* @@ -244,69 +262,85 @@ define void @store_v16i8_with_folded_offset(<16 x 
i8> %v, <16 x i8>* %p) { ret void } -; CHECK-LABEL: store_v16i8_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v16i8_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, <16 x i8>* %p) { +; CHECK-LABEL: store_v16i8_with_folded_gep_offset: +; CHECK: .functype store_v16i8_with_folded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 1 store <16 x i8> %v , <16 x i8>* %s ret void } -; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, <16 x i8>* %p) { +; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset: +; CHECK: .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1 store <16 x i8> %v , <16 x i8>* %s ret void } -; CHECK-LABEL: store_v16i8_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v16i8_with_unfolded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, <16 x i8>* %p) { +; 
CHECK-LABEL: store_v16i8_with_unfolded_offset: +; CHECK: .functype store_v16i8_with_unfolded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i32 -1 store <16 x i8> %v , <16 x i8>* %s ret void } -; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, <16 x i8>* %p) { +; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset: +; CHECK: .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <16 x i8>, <16 x i8>* %p, i32 1 store <16 x i8> %v , <16 x i8>* %s ret void } -; CHECK-LABEL: store_v16i8_to_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v16i8_to_numeric_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[R]]), $0{{$}} define void @store_v16i8_to_numeric_address(<16 x i8> %v) { +; CHECK-LABEL: store_v16i8_to_numeric_address: +; CHECK: .functype store_v16i8_to_numeric_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <16 x i8>* store <16 x i8> %v , <16 x i8>* %s ret void } -; CHECK-LABEL: store_v16i8_to_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: 
.functype store_v16i8_to_global_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v16i8($pop[[R]]), $0{{$}} define void @store_v16i8_to_global_address(<16 x i8> %v) { +; CHECK-LABEL: store_v16i8_to_global_address: +; CHECK: .functype store_v16i8_to_global_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store gv_v16i8 +; CHECK-NEXT: # fallthrough-return store <16 x i8> %v , <16 x i8>* @gv_v16i8 ret void } @@ -314,66 +348,72 @@ define void @store_v16i8_to_global_address(<16 x i8> %v) { ; ============================================================================== ; 8 x i16 ; ============================================================================== -; CHECK-LABEL: load_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v8i16 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16(<8 x i16>* %p) { +; CHECK-LABEL: load_v8i16: +; CHECK: .functype load_v8i16 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %v = load <8 x i16>, <8 x i16>* %p ret <8 x i16> %v } -; CHECK-LABEL: load_splat_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v8i16 (i32) -> (v128){{$}} -; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16(i16* %p) { +; CHECK-LABEL: load_splat_v8i16: +; CHECK: .functype load_splat_v8i16 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: # fallthrough-return %e = load i16, i16* %p %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer ret <8 x i16> %v2 } -; CHECK-LABEL: load_sext_v8i16: -; NO-SIMD128-NOT: v128 
-; SIMD128-NEXT: .functype load_sext_v8i16 (i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_sext_v8i16(<8 x i8>* %p) { +; CHECK-LABEL: load_sext_v8i16: +; CHECK: .functype load_sext_v8i16 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.load8x8_s 0 +; CHECK-NEXT: # fallthrough-return %v = load <8 x i8>, <8 x i8>* %p %v2 = sext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_zext_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v8i16 (i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_zext_v8i16(<8 x i8>* %p) { +; CHECK-LABEL: load_zext_v8i16: +; CHECK: .functype load_zext_v8i16 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.load8x8_u 0 +; CHECK-NEXT: # fallthrough-return %v = load <8 x i8>, <8 x i8>* %p %v2 = zext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_ext_v8i16: -; NO-SIMD128-NOT: load8x8 -; SIMD128-NEXT: .functype load_ext_v8i16 (i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i8> @load_ext_v8i16(<8 x i8>* %p) { +; CHECK-LABEL: load_ext_v8i16: +; CHECK: .functype load_ext_v8i16 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.load8x8_u 0 +; CHECK-NEXT: # fallthrough-return %v = load <8 x i8>, <8 x i8>* %p ret <8 x i8> %v } -; CHECK-LABEL: load_v8i16_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v8i16_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) { +; CHECK-LABEL: load_v8i16_with_folded_offset: +; CHECK: .functype 
load_v8i16_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <8 x i16>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <8 x i16>* @@ -381,12 +421,13 @@ define <8 x i16> @load_v8i16_with_folded_offset(<8 x i16>* %p) { ret <8 x i16> %v } -; CHECK-LABEL: load_splat_v8i16_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) { +; CHECK-LABEL: load_splat_v8i16_with_folded_offset: +; CHECK: .functype load_splat_v8i16_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v16x8.load_splat 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint i16* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to i16* @@ -396,12 +437,13 @@ define <8 x i16> @load_splat_v8i16_with_folded_offset(i16* %p) { ret <8 x i16> %v2 } -; CHECK-LABEL: load_sext_v8i16_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v8i16_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_sext_v8i16_with_folded_offset: +; CHECK: .functype load_sext_v8i16_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.load8x8_s 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <8 x i8>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <8 x i8>* @@ -410,12 +452,13 @@ define <8 x i16> @load_sext_v8i16_with_folded_offset(<8 x i8>* %p) { ret <8 x i16> %v2 } -; CHECK-LABEL: load_zext_v8i16_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: 
.functype load_zext_v8i16_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_zext_v8i16_with_folded_offset: +; CHECK: .functype load_zext_v8i16_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.load8x8_u 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <8 x i8>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <8 x i8>* @@ -424,12 +467,13 @@ define <8 x i16> @load_zext_v8i16_with_folded_offset(<8 x i8>* %p) { ret <8 x i16> %v2 } -; CHECK-LABEL: load_ext_v8i16_with_folded_offset: -; NO-SIMD128-NOT: load8x8 -; SIMD128-NEXT: .functype load_ext_v8i16_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_ext_v8i16_with_folded_offset: +; CHECK: .functype load_ext_v8i16_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.load8x8_u 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <8 x i8>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <8 x i8>* @@ -437,23 +481,25 @@ define <8 x i8> @load_ext_v8i16_with_folded_offset(<8 x i8>* %p) { ret <8 x i8> %v } -; CHECK-LABEL: load_v8i16_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v8i16_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_with_folded_gep_offset(<8 x i16>* %p) { +; CHECK-LABEL: load_v8i16_with_folded_gep_offset: +; CHECK: .functype load_v8i16_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # 
fallthrough-return %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1 %v = load <8 x i16>, <8 x i16>* %s ret <8 x i16> %v } -; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 2($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) { +; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset: +; CHECK: .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v16x8.load_splat 2 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i16, i16* %p, i32 1 %e = load i16, i16* %s %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 @@ -461,62 +507,67 @@ define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(i16* %p) { ret <8 x i16> %v2 } -; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 8($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset: +; CHECK: .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.load8x8_s 8 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1 %v = load <8 x i8>, <8 x i8>* %s %v2 = sext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 8($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> 
@load_zext_v8i16_with_folded_gep_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset: +; CHECK: .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.load8x8_u 8 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1 %v = load <8 x i8>, <8 x i8>* %s %v2 = zext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset: -; NO-SIMD128-NOT: load8x8 -; SIMD128-NEXT: .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 8($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset: +; CHECK: .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.load8x8_u 8 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1 %v = load <8 x i8>, <8 x i8>* %s ret <8 x i8> %v } -; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(<8 x i16>* %p) { +; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset: +; CHECK: .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <8 x i16>, <8 x i16>* 
%p, i32 -1 %v = load <8 x i16>, <8 x i16>* %s ret <8 x i16> %v } -; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -2{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) { +; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset: +; CHECK: .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -2 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i16, i16* %p, i32 -1 %e = load i16, i16* %s %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 @@ -524,55 +575,59 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(i16* %p) { ret <8 x i16> %v2 } -; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset: +; CHECK: .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i16x8.load8x8_s 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds 
<8 x i8>, <8 x i8>* %p, i32 -1 %v = load <8 x i8>, <8 x i8>* %s %v2 = sext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset: +; CHECK: .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i16x8.load8x8_u 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1 %v = load <8 x i8>, <8 x i8>* %s %v2 = zext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: load8x8 -; SIMD128-NEXT: .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset: +; CHECK: .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i16x8.load8x8_u 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 -1 %v = load <8 x 
i8>, <8 x i8>* %s ret <8 x i8> %v } -; CHECK-LABEL: load_v8i16_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v8i16_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[L0]]{{$}} define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) { +; CHECK-LABEL: load_v8i16_with_unfolded_offset: +; CHECK: .functype load_v8i16_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <8 x i16>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <8 x i16>* @@ -580,14 +635,15 @@ define <8 x i16> @load_v8i16_with_unfolded_offset(<8 x i16>* %p) { ret <8 x i16> %v } -; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v16x8.load_splat $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[L0]]{{$}} define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) { +; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset: +; CHECK: .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint i16* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to i16* @@ -597,14 +653,15 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_offset(i16* %p) { ret <8 x i16> %v2 } -; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset: -; 
NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i16x8.load8x8_s $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[L0]]{{$}} define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset: +; CHECK: .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i16x8.load8x8_s 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <8 x i8>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <8 x i8>* @@ -613,14 +670,15 @@ define <8 x i16> @load_sext_v8i16_with_unfolded_offset(<8 x i8>* %p) { ret <8 x i16> %v2 } -; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[L0]]{{$}} define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset: +; CHECK: .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i16x8.load8x8_u 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <8 x i8>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <8 x i8>* @@ -629,14 +687,15 @@ define <8 x i16> @load_zext_v8i16_with_unfolded_offset(<8 x i8>* %p) { ret <8 x i16> %v2 } -; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset: -; NO-SIMD128-NOT: load8x8 -; SIMD128-NEXT: .functype 
load_ext_v8i16_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[L0:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[L0]]{{$}} define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset: +; CHECK: .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i16x8.load8x8_u 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <8 x i8>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <8 x i8>* @@ -644,27 +703,29 @@ define <8 x i8> @load_ext_v8i16_with_unfolded_offset(<8 x i8>* %p) { ret <8 x i8> %v } -; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_with_unfolded_gep_offset(<8 x i16>* %p) { +; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset: +; CHECK: .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1 %v = load <8 x i16>, <8 x i16>* %s ret <8 x i16> %v } -; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 2{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, 
$pop[[L0]]{{$}} -; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) { +; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset: +; CHECK: .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 2 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v16x8.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr i16, i16* %p, i32 1 %e = load i16, i16* %s %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 @@ -672,66 +733,69 @@ define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(i16* %p) { ret <8 x i16> %v2 } -; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset: +; CHECK: .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i16x8.load8x8_s 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1 %v = load <8 x i8>, <8 x i8>* %s %v2 = sext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 
0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset: +; CHECK: .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i16x8.load8x8_u 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1 %v = load <8 x i8>, <8 x i8>* %s %v2 = zext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset: -; NO-SIMD128-NOT: load8x8 -; SIMD128-NEXT: .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(<8 x i8>* %p) { +; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset: +; CHECK: .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i16x8.load8x8_u 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <8 x i8>, <8 x i8>* %p, i32 1 %v = load <8 x i8>, <8 x i8>* %s ret <8 x i8> %v } -; CHECK-LABEL: load_v8i16_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v8i16_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_v8i16_from_numeric_address() { +; CHECK-LABEL: load_v8i16_from_numeric_address: +; CHECK: .functype load_v8i16_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; 
CHECK-NEXT: v128.load 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <8 x i16>* %v = load <8 x i16>, <8 x i16>* %s ret <8 x i16> %v } -; CHECK-LABEL: load_splat_v8i16_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v8i16_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_splat_v8i16_from_numeric_address() { +; CHECK-LABEL: load_splat_v8i16_from_numeric_address: +; CHECK: .functype load_splat_v8i16_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v16x8.load_splat 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i16* %e = load i16, i16* %s %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 @@ -739,121 +803,127 @@ define <8 x i16> @load_splat_v8i16_from_numeric_address() { ret <8 x i16> %v2 } -; CHECK-LABEL: load_sext_v8i16_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v8i16_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_sext_v8i16_from_numeric_address() { +; CHECK-LABEL: load_sext_v8i16_from_numeric_address: +; CHECK: .functype load_sext_v8i16_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i16x8.load8x8_s 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <8 x i8>* %v = load <8 x i8>, <8 x i8>* %s %v2 = sext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_zext_v8i16_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v8i16_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i16x8.load8x8_u 
$push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_zext_v8i16_from_numeric_address() { +; CHECK-LABEL: load_zext_v8i16_from_numeric_address: +; CHECK: .functype load_zext_v8i16_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i16x8.load8x8_u 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <8 x i8>* %v = load <8 x i8>, <8 x i8>* %s %v2 = zext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_ext_v8i16_from_numeric_address: -; NO-SIMD128-NOT: load8x8 -; SIMD128-NEXT: .functype load_ext_v8i16_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i8> @load_ext_v8i16_from_numeric_address() { +; CHECK-LABEL: load_ext_v8i16_from_numeric_address: +; CHECK: .functype load_ext_v8i16_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i16x8.load8x8_u 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <8 x i8>* %v = load <8 x i8>, <8 x i8>* %s ret <8 x i8> %v } -; CHECK-LABEL: load_v8i16_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v8i16_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v8i16($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v8i16 = global <8 x i16> define <8 x i16> @load_v8i16_from_global_address() { +; CHECK-LABEL: load_v8i16_from_global_address: +; CHECK: .functype load_v8i16_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load gv_v8i16 +; CHECK-NEXT: # fallthrough-return %v = load <8 x i16>, <8 x i16>* @gv_v8i16 ret <8 x i16> %v } -; CHECK-LABEL: load_splat_v8i16_from_global_address: -; NO-SIMD128-NOT: v128 -; 
SIMD128-NEXT: .functype load_splat_v8i16_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v16x8.load_splat $push[[R:[0-9]+]]=, gv_i16($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_i16 = global i16 42 define <8 x i16> @load_splat_v8i16_from_global_address() { +; CHECK-LABEL: load_splat_v8i16_from_global_address: +; CHECK: .functype load_splat_v8i16_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v16x8.load_splat gv_i16 +; CHECK-NEXT: # fallthrough-return %e = load i16, i16* @gv_i16 %v1 = insertelement <8 x i16> undef, i16 %e, i32 0 %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer ret <8 x i16> %v2 } -; CHECK-LABEL: load_sext_v8i16_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v8i16_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i16x8.load8x8_s $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v8i8 = global <8 x i8> define <8 x i16> @load_sext_v8i16_from_global_address() { +; CHECK-LABEL: load_sext_v8i16_from_global_address: +; CHECK: .functype load_sext_v8i16_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i16x8.load8x8_s gv_v8i8 +; CHECK-NEXT: # fallthrough-return %v = load <8 x i8>, <8 x i8>* @gv_v8i8 %v2 = sext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_zext_v8i16_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v8i16_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @load_zext_v8i16_from_global_address() { +; CHECK-LABEL: load_zext_v8i16_from_global_address: +; CHECK: .functype 
load_zext_v8i16_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i16x8.load8x8_u gv_v8i8 +; CHECK-NEXT: # fallthrough-return %v = load <8 x i8>, <8 x i8>* @gv_v8i8 %v2 = zext <8 x i8> %v to <8 x i16> ret <8 x i16> %v2 } -; CHECK-LABEL: load_ext_v8i16_from_global_address: -; NO-SIMD128-NOT: load8x8 -; SIMD128-NEXT: .functype load_ext_v8i16_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i16x8.load8x8_u $push[[R:[0-9]+]]=, gv_v8i8($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i8> @load_ext_v8i16_from_global_address() { +; CHECK-LABEL: load_ext_v8i16_from_global_address: +; CHECK: .functype load_ext_v8i16_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i16x8.load8x8_u gv_v8i8 +; CHECK-NEXT: # fallthrough-return %v = load <8 x i8>, <8 x i8>* @gv_v8i8 ret <8 x i8> %v } -; CHECK-LABEL: store_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v8i16 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} define void @store_v8i16(<8 x i16> %v, <8 x i16>* %p) { +; CHECK-LABEL: store_v8i16: +; CHECK: .functype store_v8i16 (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return store <8 x i16> %v , <8 x i16>* %p ret void } -; CHECK-LABEL: store_v8i16_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v8i16_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) { +; CHECK-LABEL: store_v8i16_with_folded_offset: +; CHECK: .functype store_v8i16_with_folded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <8 x i16>* 
%p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <8 x i16>* @@ -861,69 +931,85 @@ define void @store_v8i16_with_folded_offset(<8 x i16> %v, <8 x i16>* %p) { ret void } -; CHECK-LABEL: store_v8i16_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, <8 x i16>* %p) { +; CHECK-LABEL: store_v8i16_with_folded_gep_offset: +; CHECK: .functype store_v8i16_with_folded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 1 store <8 x i16> %v , <8 x i16>* %s ret void } -; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, <8 x i16>* %p) { +; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset: +; CHECK: .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1 store <8 x i16> %v , <8 x i16>* %s ret void } -; CHECK-LABEL: store_v8i16_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v8i16_with_unfolded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, 
$pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, <8 x i16>* %p) { +; CHECK-LABEL: store_v8i16_with_unfolded_offset: +; CHECK: .functype store_v8i16_with_unfolded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i32 -1 store <8 x i16> %v , <8 x i16>* %s ret void } -; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, <8 x i16>* %p) { +; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset: +; CHECK: .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <8 x i16>, <8 x i16>* %p, i32 1 store <8 x i16> %v , <8 x i16>* %s ret void } -; CHECK-LABEL: store_v8i16_to_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v8i16_to_numeric_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}} define void @store_v8i16_to_numeric_address(<8 x i16> %v) { +; CHECK-LABEL: store_v8i16_to_numeric_address: +; CHECK: .functype store_v8i16_to_numeric_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <8 x 
i16>* store <8 x i16> %v , <8 x i16>* %s ret void } -; CHECK-LABEL: store_v8i16_to_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v8i16_to_global_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v8i16($pop[[R]]), $0{{$}} define void @store_v8i16_to_global_address(<8 x i16> %v) { +; CHECK-LABEL: store_v8i16_to_global_address: +; CHECK: .functype store_v8i16_to_global_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store gv_v8i16 +; CHECK-NEXT: # fallthrough-return store <8 x i16> %v , <8 x i16>* @gv_v8i16 ret void } @@ -931,65 +1017,72 @@ define void @store_v8i16_to_global_address(<8 x i16> %v) { ; ============================================================================== ; 4 x i32 ; ============================================================================== -; CHECK-LABEL: load_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4i32 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32(<4 x i32>* %p) { +; CHECK-LABEL: load_v4i32: +; CHECK: .functype load_v4i32 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %v = load <4 x i32>, <4 x i32>* %p ret <4 x i32> %v } -; CHECK-LABEL: load_splat_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4i32 (i32) -> (v128){{$}} -; SIMD128-NEXT: v32x4.load_splat define <4 x i32> @load_splat_v4i32(i32* %addr) { +; CHECK-LABEL: load_splat_v4i32: +; CHECK: .functype load_splat_v4i32 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: # fallthrough-return %e = load i32, i32* %addr, align 4 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> 
zeroinitializer ret <4 x i32> %v2 } -; CHECK-LABEL: load_sext_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v4i32 (i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_sext_v4i32(<4 x i16>* %p) { +; CHECK-LABEL: load_sext_v4i32: +; CHECK: .functype load_sext_v4i32 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.load16x4_s 0 +; CHECK-NEXT: # fallthrough-return %v = load <4 x i16>, <4 x i16>* %p %v2 = sext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_zext_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v4i32 (i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_zext_v4i32(<4 x i16>* %p) { +; CHECK-LABEL: load_zext_v4i32: +; CHECK: .functype load_zext_v4i32 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.load16x4_u 0 +; CHECK-NEXT: # fallthrough-return %v = load <4 x i16>, <4 x i16>* %p %v2 = zext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_ext_v4i32: -; NO-SIMD128-NOT: load16x4 -; SIMD128-NEXT: .functype load_ext_v4i32 (i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i16> @load_ext_v4i32(<4 x i16>* %p) { +; CHECK-LABEL: load_ext_v4i32: +; CHECK: .functype load_ext_v4i32 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.load16x4_u 0 +; CHECK-NEXT: # fallthrough-return %v = load <4 x i16>, <4 x i16>* %p ret <4 x i16> %v } -; CHECK-LABEL: load_v4i32_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4i32_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> 
@load_v4i32_with_folded_offset(<4 x i32>* %p) { +; CHECK-LABEL: load_v4i32_with_folded_offset: +; CHECK: .functype load_v4i32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x i32>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <4 x i32>* @@ -997,12 +1090,13 @@ define <4 x i32> @load_v4i32_with_folded_offset(<4 x i32>* %p) { ret <4 x i32> %v } -; CHECK-LABEL: load_splat_v4i32_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) { +; CHECK-LABEL: load_splat_v4i32_with_folded_offset: +; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v32x4.load_splat 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint i32* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to i32* @@ -1012,12 +1106,13 @@ define <4 x i32> @load_splat_v4i32_with_folded_offset(i32* %p) { ret <4 x i32> %v2 } -; CHECK-LABEL: load_sext_v4i32_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_sext_v4i32_with_folded_offset: +; CHECK: .functype load_sext_v4i32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.load16x4_s 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x i16>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <4 x i16>* @@ -1026,12 +1121,13 @@ define <4 x i32> @load_sext_v4i32_with_folded_offset(<4 
x i16>* %p) { ret <4 x i32> %v2 } -; CHECK-LABEL: load_zext_v4i32_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_zext_v4i32_with_folded_offset: +; CHECK: .functype load_zext_v4i32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.load16x4_u 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x i16>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <4 x i16>* @@ -1040,12 +1136,13 @@ define <4 x i32> @load_zext_v4i32_with_folded_offset(<4 x i16>* %p) { ret <4 x i32> %v2 } -; CHECK-LABEL: load_ext_v4i32_with_folded_offset: -; NO-SIMD128-NOT: load16x4 -; SIMD128-NEXT: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_ext_v4i32_with_folded_offset: +; CHECK: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.load16x4_u 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x i16>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <4 x i16>* @@ -1053,23 +1150,25 @@ define <4 x i16> @load_ext_v4i32_with_folded_offset(<4 x i16>* %p) { ret <4 x i16> %v } -; CHECK-LABEL: load_v4i32_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_with_folded_gep_offset(<4 x i32>* %p) { +; CHECK-LABEL: load_v4i32_with_folded_gep_offset: +; CHECK: .functype 
load_v4i32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 %v = load <4 x i32>, <4 x i32>* %s ret <4 x i32> %v } -; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 4($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) { +; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset: +; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v32x4.load_splat 4 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i32, i32* %p, i32 1 %e = load i32, i32* %s %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 @@ -1077,62 +1176,67 @@ define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(i32* %p) { ret <4 x i32> %v2 } -; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 8($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_sext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_sext_v4i32_with_folded_gep_offset: +; CHECK: .functype load_sext_v4i32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.load16x4_s 8 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 %v = load <4 x i16>, <4 x i16>* %s %v2 = sext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype 
load_zext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 8($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_zext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_zext_v4i32_with_folded_gep_offset: +; CHECK: .functype load_zext_v4i32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.load16x4_u 8 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 %v = load <4 x i16>, <4 x i16>* %s %v2 = zext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset: -; NO-SIMD128-NOT: load16x4 -; SIMD128-NEXT: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 8($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset: +; CHECK: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.load16x4_u 8 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 %v = load <4 x i16>, <4 x i16>* %s ret <4 x i16> %v } -; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(<4 x i32>* %p) { +; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # 
%bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 %v = load <4 x i32>, <4 x i32>* %s ret <4 x i32> %v } -; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -4{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) { +; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -4 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i32, i32* %p, i32 -1 %e = load i32, i32* %s %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 @@ -1140,55 +1244,59 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(i32* %p) { ret <4 x i32> %v2 } -; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_sext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_sext_v4i32_with_unfolded_gep_negative_offset (i32) -> 
(v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i32x4.load16x4_s 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 %v = load <4 x i16>, <4 x i16>* %s %v2 = sext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_zext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_zext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i32x4.load16x4_u 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 %v = load <4 x i16>, <4 x i16>* %s %v2 = zext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: load16x4 -; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # 
%bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i32x4.load16x4_u 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 -1 %v = load <4 x i16>, <4 x i16>* %s ret <4 x i16> %v } -; CHECK-LABEL: load_v4i32_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4i32_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) { +; CHECK-LABEL: load_v4i32_with_unfolded_offset: +; CHECK: .functype load_v4i32_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x i32>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <4 x i32>* @@ -1196,14 +1304,15 @@ define <4 x i32> @load_v4i32_with_unfolded_offset(<4 x i32>* %p) { ret <4 x i32> %v } -; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) { +; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset: +; CHECK: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint i32* %p to i32 
%r = add nsw i32 %q, 16 %s = inttoptr i32 %r to i32* @@ -1213,14 +1322,15 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_offset(i32* %p) { ret <4 x i32> %v2 } -; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_sext_v4i32_with_unfolded_offset: +; CHECK: .functype load_sext_v4i32_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i32x4.load16x4_s 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x i16>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <4 x i16>* @@ -1229,14 +1339,15 @@ define <4 x i32> @load_sext_v4i32_with_unfolded_offset(<4 x i16>* %p) { ret <4 x i32> %v2 } -; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_zext_v4i32_with_unfolded_offset: +; CHECK: .functype load_zext_v4i32_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i32x4.load16x4_u 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x i16>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <4 x 
i16>* @@ -1245,14 +1356,15 @@ define <4 x i32> @load_zext_v4i32_with_unfolded_offset(<4 x i16>* %p) { ret <4 x i32> %v2 } -; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset: -; NO-SIMD128-NOT: load16x4 -; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset: +; CHECK: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i32x4.load16x4_u 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x i16>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <4 x i16>* @@ -1260,27 +1372,29 @@ define <4 x i16> @load_ext_v4i32_with_unfolded_offset(<4 x i16>* %p) { ret <4 x i16> %v } -; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_with_unfolded_gep_offset(<4 x i32>* %p) { +; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset: +; CHECK: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 %v = load <4 x i32>, <4 x i32>* %s ret <4 x i32> %v } -; CHECK-LABEL: 
load_splat_v4i32_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 4{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) { +; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset: +; CHECK: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 4 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr i32, i32* %p, i32 1 %e = load i32, i32* %s %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 @@ -1288,66 +1402,69 @@ define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(i32* %p) { ret <4 x i32> %v2 } -; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_sext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_sext_v4i32_with_unfolded_gep_offset: +; CHECK: .functype load_sext_v4i32_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i32x4.load16x4_s 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 %v = load <4 x i16>, <4 x i16>* %s %v2 = sext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset: -; 
NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_zext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_zext_v4i32_with_unfolded_gep_offset: +; CHECK: .functype load_zext_v4i32_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i32x4.load16x4_u 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 %v = load <4 x i16>, <4 x i16>* %s %v2 = zext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset: -; NO-SIMD128-NOT: load16x4 -; SIMD128-NEXT: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(<4 x i16>* %p) { +; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset: +; CHECK: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i32x4.load16x4_u 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <4 x i16>, <4 x i16>* %p, i32 1 %v = load <4 x i16>, <4 x i16>* %s ret <4 x i16> %v } -; CHECK-LABEL: load_v4i32_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4i32_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load 
$push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_v4i32_from_numeric_address() { +; CHECK-LABEL: load_v4i32_from_numeric_address: +; CHECK: .functype load_v4i32_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <4 x i32>* %v = load <4 x i32>, <4 x i32>* %s ret <4 x i32> %v } -; CHECK-LABEL: load_splat_v4i32_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4i32_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_splat_v4i32_from_numeric_address() { +; CHECK-LABEL: load_splat_v4i32_from_numeric_address: +; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v32x4.load_splat 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i32* %e = load i32, i32* %s %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 @@ -1355,120 +1472,126 @@ define <4 x i32> @load_splat_v4i32_from_numeric_address() { ret <4 x i32> %v2 } -; CHECK-LABEL: load_sext_v4i32_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v4i32_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_sext_v4i32_from_numeric_address() { +; CHECK-LABEL: load_sext_v4i32_from_numeric_address: +; CHECK: .functype load_sext_v4i32_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.load16x4_s 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <4 x i16>* %v = load <4 x i16>, <4 x i16>* 
%s %v2 = sext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_zext_v4i32_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v4i32_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_zext_v4i32_from_numeric_address() { +; CHECK-LABEL: load_zext_v4i32_from_numeric_address: +; CHECK: .functype load_zext_v4i32_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.load16x4_u 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <4 x i16>* %v = load <4 x i16>, <4 x i16>* %s %v2 = zext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_ext_v4i32_from_numeric_address: -; NO-SIMD128-NOT: load16x4 -; SIMD128-NEXT: .functype load_ext_v4i32_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i16> @load_ext_v4i32_from_numeric_address() { +; CHECK-LABEL: load_ext_v4i32_from_numeric_address: +; CHECK: .functype load_ext_v4i32_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.load16x4_u 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <4 x i16>* %v = load <4 x i16>, <4 x i16>* %s ret <4 x i16> %v } -; CHECK-LABEL: load_v4i32_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4i32_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4i32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v4i32 = global <4 x i32> define <4 x i32> @load_v4i32_from_global_address() { +; CHECK-LABEL: load_v4i32_from_global_address: +; CHECK: 
.functype load_v4i32_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load gv_v4i32 +; CHECK-NEXT: # fallthrough-return %v = load <4 x i32>, <4 x i32>* @gv_v4i32 ret <4 x i32> %v } -; CHECK-LABEL: load_splat_v4i32_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4i32_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, gv_i32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_i32 = global i32 42 define <4 x i32> @load_splat_v4i32_from_global_address() { +; CHECK-LABEL: load_splat_v4i32_from_global_address: +; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v32x4.load_splat gv_i32 +; CHECK-NEXT: # fallthrough-return %e = load i32, i32* @gv_i32 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %v2 } -; CHECK-LABEL: load_sext_v4i32_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v4i32_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i32x4.load16x4_s $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v4i16 = global <4 x i16> define <4 x i32> @load_sext_v4i32_from_global_address() { +; CHECK-LABEL: load_sext_v4i32_from_global_address: +; CHECK: .functype load_sext_v4i32_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.load16x4_s gv_v4i16 +; CHECK-NEXT: # fallthrough-return %v = load <4 x i16>, <4 x i16>* @gv_v4i16 %v2 = sext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_zext_v4i32_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v4i32_from_global_address () -> 
(v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @load_zext_v4i32_from_global_address() { +; CHECK-LABEL: load_zext_v4i32_from_global_address: +; CHECK: .functype load_zext_v4i32_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.load16x4_u gv_v4i16 +; CHECK-NEXT: # fallthrough-return %v = load <4 x i16>, <4 x i16>* @gv_v4i16 %v2 = zext <4 x i16> %v to <4 x i32> ret <4 x i32> %v2 } -; CHECK-LABEL: load_ext_v4i32_from_global_address: -; NO-SIMD128-NOT: load16x4 -; SIMD128-NEXT: .functype load_ext_v4i32_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i32x4.load16x4_u $push[[R:[0-9]+]]=, gv_v4i16($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i16> @load_ext_v4i32_from_global_address() { +; CHECK-LABEL: load_ext_v4i32_from_global_address: +; CHECK: .functype load_ext_v4i32_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.load16x4_u gv_v4i16 +; CHECK-NEXT: # fallthrough-return %v = load <4 x i16>, <4 x i16>* @gv_v4i16 ret <4 x i16> %v } -; CHECK-LABEL: store_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4i32 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} define void @store_v4i32(<4 x i32> %v, <4 x i32>* %p) { +; CHECK-LABEL: store_v4i32: +; CHECK: .functype store_v4i32 (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return store <4 x i32> %v , <4 x i32>* %p ret void } -; CHECK-LABEL: store_v4i32_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4i32_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void 
@store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) { +; CHECK-LABEL: store_v4i32_with_folded_offset: +; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x i32>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <4 x i32>* @@ -1476,69 +1599,85 @@ define void @store_v4i32_with_folded_offset(<4 x i32> %v, <4 x i32>* %p) { ret void } -; CHECK-LABEL: store_v4i32_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { +; CHECK-LABEL: store_v4i32_with_folded_gep_offset: +; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 1 store <4 x i32> %v , <4 x i32>* %s ret void } -; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, <4 x i32>* %p) { +; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset: +; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds 
<4 x i32>, <4 x i32>* %p, i32 -1 store <4 x i32> %v , <4 x i32>* %s ret void } -; CHECK-LABEL: store_v4i32_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4i32_with_unfolded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, <4 x i32>* %p) { +; CHECK-LABEL: store_v4i32_with_unfolded_offset: +; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i32>, <4 x i32>* %p, i32 -1 store <4 x i32> %v , <4 x i32>* %s ret void } -; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, <4 x i32>* %p) { +; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset: +; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <4 x i32>, <4 x i32>* %p, i32 1 store <4 x i32> %v , <4 x i32>* %s ret void } -; CHECK-LABEL: store_v4i32_to_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4i32_to_numeric_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}} 
define void @store_v4i32_to_numeric_address(<4 x i32> %v) { +; CHECK-LABEL: store_v4i32_to_numeric_address: +; CHECK: .functype store_v4i32_to_numeric_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <4 x i32>* store <4 x i32> %v , <4 x i32>* %s ret void } -; CHECK-LABEL: store_v4i32_to_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4i32_to_global_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v4i32($pop[[R]]), $0{{$}} define void @store_v4i32_to_global_address(<4 x i32> %v) { +; CHECK-LABEL: store_v4i32_to_global_address: +; CHECK: .functype store_v4i32_to_global_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store gv_v4i32 +; CHECK-NEXT: # fallthrough-return store <4 x i32> %v , <4 x i32>* @gv_v4i32 ret void } @@ -1546,66 +1685,72 @@ define void @store_v4i32_to_global_address(<4 x i32> %v) { ; ============================================================================== ; 2 x i64 ; ============================================================================== -; CHECK-LABEL: load_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2i64 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64(<2 x i64>* %p) { +; CHECK-LABEL: load_v2i64: +; CHECK: .functype load_v2i64 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %v = load <2 x i64>, <2 x i64>* %p ret <2 x i64> %v } -; CHECK-LABEL: load_splat_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2i64 (i32) -> (v128){{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return 
$pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64(i64* %p) { +; CHECK-LABEL: load_splat_v2i64: +; CHECK: .functype load_splat_v2i64 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: # fallthrough-return %e = load i64, i64* %p %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %v2 } -; CHECK-LABEL: load_sext_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v2i64 (i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_sext_v2i64(<2 x i32>* %p) { +; CHECK-LABEL: load_sext_v2i64: +; CHECK: .functype load_sext_v2i64 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.load32x2_s 0 +; CHECK-NEXT: # fallthrough-return %v = load <2 x i32>, <2 x i32>* %p %v2 = sext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_zext_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v2i64 (i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_zext_v2i64(<2 x i32>* %p) { +; CHECK-LABEL: load_zext_v2i64: +; CHECK: .functype load_zext_v2i64 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.load32x2_u 0 +; CHECK-NEXT: # fallthrough-return %v = load <2 x i32>, <2 x i32>* %p %v2 = zext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_ext_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_ext_v2i64 (i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i32> @load_ext_v2i64(<2 x i32>* %p) { +; CHECK-LABEL: load_ext_v2i64: +; CHECK: .functype load_ext_v2i64 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; 
CHECK-NEXT: i64x2.load32x2_u 0 +; CHECK-NEXT: # fallthrough-return %v = load <2 x i32>, <2 x i32>* %p ret <2 x i32> %v } -; CHECK-LABEL: load_v2i64_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2i64_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) { +; CHECK-LABEL: load_v2i64_with_folded_offset: +; CHECK: .functype load_v2i64_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x i64>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <2 x i64>* @@ -1613,12 +1758,13 @@ define <2 x i64> @load_v2i64_with_folded_offset(<2 x i64>* %p) { ret <2 x i64> %v } -; CHECK-LABEL: load_splat_v2i64_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) { +; CHECK-LABEL: load_splat_v2i64_with_folded_offset: +; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v64x2.load_splat 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint i64* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to i64* @@ -1628,12 +1774,13 @@ define <2 x i64> @load_splat_v2i64_with_folded_offset(i64* %p) { ret <2 x i64> %v2 } -; CHECK-LABEL: load_sext_v2i64_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) { +; CHECK-LABEL: 
load_sext_v2i64_with_folded_offset: +; CHECK: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.load32x2_s 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x i32>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <2 x i32>* @@ -1642,12 +1789,13 @@ define <2 x i64> @load_sext_v2i64_with_folded_offset(<2 x i32>* %p) { ret <2 x i64> %v2 } -; CHECK-LABEL: load_zext_v2i64_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_zext_v2i64_with_folded_offset: +; CHECK: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.load32x2_u 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x i32>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <2 x i32>* @@ -1656,12 +1804,13 @@ define <2 x i64> @load_zext_v2i64_with_folded_offset(<2 x i32>* %p) { ret <2 x i64> %v2 } -; CHECK-LABEL: load_ext_v2i64_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_ext_v2i64_with_folded_offset: +; CHECK: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.load32x2_u 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x i32>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <2 x i32>* @@ -1669,23 +1818,25 @@ define <2 x i32> @load_ext_v2i64_with_folded_offset(<2 x i32>* %p) { ret <2 x i32> 
%v } -; CHECK-LABEL: load_v2i64_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_with_folded_gep_offset(<2 x i64>* %p) { +; CHECK-LABEL: load_v2i64_with_folded_gep_offset: +; CHECK: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 %v = load <2 x i64>, <2 x i64>* %s ret <2 x i64> %v } -; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 8($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) { +; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset: +; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v64x2.load_splat 8 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i64, i64* %p, i32 1 %e = load i64, i64* %s %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 @@ -1693,62 +1844,67 @@ define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(i64* %p) { ret <2 x i64> %v2 } -; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 8($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset: +; CHECK: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128) +; 
CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.load32x2_s 8 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 %v = load <2 x i32>, <2 x i32>* %s %v2 = sext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 8($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset: +; CHECK: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.load32x2_u 8 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 %v = load <2 x i32>, <2 x i32>* %s %v2 = zext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 8($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset: +; CHECK: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.load32x2_u 8 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 1 %v = load <2 x i32>, <2 x i32>* %s ret <2 x i32> %v } -; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add 
$push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(<2 x i64>* %p) { +; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset: +; CHECK: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 %v = load <2 x i64>, <2 x i64>* %s ret <2 x i64> %v } -; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) { +; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset: +; CHECK: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i64, i64* %p, i32 -1 %e = load i64, i64* %s %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 @@ -1756,55 +1912,59 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(i64* %p) { ret <2 x i64> %v2 } -; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}} -; SIMD128-NEXT: i32.add 
$push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset: +; CHECK: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i64x2.load32x2_s 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 %v = load <2 x i32>, <2 x i32>* %s %v2 = sext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset: +; CHECK: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i64x2.load32x2_u 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 %v = load <2 x i32>, <2 x i32>* %s %v2 = zext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, 
$pop[[L0]]{{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset: +; CHECK: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i64x2.load32x2_u 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x i32>, <2 x i32>* %p, i32 -1 %v = load <2 x i32>, <2 x i32>* %s ret <2 x i32> %v } -; CHECK-LABEL: load_v2i64_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2i64_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) { +; CHECK-LABEL: load_v2i64_with_unfolded_offset: +; CHECK: .functype load_v2i64_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x i64>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <2 x i64>* @@ -1812,14 +1972,15 @@ define <2 x i64> @load_v2i64_with_unfolded_offset(<2 x i64>* %p) { ret <2 x i64> %v } -; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> 
@load_splat_v2i64_with_unfolded_offset(i64* %p) { +; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset: +; CHECK: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint i64* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to i64* @@ -1829,14 +1990,15 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_offset(i64* %p) { ret <2 x i64> %v2 } -; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset: +; CHECK: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i64x2.load32x2_s 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x i32>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <2 x i32>* @@ -1845,14 +2007,15 @@ define <2 x i64> @load_sext_v2i64_with_unfolded_offset(<2 x i32>* %p) { ret <2 x i64> %v2 } -; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) { +; 
CHECK-LABEL: load_zext_v2i64_with_unfolded_offset: +; CHECK: .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i64x2.load32x2_u 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x i32>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <2 x i32>* @@ -1861,14 +2024,15 @@ define <2 x i64> @load_zext_v2i64_with_unfolded_offset(<2 x i32>* %p) { ret <2 x i64> %v2 } -; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset: +; CHECK: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i64x2.load32x2_u 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x i32>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <2 x i32>* @@ -1876,27 +2040,29 @@ define <2 x i32> @load_ext_v2i64_with_unfolded_offset(<2 x i32>* %p) { ret <2 x i32> %v } -; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_with_unfolded_gep_offset(<2 x i64>* %p) { +; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset: +; CHECK: 
.functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 %v = load <2 x i64>, <2 x i64>* %s ret <2 x i64> %v } -; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) { +; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset: +; CHECK: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr i64, i64* %p, i32 1 %e = load i64, i64* %s %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 @@ -1904,66 +2070,69 @@ define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(i64* %p) { ret <2 x i64> %v2 } -; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset: +; CHECK: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: 
local.get 0 +; CHECK-NEXT: i32.const 8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i64x2.load32x2_s 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 %v = load <2 x i32>, <2 x i32>* %s %v2 = sext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset: +; CHECK: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i64x2.load32x2_u 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <2 x i32>, <2 x i32>* %p, i32 1 %v = load <2 x i32>, <2 x i32>* %s %v2 = zext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(<2 x i32>* %p) { +; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset: +; CHECK: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: i64x2.load32x2_u 0 +; CHECK-NEXT: # fallthrough-return %s = 
getelementptr <2 x i32>, <2 x i32>* %p, i32 1 %v = load <2 x i32>, <2 x i32>* %s ret <2 x i32> %v } -; CHECK-LABEL: load_v2i64_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2i64_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_v2i64_from_numeric_address() { +; CHECK-LABEL: load_v2i64_from_numeric_address: +; CHECK: .functype load_v2i64_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <2 x i64>* %v = load <2 x i64>, <2 x i64>* %s ret <2 x i64> %v } -; CHECK-LABEL: load_splat_v2i64_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2i64_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_splat_v2i64_from_numeric_address() { +; CHECK-LABEL: load_splat_v2i64_from_numeric_address: +; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v64x2.load_splat 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to i64* %e = load i64, i64* %s %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 @@ -1971,120 +2140,126 @@ define <2 x i64> @load_splat_v2i64_from_numeric_address() { ret <2 x i64> %v2 } -; CHECK-LABEL: load_sext_v2i64_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v2i64_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> 
@load_sext_v2i64_from_numeric_address() { +; CHECK-LABEL: load_sext_v2i64_from_numeric_address: +; CHECK: .functype load_sext_v2i64_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i64x2.load32x2_s 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <2 x i32>* %v = load <2 x i32>, <2 x i32>* %s %v2 = sext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_zext_v2i64_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v2i64_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_zext_v2i64_from_numeric_address() { +; CHECK-LABEL: load_zext_v2i64_from_numeric_address: +; CHECK: .functype load_zext_v2i64_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i64x2.load32x2_u 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <2 x i32>* %v = load <2 x i32>, <2 x i32>* %s %v2 = zext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_ext_v2i64_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_ext_v2i64_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i32> @load_ext_v2i64_from_numeric_address() { +; CHECK-LABEL: load_ext_v2i64_from_numeric_address: +; CHECK: .functype load_ext_v2i64_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i64x2.load32x2_u 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <2 x i32>* %v = load <2 x i32>, <2 x i32>* %s ret <2 x i32> %v } -; CHECK-LABEL: load_v2i64_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype 
load_v2i64_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2i64($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v2i64 = global <2 x i64> define <2 x i64> @load_v2i64_from_global_address() { +; CHECK-LABEL: load_v2i64_from_global_address: +; CHECK: .functype load_v2i64_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load gv_v2i64 +; CHECK-NEXT: # fallthrough-return %v = load <2 x i64>, <2 x i64>* @gv_v2i64 ret <2 x i64> %v } -; CHECK-LABEL: load_splat_v2i64_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2i64_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, gv_i64($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_i64 = global i64 42 define <2 x i64> @load_splat_v2i64_from_global_address() { +; CHECK-LABEL: load_splat_v2i64_from_global_address: +; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v64x2.load_splat gv_i64 +; CHECK-NEXT: # fallthrough-return %e = load i64, i64* @gv_i64 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %v2 } -; CHECK-LABEL: load_sext_v2i64_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_sext_v2i64_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i64x2.load32x2_s $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v2i32 = global <2 x i32> define <2 x i64> @load_sext_v2i64_from_global_address() { +; CHECK-LABEL: load_sext_v2i64_from_global_address: +; CHECK: .functype load_sext_v2i64_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: 
+; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i64x2.load32x2_s gv_v2i32 +; CHECK-NEXT: # fallthrough-return %v = load <2 x i32>, <2 x i32>* @gv_v2i32 %v2 = sext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_zext_v2i64_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_zext_v2i64_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @load_zext_v2i64_from_global_address() { +; CHECK-LABEL: load_zext_v2i64_from_global_address: +; CHECK: .functype load_zext_v2i64_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i64x2.load32x2_u gv_v2i32 +; CHECK-NEXT: # fallthrough-return %v = load <2 x i32>, <2 x i32>* @gv_v2i32 %v2 = zext <2 x i32> %v to <2 x i64> ret <2 x i64> %v2 } -; CHECK-LABEL: load_ext_v2i64_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_ext_v2i64_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: i64x2.load32x2_u $push[[R:[0-9]+]]=, gv_v2i32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i32> @load_ext_v2i64_from_global_address() { +; CHECK-LABEL: load_ext_v2i64_from_global_address: +; CHECK: .functype load_ext_v2i64_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i64x2.load32x2_u gv_v2i32 +; CHECK-NEXT: # fallthrough-return %v = load <2 x i32>, <2 x i32>* @gv_v2i32 ret <2 x i32> %v } -; CHECK-LABEL: store_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2i64 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} define void @store_v2i64(<2 x i64> %v, <2 x i64>* %p) { +; CHECK-LABEL: store_v2i64: +; CHECK: .functype store_v2i64 (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: 
local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return store <2 x i64> %v , <2 x i64>* %p ret void } -; CHECK-LABEL: store_v2i64_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2i64_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) { +; CHECK-LABEL: store_v2i64_with_folded_offset: +; CHECK: .functype store_v2i64_with_folded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x i64>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <2 x i64>* @@ -2092,69 +2267,85 @@ define void @store_v2i64_with_folded_offset(<2 x i64> %v, <2 x i64>* %p) { ret void } -; CHECK-LABEL: store_v2i64_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { +; CHECK-LABEL: store_v2i64_with_folded_gep_offset: +; CHECK: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 1 store <2 x i64> %v , <2 x i64>* %s ret void } -; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, <2 x i64>* %p) { +; CHECK-LABEL: 
store_v2i64_with_unfolded_gep_negative_offset: +; CHECK: .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 store <2 x i64> %v , <2 x i64>* %s ret void } -; CHECK-LABEL: store_v2i64_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2i64_with_unfolded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, <2 x i64>* %p) { +; CHECK-LABEL: store_v2i64_with_unfolded_offset: +; CHECK: .functype store_v2i64_with_unfolded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x i64>, <2 x i64>* %p, i32 -1 store <2 x i64> %v , <2 x i64>* %s ret void } -; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, <2 x i64>* %p) { +; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset: +; CHECK: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return 
%s = getelementptr <2 x i64>, <2 x i64>* %p, i32 1 store <2 x i64> %v , <2 x i64>* %s ret void } -; CHECK-LABEL: store_v2i64_to_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2i64_to_numeric_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}} define void @store_v2i64_to_numeric_address(<2 x i64> %v) { +; CHECK-LABEL: store_v2i64_to_numeric_address: +; CHECK: .functype store_v2i64_to_numeric_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <2 x i64>* store <2 x i64> %v , <2 x i64>* %s ret void } -; CHECK-LABEL: store_v2i64_to_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2i64_to_global_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v2i64($pop[[R]]), $0{{$}} define void @store_v2i64_to_global_address(<2 x i64> %v) { +; CHECK-LABEL: store_v2i64_to_global_address: +; CHECK: .functype store_v2i64_to_global_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store gv_v2i64 +; CHECK-NEXT: # fallthrough-return store <2 x i64> %v , <2 x i64>* @gv_v2i64 ret void } @@ -2162,34 +2353,37 @@ define void @store_v2i64_to_global_address(<2 x i64> %v) { ; ============================================================================== ; 4 x float ; ============================================================================== -; CHECK-LABEL: load_v4f32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4f32 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32(<4 x float>* %p) { +; CHECK-LABEL: load_v4f32: +; CHECK: .functype load_v4f32 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; 
CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %v = load <4 x float>, <4 x float>* %p ret <4 x float> %v } -; CHECK-LABEL: load_splat_v4f32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4f32 (i32) -> (v128){{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_splat_v4f32(float* %p) { +; CHECK-LABEL: load_splat_v4f32: +; CHECK: .functype load_splat_v4f32 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: # fallthrough-return %e = load float, float* %p %v1 = insertelement <4 x float> undef, float %e, i32 0 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %v2 } -; CHECK-LABEL: load_v4f32_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4f32_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) { +; CHECK-LABEL: load_v4f32_with_folded_offset: +; CHECK: .functype load_v4f32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x float>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <4 x float>* @@ -2197,12 +2391,13 @@ define <4 x float> @load_v4f32_with_folded_offset(<4 x float>* %p) { ret <4 x float> %v } -; CHECK-LABEL: load_splat_v4f32_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4f32_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) { +; CHECK-LABEL: load_splat_v4f32_with_folded_offset: +; CHECK: .functype 
load_splat_v4f32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v32x4.load_splat 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint float* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to float* @@ -2212,23 +2407,25 @@ define <4 x float> @load_splat_v4f32_with_folded_offset(float* %p) { ret <4 x float> %v2 } -; CHECK-LABEL: load_v4f32_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_with_folded_gep_offset(<4 x float>* %p) { +; CHECK-LABEL: load_v4f32_with_folded_gep_offset: +; CHECK: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1 %v = load <4 x float>, <4 x float>* %s ret <4 x float> %v } -; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 4($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) { +; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset: +; CHECK: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v32x4.load_splat 4 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds float, float* %p, i32 1 %e = load float, float* %s %v1 = insertelement <4 x float> undef, float %e, i32 0 @@ -2236,27 +2433,29 @@ define <4 x float> @load_splat_v4f32_with_folded_gep_offset(float* %p) { ret <4 x float> %v2 } -; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: 
v128 -; SIMD128-NEXT: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(<4 x float>* %p) { +; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1 %v = load <4 x float>, <4 x float>* %s ret <4 x float> %v } -; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -4{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p) { +; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -4 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds float, float* %p, i32 -1 %e = load float, float* %s %v1 = insertelement <4 x float> undef, float %e, i32 0 @@ -2264,14 +2463,15 @@ define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(float* %p ret <4 x float> %v2 } -; CHECK-LABEL: load_v4f32_with_unfolded_offset: -; 
NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4f32_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) { +; CHECK-LABEL: load_v4f32_with_unfolded_offset: +; CHECK: .functype load_v4f32_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x float>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <4 x float>* @@ -2279,14 +2479,15 @@ define <4 x float> @load_v4f32_with_unfolded_offset(<4 x float>* %p) { ret <4 x float> %v } -; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) { +; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset: +; CHECK: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint float* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to float* @@ -2296,27 +2497,29 @@ define <4 x float> @load_splat_v4f32_with_unfolded_offset(float* %p) { ret <4 x float> %v2 } -; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype 
load_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_with_unfolded_gep_offset(<4 x float>* %p) { +; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset: +; CHECK: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <4 x float>, <4 x float>* %p, i32 1 %v = load <4 x float>, <4 x float>* %s ret <4 x float> %v } -; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 4{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) { +; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset: +; CHECK: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 4 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v32x4.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr float, float* %p, i32 1 %e = load float, float* %s %v1 = insertelement <4 x float> undef, float %e, i32 0 @@ -2324,25 +2527,25 @@ define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(float* %p) { ret <4 x float> %v2 } -; CHECK-LABEL: load_v4f32_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4f32_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 
0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_v4f32_from_numeric_address() { +; CHECK-LABEL: load_v4f32_from_numeric_address: +; CHECK: .functype load_v4f32_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <4 x float>* %v = load <4 x float>, <4 x float>* %s ret <4 x float> %v } -; CHECK-LABEL: load_splat_v4f32_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4f32_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @load_splat_v4f32_from_numeric_address() { +; CHECK-LABEL: load_splat_v4f32_from_numeric_address: +; CHECK: .functype load_splat_v4f32_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v32x4.load_splat 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to float* %e = load float, float* %s %v1 = insertelement <4 x float> undef, float %e, i32 0 @@ -2350,46 +2553,52 @@ define <4 x float> @load_splat_v4f32_from_numeric_address() { ret <4 x float> %v2 } -; CHECK-LABEL: load_v4f32_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v4f32_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v4f32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v4f32 = global <4 x float> define <4 x float> @load_v4f32_from_global_address() { +; CHECK-LABEL: load_v4f32_from_global_address: +; CHECK: .functype load_v4f32_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load gv_v4f32 +; CHECK-NEXT: # fallthrough-return %v = load 
<4 x float>, <4 x float>* @gv_v4f32 ret <4 x float> %v } -; CHECK-LABEL: load_splat_v4f32_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v4f32_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v32x4.load_splat $push[[R:[0-9]+]]=, gv_f32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_f32 = global float 42. define <4 x float> @load_splat_v4f32_from_global_address() { +; CHECK-LABEL: load_splat_v4f32_from_global_address: +; CHECK: .functype load_splat_v4f32_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v32x4.load_splat gv_f32 +; CHECK-NEXT: # fallthrough-return %e = load float, float* @gv_f32 %v1 = insertelement <4 x float> undef, float %e, i32 0 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %v2 } -; CHECK-LABEL: store_v4f32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4f32 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} define void @store_v4f32(<4 x float> %v, <4 x float>* %p) { +; CHECK-LABEL: store_v4f32: +; CHECK: .functype store_v4f32 (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return store <4 x float> %v , <4 x float>* %p ret void } -; CHECK-LABEL: store_v4f32_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4f32_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) { +; CHECK-LABEL: store_v4f32_with_folded_offset: +; CHECK: .functype store_v4f32_with_folded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x float>* %p to i32 %r = add nuw i32 
%q, 16 %s = inttoptr i32 %r to <4 x float>* @@ -2397,69 +2606,85 @@ define void @store_v4f32_with_folded_offset(<4 x float> %v, <4 x float>* %p) { ret void } -; CHECK-LABEL: store_v4f32_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, <4 x float>* %p) { +; CHECK-LABEL: store_v4f32_with_folded_gep_offset: +; CHECK: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 1 store <4 x float> %v , <4 x float>* %s ret void } -; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, <4 x float>* %p) { +; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset: +; CHECK: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1 store <4 x float> %v , <4 x float>* %s ret void } -; CHECK-LABEL: store_v4f32_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4f32_with_unfolded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, 
$pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v4f32_with_unfolded_offset(<4 x float> %v, <4 x float>* %p) { +; CHECK-LABEL: store_v4f32_with_unfolded_offset: +; CHECK: .functype store_v4f32_with_unfolded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 -1 store <4 x float> %v , <4 x float>* %s ret void } -; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, <4 x float>* %p) { +; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset: +; CHECK: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <4 x float>, <4 x float>* %p, i32 1 store <4 x float> %v , <4 x float>* %s ret void } -; CHECK-LABEL: store_v4f32_to_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4f32_to_numeric_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}} define void @store_v4f32_to_numeric_address(<4 x float> %v) { +; CHECK-LABEL: store_v4f32_to_numeric_address: +; CHECK: .functype store_v4f32_to_numeric_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 32 +; CHECK-NEXT: # fallthrough-return %s 
= inttoptr i32 32 to <4 x float>* store <4 x float> %v , <4 x float>* %s ret void } -; CHECK-LABEL: store_v4f32_to_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v4f32_to_global_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v4f32($pop[[R]]), $0{{$}} define void @store_v4f32_to_global_address(<4 x float> %v) { +; CHECK-LABEL: store_v4f32_to_global_address: +; CHECK: .functype store_v4f32_to_global_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store gv_v4f32 +; CHECK-NEXT: # fallthrough-return store <4 x float> %v , <4 x float>* @gv_v4f32 ret void } @@ -2467,34 +2692,37 @@ define void @store_v4f32_to_global_address(<4 x float> %v) { ; ============================================================================== ; 2 x double ; ============================================================================== -; CHECK-LABEL: load_v2f64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2f64 (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64(<2 x double>* %p) { +; CHECK-LABEL: load_v2f64: +; CHECK: .functype load_v2f64 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %v = load <2 x double>, <2 x double>* %p ret <2 x double> %v } -; CHECK-LABEL: load_splat_v2f64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2f64 (i32) -> (v128){{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_splat_v2f64(double* %p) { +; CHECK-LABEL: load_splat_v2f64: +; CHECK: .functype load_splat_v2f64 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: # fallthrough-return %e = load double, 
double* %p %v1 = insertelement <2 x double> undef, double %e, i32 0 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer ret <2 x double> %v2 } -; CHECK-LABEL: load_v2f64_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2f64_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) { +; CHECK-LABEL: load_v2f64_with_folded_offset: +; CHECK: .functype load_v2f64_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x double>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <2 x double>* @@ -2502,12 +2730,13 @@ define <2 x double> @load_v2f64_with_folded_offset(<2 x double>* %p) { ret <2 x double> %v } -; CHECK-LABEL: load_splat_v2f64_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) { +; CHECK-LABEL: load_splat_v2f64_with_folded_offset: +; CHECK: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v64x2.load_splat 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint double* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to double* @@ -2517,23 +2746,25 @@ define <2 x double> @load_splat_v2f64_with_folded_offset(double* %p) { ret <2 x double> %v2 } -; CHECK-LABEL: load_v2f64_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 16($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x 
double> @load_v2f64_with_folded_gep_offset(<2 x double>* %p) { +; CHECK-LABEL: load_v2f64_with_folded_gep_offset: +; CHECK: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load 16 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1 %v = load <2 x double>, <2 x double>* %s ret <2 x double> %v } -; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 8($0){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) { +; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset: +; CHECK: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v64x2.load_splat 8 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds double, double* %p, i32 1 %e = load double, double* %s %v1 = insertelement <2 x double> undef, double %e, i32 0 @@ -2541,27 +2772,29 @@ define <2 x double> @load_splat_v2f64_with_folded_gep_offset(double* %p) { ret <2 x double> %v2 } -; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(<2 x double>* %p) { +; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset: +; CHECK: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: 
i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1 %v = load <2 x double>, <2 x double>* %s ret <2 x double> %v } -; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* %p) { +; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset: +; CHECK: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds double, double* %p, i32 -1 %e = load double, double* %s %v1 = insertelement <2 x double> undef, double %e, i32 0 @@ -2569,14 +2802,15 @@ define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(double* ret <2 x double> %v2 } -; CHECK-LABEL: load_v2f64_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2f64_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) { +; CHECK-LABEL: load_v2f64_with_unfolded_offset: +; CHECK: .functype load_v2f64_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: 
i32.add +; CHECK-NEXT: v128.load 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x double>* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to <2 x double>* @@ -2584,14 +2818,15 @@ define <2 x double> @load_v2f64_with_unfolded_offset(<2 x double>* %p) { ret <2 x double> %v } -; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) { +; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset: +; CHECK: .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint double* %p to i32 %r = add nsw i32 %q, 16 %s = inttoptr i32 %r to double* @@ -2601,27 +2836,29 @@ define <2 x double> @load_splat_v2f64_with_unfolded_offset(double* %p) { ret <2 x double> %v2 } -; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_with_unfolded_gep_offset(<2 x double>* %p) { +; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset: +; CHECK: .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load 0 +; 
CHECK-NEXT: # fallthrough-return %s = getelementptr <2 x double>, <2 x double>* %p, i32 1 %v = load <2 x double>, <2 x double>* %s ret <2 x double> %v } -; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 8{{$}} -; SIMD128-NEXT: i32.add $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 0($pop[[L1]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) { +; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset: +; CHECK: .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 8 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v64x2.load_splat 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr double, double* %p, i32 1 %e = load double, double* %s %v1 = insertelement <2 x double> undef, double %e, i32 0 @@ -2629,25 +2866,25 @@ define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(double* %p) { ret <2 x double> %v2 } -; CHECK-LABEL: load_v2f64_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2f64_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_v2f64_from_numeric_address() { +; CHECK-LABEL: load_v2f64_from_numeric_address: +; CHECK: .functype load_v2f64_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <2 x double>* %v = load <2 x double>, <2 x double>* %s ret <2 x double> %v } -; CHECK-LABEL: load_splat_v2f64_from_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: 
.functype load_splat_v2f64_from_numeric_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, 32($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @load_splat_v2f64_from_numeric_address() { +; CHECK-LABEL: load_splat_v2f64_from_numeric_address: +; CHECK: .functype load_splat_v2f64_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v64x2.load_splat 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to double* %e = load double, double* %s %v1 = insertelement <2 x double> undef, double %e, i32 0 @@ -2655,46 +2892,52 @@ define <2 x double> @load_splat_v2f64_from_numeric_address() { ret <2 x double> %v2 } -; CHECK-LABEL: load_v2f64_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_v2f64_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.load $push[[R:[0-9]+]]=, gv_v2f64($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_v2f64 = global <2 x double> define <2 x double> @load_v2f64_from_global_address() { +; CHECK-LABEL: load_v2f64_from_global_address: +; CHECK: .functype load_v2f64_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load gv_v2f64 +; CHECK-NEXT: # fallthrough-return %v = load <2 x double>, <2 x double>* @gv_v2f64 ret <2 x double> %v } -; CHECK-LABEL: load_splat_v2f64_from_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype load_splat_v2f64_from_global_address () -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v64x2.load_splat $push[[R:[0-9]+]]=, gv_f64($pop[[L0]]){{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} @gv_f64 = global double 42. 
define <2 x double> @load_splat_v2f64_from_global_address() { +; CHECK-LABEL: load_splat_v2f64_from_global_address: +; CHECK: .functype load_splat_v2f64_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v64x2.load_splat gv_f64 +; CHECK-NEXT: # fallthrough-return %e = load double, double* @gv_f64 %v1 = insertelement <2 x double> undef, double %e, i32 0 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer ret <2 x double> %v2 } -; CHECK-LABEL: store_v2f64: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2f64 (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 0($1), $0{{$}} define void @store_v2f64(<2 x double> %v, <2 x double>* %p) { +; CHECK-LABEL: store_v2f64: +; CHECK: .functype store_v2f64 (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return store <2 x double> %v , <2 x double>* %p ret void } -; CHECK-LABEL: store_v2f64_with_folded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2f64_with_folded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) { +; CHECK-LABEL: store_v2f64_with_folded_offset: +; CHECK: .functype store_v2f64_with_folded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %q = ptrtoint <2 x double>* %p to i32 %r = add nuw i32 %q, 16 %s = inttoptr i32 %r to <2 x double>* @@ -2702,69 +2945,88 @@ define void @store_v2f64_with_folded_offset(<2 x double> %v, <2 x double>* %p) { ret void } -; CHECK-LABEL: store_v2f64_with_folded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: v128.store 16($1), $0{{$}} define void
@store_v2f64_with_folded_gep_offset(<2 x double> %v, <2 x double>* %p) { +; CHECK-LABEL: store_v2f64_with_folded_gep_offset: +; CHECK: .functype store_v2f64_with_folded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 1 store <2 x double> %v , <2 x double>* %s ret void } -; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, <2 x double>* %p) { +; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset: +; CHECK: .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const -16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1 store <2 x double> %v , <2 x double>* %s ret void } -; CHECK-LABEL: store_v2f64_with_unfolded_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2f64_with_unfolded_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, -16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v2f64_with_unfolded_offset(<2 x double> %v, <2 x double>* %p) { +; CHECK-LABEL: store_v2f64_with_unfolded_offset: +; CHECK: .functype store_v2f64_with_unfolded_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT:
i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return - %s = getelementptr inbounds <2 x double>, <2 x double>* %p, i32 -1 + ; Address built with `add nsw` (no nuw): the constant is expected to stay an explicit i32.add instead of a folded store offset, mirroring load_v2f64_with_unfolded_offset. + %q = ptrtoint <2 x double>* %p to i32 + %r = add nsw i32 %q, 16 + %s = inttoptr i32 %r to <2 x double>* store <2 x double> %v , <2 x double>* %s ret void } -; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 16{{$}} -; SIMD128-NEXT: i32.add $push[[R:[0-9]+]]=, $1, $pop[[L0]]{{$}} -; SIMD128-NEXT: v128.store 0($pop[[R]]), $0{{$}} define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, <2 x double>* %p) { +; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset: +; CHECK: .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return %s = getelementptr <2 x double>, <2 x double>* %p, i32 1 store <2 x double> %v , <2 x double>* %s ret void } -; CHECK-LABEL: store_v2f64_to_numeric_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2f64_to_numeric_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store 32($pop[[L0]]), $0{{$}} define void @store_v2f64_to_numeric_address(<2 x double> %v) { +; CHECK-LABEL: store_v2f64_to_numeric_address: +; CHECK: .functype store_v2f64_to_numeric_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store 32 +; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <2 x double>* store <2 x double> %v , <2 x double>* %s ret void } -; CHECK-LABEL: store_v2f64_to_global_address: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype store_v2f64_to_global_address (v128) -> (){{$}} -; SIMD128-NEXT: i32.const $push[[R:[0-9]+]]=, 0{{$}} -; SIMD128-NEXT: v128.store gv_v2f64($pop[[R]]), $0{{$}} define
void @store_v2f64_to_global_address(<2 x double> %v) { +; CHECK-LABEL: store_v2f64_to_global_address: +; CHECK: .functype store_v2f64_to_global_address (v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.store gv_v2f64 +; CHECK-NEXT: # fallthrough-return store <2 x double> %v , <2 x double>* @gv_v2f64 ret void } -- 2.7.4