From 6c3f56efa6e6ca746ba3dafae43251105f16e5fb Mon Sep 17 00:00:00 2001
From: Anton Afanasyev
Date: Tue, 8 Dec 2020 11:54:42 +0300
Subject: [PATCH] [SLP][Test] Differentiate SSE/AVX512 test coverage (NFC)

Add SSE/AVX512 test coverage for the insert-after-bundle.ll test.
This prepares the test to accurately show the fix for PR46983.
---
 .../SLPVectorizer/X86/insert-after-bundle.ll | 175 ++++++++++++---------
 1 file changed, 105 insertions(+), 70 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
index 2a4d457..fa11834 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -slp-vectorizer < %s | FileCheck %s
+; RUN: opt -S -slp-vectorizer -mattr=+sse < %s | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt -S -slp-vectorizer -mattr=+avx512f < %s | FileCheck %s --check-prefixes=CHECK,AVX512

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -410,75 +411,109 @@ for.end: ; preds = %for.body
 @ia = common local_unnamed_addr global [64 x i32] zeroinitializer, align 16

 define i32 @foo1() local_unnamed_addr #0 {
-; CHECK-LABEL: @foo1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16
-; CHECK-NEXT: [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16
-; CHECK-NEXT: br label [[FOR_BODY5:%.*]]
-; CHECK: for.cond3:
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
-; CHECK-NEXT: [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
-; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
-; CHECK: for.body5:
-; CHECK-NEXT: [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
-; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
-; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
-; CHECK-NEXT: [[NEG10:%.*]] = xor i32 [[TMP33]], -1
-; CHECK-NEXT: [[CMP11:%.*]] = icmp eq i32 [[TMP32]], [[NEG10]]
-; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: tail call void @abort()
-; CHECK-NEXT: unreachable
-; CHECK: for.end14:
-; CHECK-NEXT: ret i32 0
+; SSE-LABEL: @foo1(
+; SSE-NEXT: entry:
+; SSE-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16
+; SSE-NEXT: [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT: store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16
+; SSE-NEXT: br label [[FOR_BODY5:%.*]]
+; SSE: for.cond3:
+; SSE-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
+; SSE-NEXT: [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
+; SSE-NEXT: br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
+; SSE: for.body5:
+; SSE-NEXT: [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
+; SSE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
+; SSE-NEXT: [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; SSE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
+; SSE-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+; SSE-NEXT: [[NEG10:%.*]] = xor i32 [[TMP33]], -1
+; SSE-NEXT: [[CMP11:%.*]] = icmp eq i32 [[TMP32]], [[NEG10]]
+; SSE-NEXT: br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
+; SSE: if.then:
+; SSE-NEXT: tail call void @abort()
+; SSE-NEXT: unreachable
+; SSE: for.end14:
+; SSE-NEXT: ret i32 0
+;
+; AVX512-LABEL: @foo1(
+; AVX512-NEXT: entry:
+; AVX512-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([64 x i32]* @ib to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* bitcast ([64 x i32]* @ia to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT: store <16 x i32> [[TMP3]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT: store <16 x i32> [[TMP5]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP6:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <16 x i32>*), align 16
+; AVX512-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT: store <16 x i32> [[TMP7]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <16 x i32>*), align 16
+; AVX512-NEXT: br label [[FOR_BODY5:%.*]]
+; AVX512: for.cond3:
+; AVX512-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
+; AVX512-NEXT: [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
+; AVX512-NEXT: br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
+; AVX512: for.body5:
+; AVX512-NEXT: [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
+; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
+; AVX512-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; AVX512-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
+; AVX512-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
+; AVX512-NEXT: [[NEG10:%.*]] = xor i32 [[TMP9]], -1
+; AVX512-NEXT: [[CMP11:%.*]] = icmp eq i32 [[TMP8]], [[NEG10]]
+; AVX512-NEXT: br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
+; AVX512: if.then:
+; AVX512-NEXT: tail call void @abort()
+; AVX512-NEXT: unreachable
+; AVX512: for.end14:
+; AVX512-NEXT: ret i32 0
 ;
 entry:
 %0 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 0), align 16
--
2.7.4
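A note on regenerating the assertions above: the CHECK/SSE/AVX512 lines are
autogenerated, so after splitting the RUN line they are rebuilt with
utils/update_test_checks.py rather than edited by hand. A minimal sketch of
that workflow, assuming an in-tree build directory named ./build (the build
paths below are assumptions, not part of the patch):

  # Re-run every RUN line in the test with the given opt binary and rewrite
  # the CHECK lines from the actual output.
  # The --opt-binary value and the ./build path are assumptions for this sketch.
  python llvm/utils/update_test_checks.py \
      --opt-binary=./build/bin/opt \
      llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll

  # What lit executes for the first RUN line: the SSE output must match the
  # shared CHECK lines plus the SSE-specific ones.
  ./build/bin/opt -S -slp-vectorizer -mattr=+sse \
      < llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll \
    | ./build/bin/FileCheck --check-prefixes=CHECK,SSE \
      llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll

Functions whose output is identical under both -mattr settings keep the shared
CHECK prefix; @foo1 differs (SSE vectorizes it with <4 x i32>, AVX512F with
<16 x i32>), so it gets separate SSE and AVX512 check blocks, as seen in the
hunk above.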