From 4896f7243d629dfa9cce4acf2f72ca1e081a2c40 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 20 Sep 2019 14:17:00 +0000 Subject: [PATCH] [SLPVectorizer] add tests for bogus reductions; NFC https://bugs.llvm.org/show_bug.cgi?id=42708 https://bugs.llvm.org/show_bug.cgi?id=43146 llvm-svn: 372393 --- .../Transforms/SLPVectorizer/X86/bad-reduction.ll | 334 +++++++++++++++++++++ 1 file changed, 334 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll new file mode 100644 index 0000000..8637df4 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll @@ -0,0 +1,334 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s + +%v8i8 = type { i8, i8, i8, i8, i8, i8, i8, i8 } + +; PR43146 (https://bugs.llvm.org/show_bug.cgi?id=43146): an i64 is assembled from 8 adjacent byte loads, with byte 0 shifted into the most significant position. + +define i64 @load_bswap(%v8i8* %p) { +; CHECK-LABEL: @load_bswap( +; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds [[V8I8:%.*]], %v8i8* [[P:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 3 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 4 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 5 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 6 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 7 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[G0]] to <4 x i8>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 +; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[G4]] +; CHECK-NEXT: 
[[T5:%.*]] = load i8, i8* [[G5]] +; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[G6]] +; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[G7]] +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[Z4:%.*]] = zext i8 [[T4]] to i64 +; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[T5]] to i64 +; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[T6]] to i64 +; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[T7]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <4 x i64> [[TMP3]], +; CHECK-NEXT: [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24 +; CHECK-NEXT: [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16 +; CHECK-NEXT: [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8 +; CHECK-NEXT: [[OR01:%.*]] = or i64 undef, undef +; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], undef +; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], undef +; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]] +; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]] +; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], [[SH4]] +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[SH5]] +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[SH6]] +; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z7]] +; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], [[Z7]] +; CHECK-NEXT: ret i64 [[OP_EXTRA]] +; + %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0 + %g1 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 1 + %g2 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 2 + %g3 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 3 + %g4 = getelementptr inbounds %v8i8, %v8i8* 
%p, i64 0, i32 4 + %g5 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 5 + %g6 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 6 + %g7 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 7 + + %t0 = load i8, i8* %g0 + %t1 = load i8, i8* %g1 + %t2 = load i8, i8* %g2 + %t3 = load i8, i8* %g3 + %t4 = load i8, i8* %g4 + %t5 = load i8, i8* %g5 + %t6 = load i8, i8* %g6 + %t7 = load i8, i8* %g7 + + %z0 = zext i8 %t0 to i64 + %z1 = zext i8 %t1 to i64 + %z2 = zext i8 %t2 to i64 + %z3 = zext i8 %t3 to i64 + %z4 = zext i8 %t4 to i64 + %z5 = zext i8 %t5 to i64 + %z6 = zext i8 %t6 to i64 + %z7 = zext i8 %t7 to i64 + + %sh0 = shl nuw i64 %z0, 56 + %sh1 = shl nuw nsw i64 %z1, 48 + %sh2 = shl nuw nsw i64 %z2, 40 + %sh3 = shl nuw nsw i64 %z3, 32 + %sh4 = shl nuw nsw i64 %z4, 24 + %sh5 = shl nuw nsw i64 %z5, 16 + %sh6 = shl nuw nsw i64 %z6, 8 +; %sh7 = shl nuw nsw i64 %z7, 0 <-- omitted no-op shift: %z7 is or'ed in directly below, unlike the shifted %sh0..%sh6 (compare @load_bswap_nop_shift) + + %or01 = or i64 %sh0, %sh1 + %or012 = or i64 %or01, %sh2 + %or0123 = or i64 %or012, %sh3 + %or01234 = or i64 %or0123, %sh4 + %or012345 = or i64 %or01234, %sh5 + %or0123456 = or i64 %or012345, %sh6 + %or01234567 = or i64 %or0123456, %z7 + ret i64 %or01234567 +} + +define i64 @load_bswap_nop_shift(%v8i8* %p) { +; CHECK-LABEL: @load_bswap_nop_shift( +; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds [[V8I8:%.*]], %v8i8* [[P:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 3 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 4 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 5 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 6 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds [[V8I8]], %v8i8* [[P]], i64 0, i32 7 +; CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i8* [[G0]] to <8 x i8>* +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], +; CHECK-NEXT: [[OR01:%.*]] = or i64 undef, undef +; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], undef +; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], undef +; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], undef +; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], undef +; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], undef +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <8 x i64> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], undef +; CHECK-NEXT: ret i64 [[TMP5]] +; + %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0 + %g1 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 1 + %g2 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 2 + %g3 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 3 + %g4 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 4 + %g5 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 5 + %g6 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 6 + %g7 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 7 + + %t0 = load i8, i8* %g0 + %t1 = load i8, i8* %g1 + %t2 = load i8, i8* %g2 + %t3 = load i8, i8* %g3 + %t4 = load i8, i8* %g4 + %t5 = load i8, i8* %g5 + %t6 = load i8, i8* %g6 + %t7 = load i8, i8* %g7 + + %z0 = zext i8 %t0 to i64 + 
%z1 = zext i8 %t1 to i64 + %z2 = zext i8 %t2 to i64 + %z3 = zext i8 %t3 to i64 + %z4 = zext i8 %t4 to i64 + %z5 = zext i8 %t5 to i64 + %z6 = zext i8 %t6 to i64 + %z7 = zext i8 %t7 to i64 + + %sh0 = shl nuw i64 %z0, 56 + %sh1 = shl nuw nsw i64 %z1, 48 + %sh2 = shl nuw nsw i64 %z2, 40 + %sh3 = shl nuw nsw i64 %z3, 32 + %sh4 = shl nuw nsw i64 %z4, 24 + %sh5 = shl nuw nsw i64 %z5, 16 + %sh6 = shl nuw nsw i64 %z6, 8 + %sh7 = shl nuw nsw i64 %z7, 0 + + %or01 = or i64 %sh0, %sh1 + %or012 = or i64 %or01, %sh2 + %or0123 = or i64 %or012, %sh3 + %or01234 = or i64 %or0123, %sh4 + %or012345 = or i64 %or01234, %sh5 + %or0123456 = or i64 %or012345, %sh6 + %or01234567 = or i64 %or0123456, %sh7 + ret i64 %or01234567 +} + +; PR42708 (https://bugs.llvm.org/show_bug.cgi?id=42708): an i64 is assembled from 8 adjacent byte loads, with byte 0 in the least significant position. + +define i64 @load64le(i8* %arg) { +; CHECK-LABEL: @load64le( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i8, i8* [[ARG:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 3 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 4 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 5 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 6 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 7 +; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* [[ARG]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[G1]] to <4 x i8>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 +; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* [[G5]], align 1 +; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* [[G6]], align 1 +; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* [[G7]], align 1 +; CHECK-NEXT: [[Z0:%.*]] = zext i8 [[LD0]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[Z5:%.*]] = zext i8 [[LD5]] to i64 +; CHECK-NEXT: [[Z6:%.*]] = zext i8 [[LD6]] to i64 +; CHECK-NEXT: [[Z7:%.*]] = zext i8 [[LD7]] to i64 +; CHECK-NEXT: 
[[TMP4:%.*]] = shl nuw nsw <4 x i64> [[TMP3]], +; CHECK-NEXT: [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40 +; CHECK-NEXT: [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48 +; CHECK-NEXT: [[S7:%.*]] = shl nuw i64 [[Z7]], 56 +; CHECK-NEXT: [[O1:%.*]] = or i64 undef, [[Z0]] +; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], undef +; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], undef +; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], undef +; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], [[S5]] +; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], [[S6]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = or <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP5]], [[S5]] +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[S6]] +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[S7]] +; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z0]] +; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], [[S7]] +; CHECK-NEXT: ret i64 [[OP_EXTRA]] +; + %g1 = getelementptr inbounds i8, i8* %arg, i64 1 + %g2 = getelementptr inbounds i8, i8* %arg, i64 2 + %g3 = getelementptr inbounds i8, i8* %arg, i64 3 + %g4 = getelementptr inbounds i8, i8* %arg, i64 4 + %g5 = getelementptr inbounds i8, i8* %arg, i64 5 + %g6 = getelementptr inbounds i8, i8* %arg, i64 6 + %g7 = getelementptr inbounds i8, i8* %arg, i64 7 + + %ld0 = load i8, i8* %arg, align 1 + %ld1 = load i8, i8* %g1, align 1 + %ld2 = load i8, i8* %g2, align 1 + %ld3 = load i8, i8* %g3, align 1 + %ld4 = load i8, i8* %g4, align 1 + %ld5 = load i8, i8* %g5, align 1 + %ld6 = load i8, i8* %g6, align 1 + %ld7 = load i8, i8* %g7, align 1 + + %z0 = zext i8 %ld0 to i64 + %z1 = zext i8 %ld1 to i64 + %z2 = zext i8 %ld2 to i64 + %z3 = zext i8 %ld3 to i64 + %z4 = zext i8 %ld4 
to i64 + %z5 = zext i8 %ld5 to i64 + %z6 = zext i8 %ld6 to i64 + %z7 = zext i8 %ld7 to i64 + +; %s0 = shl nuw nsw i64 %z0, 0 <-- omitted no-op shift: %z0 is or'ed in directly below, unlike the shifted %s1..%s7 (compare @load64le_nop_shift) + %s1 = shl nuw nsw i64 %z1, 8 + %s2 = shl nuw nsw i64 %z2, 16 + %s3 = shl nuw nsw i64 %z3, 24 + %s4 = shl nuw nsw i64 %z4, 32 + %s5 = shl nuw nsw i64 %z5, 40 + %s6 = shl nuw nsw i64 %z6, 48 + %s7 = shl nuw i64 %z7, 56 + + %o1 = or i64 %s1, %z0 + %o2 = or i64 %o1, %s2 + %o3 = or i64 %o2, %s3 + %o4 = or i64 %o3, %s4 + %o5 = or i64 %o4, %s5 + %o6 = or i64 %o5, %s6 + %o7 = or i64 %o6, %s7 + ret i64 %o7 +} + +define i64 @load64le_nop_shift(i8* %arg) { +; CHECK-LABEL: @load64le_nop_shift( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i8, i8* [[ARG:%.*]], i64 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 2 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 3 +; CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 4 +; CHECK-NEXT: [[G5:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 5 +; CHECK-NEXT: [[G6:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 6 +; CHECK-NEXT: [[G7:%.*]] = getelementptr inbounds i8, i8* [[ARG]], i64 7 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[ARG]] to <8 x i8>* +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]], +; CHECK-NEXT: [[O1:%.*]] = or i64 undef, undef +; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], undef +; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], undef +; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], undef +; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], undef +; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], undef +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: 
[[BIN_RDX2:%.*]] = or <8 x i64> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32> +; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], undef +; CHECK-NEXT: ret i64 [[TMP5]] +; + %g1 = getelementptr inbounds i8, i8* %arg, i64 1 + %g2 = getelementptr inbounds i8, i8* %arg, i64 2 + %g3 = getelementptr inbounds i8, i8* %arg, i64 3 + %g4 = getelementptr inbounds i8, i8* %arg, i64 4 + %g5 = getelementptr inbounds i8, i8* %arg, i64 5 + %g6 = getelementptr inbounds i8, i8* %arg, i64 6 + %g7 = getelementptr inbounds i8, i8* %arg, i64 7 + + %ld0 = load i8, i8* %arg, align 1 + %ld1 = load i8, i8* %g1, align 1 + %ld2 = load i8, i8* %g2, align 1 + %ld3 = load i8, i8* %g3, align 1 + %ld4 = load i8, i8* %g4, align 1 + %ld5 = load i8, i8* %g5, align 1 + %ld6 = load i8, i8* %g6, align 1 + %ld7 = load i8, i8* %g7, align 1 + + %z0 = zext i8 %ld0 to i64 + %z1 = zext i8 %ld1 to i64 + %z2 = zext i8 %ld2 to i64 + %z3 = zext i8 %ld3 to i64 + %z4 = zext i8 %ld4 to i64 + %z5 = zext i8 %ld5 to i64 + %z6 = zext i8 %ld6 to i64 + %z7 = zext i8 %ld7 to i64 + + %s0 = shl nuw nsw i64 %z0, 0 + %s1 = shl nuw nsw i64 %z1, 8 + %s2 = shl nuw nsw i64 %z2, 16 + %s3 = shl nuw nsw i64 %z3, 24 + %s4 = shl nuw nsw i64 %z4, 32 + %s5 = shl nuw nsw i64 %z5, 40 + %s6 = shl nuw nsw i64 %z6, 48 + %s7 = shl nuw i64 %z7, 56 + + %o1 = or i64 %s1, %s0 + %o2 = or i64 %o1, %s2 + %o3 = or i64 %o2, %s3 + %o4 = or i64 %o3, %s4 + %o5 = or i64 %o4, %s5 + %o6 = or i64 %o5, %s6 + %o7 = or i64 %o6, %s7 + ret i64 %o7 +} -- 2.7.4