From: Simon Pilgrim Date: Mon, 10 Oct 2016 14:10:41 +0000 (+0000) Subject: [SLPVectorizer][X86] Fixed alignments of scalar loads in sitofp/uitofp tests X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6cadb5610effee8f32fd61c6d4e86312f0fec93a;p=platform%2Fupstream%2Fllvm.git [SLPVectorizer][X86] Fixed alignments of scalar loads in sitofp/uitofp tests Fixed copy+paste vector alignment to correct for per-element scalar loads Increased to 512-bit data sizes in preparation of avx512 tests llvm-svn: 283748 --- diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll index 70faa18..3984138 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll @@ -5,13 +5,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -@src64 = common global [4 x i64] zeroinitializer, align 32 -@src32 = common global [8 x i32] zeroinitializer, align 32 -@src16 = common global [16 x i16] zeroinitializer, align 32 -@src8 = common global [32 x i8] zeroinitializer, align 32 +@src64 = common global [8 x i64] zeroinitializer, align 64 +@src32 = common global [16 x i32] zeroinitializer, align 64 +@src16 = common global [32 x i16] zeroinitializer, align 64 +@src8 = common global [64 x i8] zeroinitializer, align 64 -@dst64 = common global [4 x double] zeroinitializer, align 32 -@dst32 = common global [8 x float] zeroinitializer, align 32 +@dst64 = common global [8 x double] zeroinitializer, align 64 +@dst32 = common global [16 x float] zeroinitializer, align 64 ; ; SITOFP to vXf64 @@ -19,219 +19,219 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @sitofp_2i64_2f64() #0 { ; CHECK-LABEL: @sitofp_2i64_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ; CHECK-NEXT: ret void ; - %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 - %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 + %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 + %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 %cvt0 = sitofp i64 %ld0 to double %cvt1 = sitofp i64 %ld1 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ret void } define void @sitofp_4i64_4f64() #0 { ; CHECK-LABEL: @sitofp_4i64_4f64( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 -; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 2), align 8 -; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 3), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 +; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double ; CHECK-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to double ; CHECK-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 -; CHECK-NEXT: store double [[CVT2]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 -; CHECK-NEXT: store double [[CVT3]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 +; CHECK-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ; CHECK-NEXT: ret void ; - %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 - %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 - %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 2), align 8 - %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 3), align 8 + %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 + %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 + %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 + %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 %cvt0 = sitofp i64 %ld0 to double %cvt1 = sitofp i64 %ld1 to double %cvt2 = sitofp i64 %ld2 to double %cvt3 = sitofp i64 %ld3 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 - store double %cvt2, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 - store double %cvt3, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 + store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ret void } define void @sitofp_2i32_2f64() #0 { ; CHECK-LABEL: @sitofp_2i32_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ; CHECK-NEXT: ret void ; - %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 - %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 + %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 + %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 %cvt0 = sitofp i32 %ld0 to double %cvt1 = sitofp i32 %ld1 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ret void } define void @sitofp_4i32_4f64() #0 { ; SSE-LABEL: @sitofp_4i32_4f64( -; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 -; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 -; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 8 -; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 8 +; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 +; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8 +; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i32 [[LD0]] to double ; SSE-NEXT: [[CVT1:%.*]] = sitofp i32 [[LD1]] to double ; SSE-NEXT: [[CVT2:%.*]] = sitofp i32 [[LD2]] to double ; SSE-NEXT: [[CVT3:%.*]] = sitofp i32 [[LD3]] to double -; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 -; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 -; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 +; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ; SSE-NEXT: ret void ; ; AVX-LABEL: @sitofp_4i32_4f64( -; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x double> -; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([4 x double]* @dst64 to <4 x double>*), align 8 +; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 - %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 - %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 8 - %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 8 + %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 + %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 + %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8 + %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4 %cvt0 = sitofp i32 %ld0 to double %cvt1 = sitofp i32 %ld1 to double %cvt2 = sitofp i32 %ld2 to double %cvt3 = sitofp i32 %ld3 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 - store double %cvt2, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 - store double %cvt3, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 + store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ret void } define void @sitofp_2i16_2f64() #0 { ; CHECK-LABEL: @sitofp_2i16_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to double ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ; CHECK-NEXT: ret void ; - %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 - %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 + %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 + %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 %cvt0 = sitofp i16 %ld0 to double %cvt1 = sitofp i16 %ld1 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ret void } define void @sitofp_4i16_4f64() #0 { ; SSE-LABEL: @sitofp_4i16_4f64( -; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 -; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 -; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 2), align 8 -; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 3), align 8 +; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 +; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 +; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to double ; SSE-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to double ; SSE-NEXT: [[CVT2:%.*]] = sitofp i16 [[LD2]] to double ; SSE-NEXT: [[CVT3:%.*]] = sitofp i16 [[LD3]] to double -; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 -; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 -; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 +; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ; SSE-NEXT: ret void ; ; AVX-LABEL: @sitofp_4i16_4f64( -; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([16 x i16]* @src16 to <4 x i16>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x double> -; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([4 x double]* @dst64 to <4 x double>*), align 8 +; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 - %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 - %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 2), align 8 - %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 3), align 8 + %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 + %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 + %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 + %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 %cvt0 = sitofp i16 %ld0 to double %cvt1 = sitofp i16 %ld1 to double %cvt2 = sitofp i16 %ld2 to double %cvt3 = sitofp i16 %ld3 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 - store double %cvt2, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 - store double %cvt3, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 + store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ret void } define void @sitofp_2i8_2f64() #0 { ; CHECK-LABEL: @sitofp_2i8_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i8 [[LD0]] to double ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i8 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ; CHECK-NEXT: ret void ; - %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 - %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 + %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 + %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 %cvt0 = sitofp i8 %ld0 to double %cvt1 = sitofp i8 %ld1 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ret void } define void @sitofp_4i8_4f64() #0 { ; SSE-LABEL: @sitofp_4i8_4f64( -; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 -; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 -; SSE-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 2), align 8 -; SSE-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 3), align 8 +; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 +; SSE-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2 +; SSE-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1 ; SSE-NEXT: [[CVT0:%.*]] = sitofp i8 [[LD0]] to double ; SSE-NEXT: [[CVT1:%.*]] = sitofp i8 [[LD1]] to double ; SSE-NEXT: [[CVT2:%.*]] = sitofp i8 [[LD2]] to double ; SSE-NEXT: [[CVT3:%.*]] = sitofp i8 [[LD3]] to double -; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 -; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 -; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 +; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ; SSE-NEXT: ret void ; ; AVX-LABEL: @sitofp_4i8_4f64( -; AVX-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([32 x i8]* @src8 to <4 x i8>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x double> -; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([4 x double]* @dst64 to <4 x double>*), align 8 +; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 - %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 - %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 2), align 8 - %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 3), align 8 + %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 + %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 + %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2 + %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1 %cvt0 = sitofp i8 %ld0 to double %cvt1 = sitofp i8 %ld1 to double %cvt2 = sitofp i8 %ld2 to double %cvt3 = sitofp i8 %ld3 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 - store double %cvt2, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 - store double %cvt3, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 + store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ret void } @@ -241,100 +241,100 @@ define void @sitofp_4i8_4f64() #0 { define void @sitofp_2i64_2f32() #0 { ; CHECK-LABEL: @sitofp_2i64_2f32( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float -; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 -; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 +; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 +; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 ; CHECK-NEXT: ret void ; - %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 - %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 + %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 + %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 %cvt0 = sitofp i64 %ld0 to float %cvt1 = sitofp i64 %ld1 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 ret void } define void @sitofp_4i64_4f32() #0 { ; CHECK-LABEL: @sitofp_4i64_4f32( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 -; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 2), align 8 -; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 3), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 +; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 ; CHECK-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to float ; CHECK-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to float ; CHECK-NEXT: [[CVT2:%.*]] = sitofp i64 [[LD2]] to float ; CHECK-NEXT: [[CVT3:%.*]] = sitofp i64 [[LD3]] to float -; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 -; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 -; CHECK-NEXT: store float [[CVT2]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 -; CHECK-NEXT: store float [[CVT3]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 +; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 +; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 +; CHECK-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 +; CHECK-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ; CHECK-NEXT: ret void ; - %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 - %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 - %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 2), align 8 - %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 3), align 8 + %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 + %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 + %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 + %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 %cvt0 = sitofp i64 %ld0 to float %cvt1 = sitofp i64 %ld1 to float %cvt2 = sitofp i64 %ld2 to float %cvt3 = sitofp i64 %ld3 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ret void } define void @sitofp_4i32_4f32() #0 { ; CHECK-LABEL: @sitofp_4i32_4f32( -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64 ; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float> -; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 ; CHECK-NEXT: ret void ; - %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 - %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 - %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 8 - %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 8 + %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 + %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 + %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8 + %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4 %cvt0 = sitofp i32 %ld0 to float %cvt1 = sitofp i32 %ld1 to float %cvt2 = sitofp i32 %ld2 to float %cvt3 = sitofp i32 %ld3 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ret void } define void @sitofp_8i32_8f32() #0 { ; SSE-LABEL: @sitofp_8i32_8f32( -; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 8 -; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 8 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16 ; SSE-NEXT: [[TMP3:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float> ; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i32> [[TMP2]] to <4 x float> -; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 -; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 8 +; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX-LABEL: @sitofp_8i32_8f32( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[TMP1]] to <8 x float> -; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([8 x float]* @dst32 to <8 x float>*), align 8 +; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 - %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 - %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 8 - %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 8 - %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 8 - %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 8 - %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 8 - %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 8 + %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 + %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 + %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8 + %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4 + %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16 + %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4 + %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8 + %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4 %cvt0 = sitofp i32 %ld0 to float %cvt1 = sitofp i32 %ld1 to float %cvt2 = sitofp i32 %ld2 to float @@ -343,63 +343,63 @@ define void @sitofp_8i32_8f32() #0 { %cvt5 = sitofp i32 %ld5 to float %cvt6 = sitofp i32 %ld6 to float %cvt7 = sitofp i32 %ld7 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 - store float %cvt4, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4), align 8 - store float %cvt5, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 5), align 8 - store float %cvt6, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 6), align 8 - store float %cvt7, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 7), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 + store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 + store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 + store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 + store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 ret void } define void @sitofp_4i16_4f32() #0 { ; CHECK-LABEL: @sitofp_4i16_4f32( -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([16 x i16]* @src16 to <4 x i16>*), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 ; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float> -; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 ; CHECK-NEXT: ret void ; - %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 - %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 - %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 2), align 8 - %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 3), align 8 + %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 + %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 + %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 + %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 %cvt0 = sitofp i16 %ld0 to float %cvt1 = sitofp i16 %ld1 to float %cvt2 = sitofp i16 %ld2 to float %cvt3 = sitofp i16 %ld3 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ret void } define void @sitofp_8i16_8f32() #0 { ; SSE-LABEL: @sitofp_8i16_8f32( -; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([16 x i16]* @src16 to <4 x i16>*), align 8 -; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 4 ; SSE-NEXT: [[TMP3:%.*]] = sitofp <4 x i16> [[TMP1]] to <4 x float> ; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float> -; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 -; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 8 +; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX-LABEL: @sitofp_8i16_8f32( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i16> [[TMP1]] to <8 x float> -; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([8 x float]* @dst32 to <8 x float>*), align 8 +; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 - %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 - %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 2), align 8 - %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 3), align 8 - %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 4), align 8 - %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 5), align 8 - %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 6), align 8 - %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 7), align 8 + %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 + %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 1 + %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 2 + %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 1 + %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 4 + %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 1 + %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 2 + %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 1 %cvt0 = sitofp i16 %ld0 to float %cvt1 = sitofp i16 %ld1 to float %cvt2 = sitofp i16 %ld2 to float @@ -408,63 +408,63 @@ define void @sitofp_8i16_8f32() #0 { %cvt5 = sitofp i16 %ld5 to float %cvt6 = sitofp i16 %ld6 to float %cvt7 = sitofp i16 %ld7 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 - store float %cvt4, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4), align 8 - store float %cvt5, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 5), align 8 - store float %cvt6, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 6), align 8 - store float %cvt7, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 7), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 + store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 + store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 + store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 + store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 ret void } define void @sitofp_4i8_4f32() #0 { ; CHECK-LABEL: @sitofp_4i8_4f32( -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([32 x i8]* @src8 to <4 x i8>*), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64 ; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float> -; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 ; CHECK-NEXT: ret void ; - %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 - %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 - %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 2), align 8 - %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 3), align 8 + %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 + %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 + %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2 + %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1 %cvt0 = sitofp i8 %ld0 to float %cvt1 = sitofp i8 %ld1 to float %cvt2 = sitofp i8 %ld2 to float %cvt3 = sitofp i8 %ld3 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ret void } define void @sitofp_8i8_8f32() #0 { ; SSE-LABEL: @sitofp_8i8_8f32( -; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([32 x i8]* @src8 to <4 x i8>*), align 8 -; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 8 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4 ; SSE-NEXT: [[TMP3:%.*]] = sitofp <4 x i8> [[TMP1]] to <4 x float> ; SSE-NEXT: [[TMP4:%.*]] = sitofp <4 x i8> [[TMP2]] to <4 x float> -; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 -; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 8 +; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX-LABEL: @sitofp_8i8_8f32( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([32 x i8]* @src8 to <8 x i8>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = sitofp <8 x i8> [[TMP1]] to <8 x float> -; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([8 x float]* @dst32 to <8 x float>*), align 8 +; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 - %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 - %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 2), align 8 - %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 3), align 8 - %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 4), align 8 - %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 5), align 8 - %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 6), align 8 - %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 7), align 8 + %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 + %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 + %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2 + %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1 + %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4 + %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1 + %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2 + %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1 %cvt0 = sitofp i8 %ld0 to float %cvt1 = sitofp i8 %ld1 to float %cvt2 = sitofp i8 %ld2 to float @@ -473,14 +473,14 @@ define void @sitofp_8i8_8f32() #0 { %cvt5 = sitofp i8 %ld5 to float %cvt6 = sitofp i8 %ld6 to float %cvt7 = sitofp i8 %ld7 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 - store float %cvt4, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4), align 8 - store float %cvt5, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 5), align 8 - store float %cvt6, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 6), align 8 - store float %cvt7, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 7), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 + store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 + store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 + store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 + store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 ret void } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll index 5aad493..ada75ed 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll @@ -5,13 +5,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -@src64 = common global [4 x i64] zeroinitializer, align 32 -@src32 = common global [8 x i32] zeroinitializer, align 32 -@src16 = common global [16 x i16] zeroinitializer, align 32 -@src8 = common global [32 x i8] zeroinitializer, align 32 +@src64 = common global [8 x i64] zeroinitializer, align 64 +@src32 = common global [16 x i32] zeroinitializer, align 64 +@src16 = common global [32 x i16] zeroinitializer, align 64 +@src8 = common global [64 x i8] zeroinitializer, align 64 -@dst64 = common global [4 x double] zeroinitializer, align 32 -@dst32 = common global [8 x float] zeroinitializer, align 32 +@dst64 = common global [8 x double] zeroinitializer, align 64 +@dst32 = common global [16 x float] zeroinitializer, align 64 ; ; UITOFP to vXf64 @@ -19,219 +19,219 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @uitofp_2i64_2f64() #0 { ; CHECK-LABEL: @uitofp_2i64_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 ; CHECK-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double ; CHECK-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ; CHECK-NEXT: ret void ; - %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 - %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 + %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 + %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 %cvt0 = uitofp i64 %ld0 to double %cvt1 = uitofp i64 %ld1 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ret void } define void @uitofp_4i64_4f64() #0 { ; CHECK-LABEL: @uitofp_4i64_4f64( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 -; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 2), align 8 -; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 3), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 +; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 ; CHECK-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to double ; CHECK-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to double ; CHECK-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to double ; CHECK-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 -; CHECK-NEXT: store double [[CVT2]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 -; CHECK-NEXT: store double [[CVT3]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 +; CHECK-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ; CHECK-NEXT: ret void ; - %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 - %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 - %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 2), align 8 - %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 3), align 8 + %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 + %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 + %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 + %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 %cvt0 = uitofp i64 %ld0 to double %cvt1 = uitofp i64 %ld1 to double %cvt2 = uitofp i64 %ld2 to double %cvt3 = uitofp i64 %ld3 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 - store double %cvt2, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 - store double %cvt3, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 + store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ret void } define void @uitofp_2i32_2f64() #0 { ; CHECK-LABEL: @uitofp_2i32_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 ; CHECK-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double ; CHECK-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ; CHECK-NEXT: ret void ; - %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 - %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 + %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 + %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 %cvt0 = uitofp i32 %ld0 to double %cvt1 = uitofp i32 %ld1 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ret void } define void @uitofp_4i32_4f64() #0 { ; SSE-LABEL: @uitofp_4i32_4f64( -; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 -; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 -; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 8 -; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 8 +; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 +; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8 +; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4 ; SSE-NEXT: [[CVT0:%.*]] = uitofp i32 [[LD0]] to double ; SSE-NEXT: [[CVT1:%.*]] = uitofp i32 [[LD1]] to double ; SSE-NEXT: [[CVT2:%.*]] = uitofp i32 [[LD2]] to double ; SSE-NEXT: [[CVT3:%.*]] = uitofp i32 [[LD3]] to double -; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 -; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 -; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 +; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ; SSE-NEXT: ret void ; ; AVX-LABEL: @uitofp_4i32_4f64( -; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double> -; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([4 x double]* @dst64 to <4 x double>*), align 8 +; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 - %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 - %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 8 - %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 8 + %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 + %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 + %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8 + %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4 %cvt0 = uitofp i32 %ld0 to double %cvt1 = uitofp i32 %ld1 to double %cvt2 = uitofp i32 %ld2 to double %cvt3 = uitofp i32 %ld3 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 - store double %cvt2, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 - store double %cvt3, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 + store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ret void } define void @uitofp_2i16_2f64() #0 { ; CHECK-LABEL: @uitofp_2i16_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 ; CHECK-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to double ; CHECK-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ; CHECK-NEXT: ret void ; - %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 - %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 + %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 + %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 %cvt0 = uitofp i16 %ld0 to double %cvt1 = uitofp i16 %ld1 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ret void } define void @uitofp_4i16_4f64() #0 { ; SSE-LABEL: @uitofp_4i16_4f64( -; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 -; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 -; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 2), align 8 -; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 3), align 8 +; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 +; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 +; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 ; SSE-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to double ; SSE-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to double ; SSE-NEXT: [[CVT2:%.*]] = uitofp i16 [[LD2]] to double ; SSE-NEXT: [[CVT3:%.*]] = uitofp i16 [[LD3]] to double -; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 -; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 -; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 +; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ; SSE-NEXT: ret void ; ; AVX-LABEL: @uitofp_4i16_4f64( -; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([16 x i16]* @src16 to <4 x i16>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x double> -; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([4 x double]* @dst64 to <4 x double>*), align 8 +; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 - %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 - %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 2), align 8 - %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 3), align 8 + %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 + %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 + %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 + %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 %cvt0 = uitofp i16 %ld0 to double %cvt1 = uitofp i16 %ld1 to double %cvt2 = uitofp i16 %ld2 to double %cvt3 = uitofp i16 %ld3 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 - store double %cvt2, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 - store double %cvt3, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 + store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ret void } define void @uitofp_2i8_2f64() #0 { ; CHECK-LABEL: @uitofp_2i8_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 ; CHECK-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double ; CHECK-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 +; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ; CHECK-NEXT: ret void ; - %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 - %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 + %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 + %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 %cvt0 = uitofp i8 %ld0 to double %cvt1 = uitofp i8 %ld1 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 ret void } define void @uitofp_4i8_4f64() #0 { ; SSE-LABEL: @uitofp_4i8_4f64( -; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 -; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 -; SSE-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 2), align 8 -; SSE-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 3), align 8 +; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 +; SSE-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2 +; SSE-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1 ; SSE-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double ; SSE-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double ; SSE-NEXT: [[CVT2:%.*]] = uitofp i8 [[LD2]] to double ; SSE-NEXT: [[CVT3:%.*]] = uitofp i8 [[LD3]] to double -; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 -; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 -; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 -; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: store double [[CVT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 +; SSE-NEXT: store double [[CVT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ; SSE-NEXT: ret void ; ; AVX-LABEL: @uitofp_4i8_4f64( -; AVX-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([32 x i8]* @src8 to <4 x i8>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x double> -; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([4 x double]* @dst64 to <4 x double>*), align 8 +; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 - %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 - %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 2), align 8 - %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 3), align 8 + %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 + %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 + %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2 + %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1 %cvt0 = uitofp i8 %ld0 to double %cvt1 = uitofp i8 %ld1 to double %cvt2 = uitofp i8 %ld2 to double %cvt3 = uitofp i8 %ld3 to double - store double %cvt0, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 0), align 8 - store double %cvt1, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 1), align 8 - store double %cvt2, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 2), align 8 - store double %cvt3, double* getelementptr inbounds ([4 x double], [4 x double]* @dst64, i32 0, i64 3), align 8 + store double %cvt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 + store double %cvt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 + store double %cvt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 16 + store double %cvt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8 ret void } @@ -241,100 +241,100 @@ define void @uitofp_4i8_4f64() #0 { define void @uitofp_2i64_2f32() #0 { ; CHECK-LABEL: @uitofp_2i64_2f32( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 ; CHECK-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float ; CHECK-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float -; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 -; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 +; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 +; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 ; CHECK-NEXT: ret void ; - %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 - %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 + %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 + %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 %cvt0 = uitofp i64 %ld0 to float %cvt1 = uitofp i64 %ld1 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 ret void } define void @uitofp_4i64_4f32() #0 { ; CHECK-LABEL: @uitofp_4i64_4f32( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 -; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 2), align 8 -; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 3), align 8 +; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 +; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 +; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 ; CHECK-NEXT: [[CVT0:%.*]] = uitofp i64 [[LD0]] to float ; CHECK-NEXT: [[CVT1:%.*]] = uitofp i64 [[LD1]] to float ; CHECK-NEXT: [[CVT2:%.*]] = uitofp i64 [[LD2]] to float ; CHECK-NEXT: [[CVT3:%.*]] = uitofp i64 [[LD3]] to float -; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 -; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 -; CHECK-NEXT: store float [[CVT2]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 -; CHECK-NEXT: store float [[CVT3]], float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 +; CHECK-NEXT: store float [[CVT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 +; CHECK-NEXT: store float [[CVT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 +; CHECK-NEXT: store float [[CVT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 +; CHECK-NEXT: store float [[CVT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ; CHECK-NEXT: ret void ; - %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 - %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 - %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 2), align 8 - %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 3), align 8 + %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 + %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 + %ld2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 2), align 16 + %ld3 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 3), align 8 %cvt0 = uitofp i64 %ld0 to float %cvt1 = uitofp i64 %ld1 to float %cvt2 = uitofp i64 %ld2 to float %cvt3 = uitofp i64 %ld3 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ret void } define void @uitofp_4i32_4f32() #0 { ; CHECK-LABEL: @uitofp_4i32_4f32( -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64 ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float> -; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 ; CHECK-NEXT: ret void ; - %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 - %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 - %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 8 - %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 8 + %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 + %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 + %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8 + %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4 %cvt0 = uitofp i32 %ld0 to float %cvt1 = uitofp i32 %ld1 to float %cvt2 = uitofp i32 %ld2 to float %cvt3 = uitofp i32 %ld3 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ret void } define void @uitofp_8i32_8f32() #0 { ; SSE-LABEL: @uitofp_8i32_8f32( -; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 8 -; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 8 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @src32 to <4 x i32>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 16 ; SSE-NEXT: [[TMP3:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float> ; SSE-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP2]] to <4 x float> -; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 -; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 8 +; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX-LABEL: @uitofp_8i32_8f32( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([16 x i32]* @src32 to <8 x i32>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[TMP1]] to <8 x float> -; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([8 x float]* @dst32 to <8 x float>*), align 8 +; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 8 - %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 8 - %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 8 - %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 8 - %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 8 - %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 8 - %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 8 - %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 8 + %ld0 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 0), align 64 + %ld1 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 1), align 4 + %ld2 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 2), align 8 + %ld3 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 3), align 4 + %ld4 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 4), align 16 + %ld5 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 5), align 4 + %ld6 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 6), align 8 + %ld7 = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @src32, i32 0, i64 7), align 4 %cvt0 = uitofp i32 %ld0 to float %cvt1 = uitofp i32 %ld1 to float %cvt2 = uitofp i32 %ld2 to float @@ -343,63 +343,63 @@ define void @uitofp_8i32_8f32() #0 { %cvt5 = uitofp i32 %ld5 to float %cvt6 = uitofp i32 %ld6 to float %cvt7 = uitofp i32 %ld7 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 - store float %cvt4, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4), align 8 - store float %cvt5, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 5), align 8 - store float %cvt6, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 6), align 8 - store float %cvt7, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 7), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 + store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 + store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 + store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 + store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 ret void } define void @uitofp_4i16_4f32() #0 { ; CHECK-LABEL: @uitofp_4i16_4f32( -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([16 x i16]* @src16 to <4 x i16>*), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float> -; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 ; CHECK-NEXT: ret void ; - %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 - %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 - %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 2), align 8 - %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 3), align 8 + %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 + %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2 + %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 4 + %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 2 %cvt0 = uitofp i16 %ld0 to float %cvt1 = uitofp i16 %ld1 to float %cvt2 = uitofp i16 %ld2 to float %cvt3 = uitofp i16 %ld3 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ret void } define void @uitofp_8i16_8f32() #0 { ; SSE-LABEL: @uitofp_8i16_8f32( -; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([16 x i16]* @src16 to <4 x i16>*), align 8 -; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 8 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* bitcast ([32 x i16]* @src16 to <4 x i16>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* bitcast (i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4) to <4 x i16>*), align 4 ; SSE-NEXT: [[TMP3:%.*]] = uitofp <4 x i16> [[TMP1]] to <4 x float> ; SSE-NEXT: [[TMP4:%.*]] = uitofp <4 x i16> [[TMP2]] to <4 x float> -; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 -; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 8 +; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX-LABEL: @uitofp_8i16_8f32( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([32 x i16]* @src16 to <8 x i16>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = uitofp <8 x i16> [[TMP1]] to <8 x float> -; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([8 x float]* @dst32 to <8 x float>*), align 8 +; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 0), align 8 - %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 1), align 8 - %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 2), align 8 - %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 3), align 8 - %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 4), align 8 - %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 5), align 8 - %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 6), align 8 - %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i32 0, i64 7), align 8 + %ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64 + %ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 1 + %ld2 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 2), align 2 + %ld3 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 3), align 1 + %ld4 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 4), align 4 + %ld5 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 5), align 1 + %ld6 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 6), align 2 + %ld7 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 7), align 1 %cvt0 = uitofp i16 %ld0 to float %cvt1 = uitofp i16 %ld1 to float %cvt2 = uitofp i16 %ld2 to float @@ -408,63 +408,63 @@ define void @uitofp_8i16_8f32() #0 { %cvt5 = uitofp i16 %ld5 to float %cvt6 = uitofp i16 %ld6 to float %cvt7 = uitofp i16 %ld7 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 - store float %cvt4, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4), align 8 - store float %cvt5, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 5), align 8 - store float %cvt6, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 6), align 8 - store float %cvt7, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 7), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 + store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 + store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 + store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 + store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 ret void } define void @uitofp_4i8_4f32() #0 { ; CHECK-LABEL: @uitofp_4i8_4f32( -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([32 x i8]* @src8 to <4 x i8>*), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64 ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float> -; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 ; CHECK-NEXT: ret void ; - %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 - %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 - %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 2), align 8 - %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 3), align 8 + %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 + %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 + %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2 + %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1 %cvt0 = uitofp i8 %ld0 to float %cvt1 = uitofp i8 %ld1 to float %cvt2 = uitofp i8 %ld2 to float %cvt3 = uitofp i8 %ld3 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 ret void } define void @uitofp_8i8_8f32() #0 { ; SSE-LABEL: @uitofp_8i8_8f32( -; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([32 x i8]* @src8 to <4 x i8>*), align 8 -; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 8 +; SSE-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* bitcast ([64 x i8]* @src8 to <4 x i8>*), align 64 +; SSE-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4) to <4 x i8>*), align 4 ; SSE-NEXT: [[TMP3:%.*]] = uitofp <4 x i8> [[TMP1]] to <4 x float> ; SSE-NEXT: [[TMP4:%.*]] = uitofp <4 x i8> [[TMP2]] to <4 x float> -; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([8 x float]* @dst32 to <4 x float>*), align 8 -; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 8 +; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 64 +; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 16 ; SSE-NEXT: ret void ; ; AVX-LABEL: @uitofp_8i8_8f32( -; AVX-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([32 x i8]* @src8 to <8 x i8>*), align 8 +; AVX-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* bitcast ([64 x i8]* @src8 to <8 x i8>*), align 64 ; AVX-NEXT: [[TMP2:%.*]] = uitofp <8 x i8> [[TMP1]] to <8 x float> -; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([8 x float]* @dst32 to <8 x float>*), align 8 +; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 64 ; AVX-NEXT: ret void ; - %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 0), align 8 - %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 1), align 8 - %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 2), align 8 - %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 3), align 8 - %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 4), align 8 - %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 5), align 8 - %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 6), align 8 - %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i32 0, i64 7), align 8 + %ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64 + %ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1 + %ld2 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 2), align 2 + %ld3 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 3), align 1 + %ld4 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 4), align 4 + %ld5 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 5), align 1 + %ld6 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 6), align 2 + %ld7 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 7), align 1 %cvt0 = uitofp i8 %ld0 to float %cvt1 = uitofp i8 %ld1 to float %cvt2 = uitofp i8 %ld2 to float @@ -473,14 +473,14 @@ define void @uitofp_8i8_8f32() #0 { %cvt5 = uitofp i8 %ld5 to float %cvt6 = uitofp i8 %ld6 to float %cvt7 = uitofp i8 %ld7 to float - store float %cvt0, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 0), align 8 - store float %cvt1, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 1), align 8 - store float %cvt2, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 2), align 8 - store float %cvt3, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 3), align 8 - store float %cvt4, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 4), align 8 - store float %cvt5, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 5), align 8 - store float %cvt6, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 6), align 8 - store float %cvt7, float* getelementptr inbounds ([8 x float], [8 x float]* @dst32, i32 0, i64 7), align 8 + store float %cvt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 64 + store float %cvt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4 + store float %cvt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 8 + store float %cvt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4 + store float %cvt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 16 + store float %cvt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4 + store float %cvt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 8 + store float %cvt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4 ret void }