}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
- const auto *PrivateVD =
- cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
- QualType PrivateType = PrivateVD->getType();
+ QualType PrivateType = getPrivateType(N);
bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
if (!PrivateType->isVariablyModifiedType()) {
Sizes.emplace_back(
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
llvm::Value *Size) {
- const auto *PrivateVD =
- cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
- QualType PrivateType = PrivateVD->getType();
+ QualType PrivateType = getPrivateType(N);
if (!PrivateType->isVariablyModifiedType()) {
assert(!Size && !Sizes[N].second &&
"Size should be nullptr for non-variably modified reduction "
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
const OMPDeclareReductionDecl *DRD =
getReductionInit(ClausesData[N].ReductionOp);
- QualType PrivateType = PrivateVD->getType();
- PrivateAddr = CGF.Builder.CreateElementBitCast(
- PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
if (DRD && DRD->getInitializer())
(void)DefaultInit(CGF);
}
bool ReductionCodeGen::needCleanups(unsigned N) {
- const auto *PrivateVD =
- cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
- QualType PrivateType = PrivateVD->getType();
+ QualType PrivateType = getPrivateType(N);
QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
return DTorKind != QualType::DK_none;
}
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
Address PrivateAddr) {
- const auto *PrivateVD =
- cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
- QualType PrivateType = PrivateVD->getType();
+ QualType PrivateType = getPrivateType(N);
QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
if (needCleanups(N)) {
PrivateAddr = CGF.Builder.CreateElementBitCast(
Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
+ QualType PrivateType = RCG.getPrivateType(N);
Address PrivateAddr = CGF.EmitLoadOfPointer(
- CGF.GetAddrOfLocalVar(&Param),
- C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ CGF.Builder.CreateElementBitCast(
+ CGF.GetAddrOfLocalVar(&Param),
+ CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
+ C.getPointerType(PrivateType)->castAs<PointerType>());
llvm::Value *Size = nullptr;
// If the size of the reduction item is non-constant, load it from global
// threadprivate variable.
/// Returns true if the initialization of the reduction item uses initializer
/// from declare reduction construct.
bool usesReductionInitializer(unsigned N) const;
+ /// Return the type of the private item.
+ QualType getPrivateType(unsigned N) const {
+ return cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl())
+ ->getType();
+ }
};
class CGOpenMPRuntime {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
-// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
-// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP2]] to double*
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP3]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to double**
+// CHECK1-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[TMP3]], i64 [[TMP4]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[TMP3]], [[TMP5]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store double 0.000000e+00, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]]
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
-// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
-// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
-// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//