const DataLayout *DL = nullptr;
MemoryDependenceResults *MDA = nullptr;
- bool checkArgumentUses(Value &Arg) const;
- bool isOutArgumentCandidate(Argument &Arg) const;
-
-#ifndef NDEBUG
- bool isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const;
-#endif
+ Type *getStoredType(Value &Arg) const;
+ Type *getOutArgumentType(Argument &Arg) const;
public:
static char ID;
char AMDGPURewriteOutArguments::ID = 0;
-bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const {
+Type *AMDGPURewriteOutArguments::getStoredType(Value &Arg) const { // Returns the one type stored through Arg (looking through bitcasts), or nullptr.
const int MaxUses = 10;
int UseCount = 0;
- for (Use &U : Arg.uses()) {
- StoreInst *SI = dyn_cast<StoreInst>(U.getUser());
- if (UseCount > MaxUses)
- return false;
+ SmallVector<Use *> Worklist; // Uses of Arg, and of bitcasts of Arg, that still have to be examined.
+ for (Use &U : Arg.uses())
+ Worklist.push_back(&U);
- if (!SI) {
- auto *BCI = dyn_cast<BitCastInst>(U.getUser());
- if (!BCI || !BCI->hasOneUse())
- return false;
-
- // We don't handle multiple stores currently, so stores to aggregate
- // pointers aren't worth the trouble since they are canonically split up.
- Type *DestEltTy = BCI->getType()->getPointerElementType();
- if (DestEltTy->isAggregateType())
- return false;
-
- // We could handle these if we had a convenient way to bitcast between
- // them.
- Type *SrcEltTy = Arg.getType()->getPointerElementType();
- if (SrcEltTy->isArrayTy())
- return false;
-
- // Special case handle structs with single members. It is useful to handle
- // some casts between structs and non-structs, but we can't bitcast
- // directly between them. Blender uses some casts that look like
- // { <3 x float> }* to <4 x float>*
- if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1)))
- return false;
-
- // Clang emits OpenCL 3-vector type accesses with a bitcast to the
- // equivalent 4-element vector and accesses that, and we're looking for
- // this pointer cast.
- if (DL->getTypeAllocSize(SrcEltTy) != DL->getTypeAllocSize(DestEltTy))
- return false;
-
- return checkArgumentUses(*BCI);
+ Type *StoredType = nullptr; // Type of the stores seen so far; stays null until the first store.
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+
+ if (auto *BCI = dyn_cast<BitCastInst>(U->getUser())) {
+ for (Use &U : BCI->uses()) // Look through the bitcast by queueing its uses; this U shadows the outer Use *U.
+ Worklist.push_back(&U);
+ continue;
}
- if (!SI->isSimple() ||
- U.getOperandNo() != StoreInst::getPointerOperandIndex())
- return false;
+ if (auto *SI = dyn_cast<StoreInst>(U->getUser())) {
+ if (UseCount++ > MaxUses) // Give up once the number of stores already counted exceeds MaxUses.
+ return nullptr;
+
+ if (!SI->isSimple() ||
+ U->getOperandNo() != StoreInst::getPointerOperandIndex()) // The use must be the store's pointer operand, not the stored value.
+ return nullptr;
- ++UseCount;
+ if (StoredType && StoredType != SI->getValueOperand()->getType())
+ return nullptr; // More than one type.
+ StoredType = SI->getValueOperand()->getType();
+ continue;
+ }
+
+ // Unsupported user.
+ return nullptr;
}
- // Skip unused arguments.
- return UseCount > 0;
+ return StoredType; // Still nullptr when no store was found.
}
-bool AMDGPURewriteOutArguments::isOutArgumentCandidate(Argument &Arg) const {
+Type *AMDGPURewriteOutArguments::getOutArgumentType(Argument &Arg) const { // Returns the stored type if Arg qualifies as an out argument, otherwise nullptr.
const unsigned MaxOutArgSizeBytes = 4 * MaxNumRetRegs;
PointerType *ArgTy = dyn_cast<PointerType>(Arg.getType());
// TODO: It might be useful for any out arguments, not just privates.
if (!ArgTy || (ArgTy->getAddressSpace() != DL->getAllocaAddrSpace() &&
!AnyAddressSpace) ||
- Arg.hasByValAttr() || Arg.hasStructRetAttr() ||
- DL->getTypeStoreSize(ArgTy->getPointerElementType()) > MaxOutArgSizeBytes) {
- return false;
+ Arg.hasByValAttr() || Arg.hasStructRetAttr()) { // byval and sret arguments are rejected.
+ return nullptr;
}
- return checkArgumentUses(Arg);
+ Type *StoredType = getStoredType(Arg);
+ if (!StoredType || DL->getTypeStoreSize(StoredType) > MaxOutArgSizeBytes) // Reject when no single stored type exists or its store size exceeds the limit.
+ return nullptr;
+
+ return StoredType;
}
bool AMDGPURewriteOutArguments::doInitialization(Module &M) {
return false;
}
-#ifndef NDEBUG
-bool AMDGPURewriteOutArguments::isVec3ToVec4Shuffle(Type *Ty0, Type* Ty1) const {
- auto *VT0 = dyn_cast<FixedVectorType>(Ty0);
- auto *VT1 = dyn_cast<FixedVectorType>(Ty1);
- if (!VT0 || !VT1)
- return false;
-
- if (VT0->getNumElements() != 3 ||
- VT1->getNumElements() != 4)
- return false;
-
- return DL->getTypeSizeInBits(VT0->getElementType()) ==
- DL->getTypeSizeInBits(VT1->getElementType());
-}
-#endif
-
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
unsigned ReturnNumRegs = 0;
- SmallSet<int, 4> OutArgIndexes;
+ SmallDenseMap<int, Type *, 4> OutArgIndexes;
SmallVector<Type *, 4> ReturnTypes;
Type *RetTy = F.getReturnType();
if (!RetTy->isVoidTy()) {
ReturnTypes.push_back(RetTy);
}
- SmallVector<Argument *, 4> OutArgs;
+ SmallVector<std::pair<Argument *, Type *>, 4> OutArgs;
for (Argument &Arg : F.args()) {
- if (isOutArgumentCandidate(Arg)) {
+ if (Type *Ty = getOutArgumentType(Arg)) {
LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
<< " in function " << F.getName() << '\n');
- OutArgs.push_back(&Arg);
+ OutArgs.push_back({&Arg, Ty});
}
}
// first. On the second iteration we've removed that out clobbering argument
// (by effectively moving it into another function) and will find the second
// argument is OK to move.
- for (Argument *OutArg : OutArgs) {
+ for (const auto &Pair : OutArgs) {
bool ThisReplaceable = true;
SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;
- Type *ArgTy = OutArg->getType()->getPointerElementType();
+ Argument *OutArg = Pair.first;
+ Type *ArgTy = Pair.second;
// Skip this argument if converting it will push us over the register
// count to return limit.
if (ThisReplaceable) {
ReturnTypes.push_back(ArgTy);
- OutArgIndexes.insert(OutArg->getArgNo());
+ OutArgIndexes.insert({OutArg->getArgNo(), ArgTy});
++NumOutArgumentsReplaced;
Changing = true;
}
if (RetVal)
NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);
- for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
- Argument *Arg = ReturnPoint.first;
- Value *Val = ReturnPoint.second;
- Type *EltTy = Arg->getType()->getPointerElementType();
- if (Val->getType() != EltTy) {
- Type *EffectiveEltTy = EltTy;
- if (StructType *CT = dyn_cast<StructType>(EltTy)) {
- assert(CT->getNumElements() == 1);
- EffectiveEltTy = CT->getElementType(0);
- }
-
- if (DL->getTypeSizeInBits(EffectiveEltTy) !=
- DL->getTypeSizeInBits(Val->getType())) {
- assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
- Val = B.CreateShuffleVector(Val, ArrayRef<int>{0, 1, 2});
- }
-
- Val = B.CreateBitCast(Val, EffectiveEltTy);
-
- // Re-create single element composite.
- if (EltTy != EffectiveEltTy)
- Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0);
- }
-
- NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++);
- }
+ for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second)
+ NewRetVal = B.CreateInsertValue(NewRetVal, ReturnPoint.second, RetIdx++);
if (RetVal)
RI->setOperand(0, NewRetVal);
PointerType *ArgType = cast<PointerType>(Arg.getType());
- auto *EltTy = ArgType->getPointerElementType();
+ Type *EltTy = OutArgIndexes[Arg.getArgNo()];
const auto Align =
DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy);
; CHECK-SAME: (void ()** [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FUNC:%.*]] = load i32 ()*, i32 ()** undef, align 8
; CHECK-NEXT: [[CAST:%.*]] = bitcast void ()** [[OUT]] to i32 ()**
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 ()* [[FUNC]] to void ()*
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[BITCAST_FUNC_PTR_TYPE:%.*]] undef, void ()* [[TMP1]], 0
-; CHECK-NEXT: ret [[BITCAST_FUNC_PTR_TYPE]] [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_FUNC_PTR_TYPE:%.*]] undef, i32 ()* [[FUNC]], 0
+; CHECK-NEXT: ret [[BITCAST_FUNC_PTR_TYPE]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type
; CHECK-SAME: (void ()** [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_FUNC_PTR_TYPE:%.*]] @bitcast_func_ptr_type.body(void ()** undef)
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_FUNC_PTR_TYPE]] [[TMP2]], 0
-; CHECK-NEXT: store void ()* [[TMP3]], void ()** [[TMP0]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast void ()** [[TMP0]] to i32 ()**
+; CHECK-NEXT: store i32 ()* [[TMP3]], i32 ()** [[TMP4]], align 8
; CHECK-NEXT: ret void
;
;
; CHECK-SAME: (<3 x i32>* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef, align 16
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <3 x i32>* [[OUT]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3I32:%.*]] undef, <3 x i32> [[TMP1]], 0
-; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3I32]] [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3I32:%.*]] undef, <4 x i32> [[LOAD]], 0
+; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3I32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32
; CHECK-SAME: (<3 x i32>* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3I32:%.*]] @bitcast_pointer_v4i32_v3i32.body(<3 x i32>* undef)
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3I32]] [[TMP2]], 0
-; CHECK-NEXT: store <3 x i32> [[TMP3]], <3 x i32>* [[TMP0]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <3 x i32>* [[TMP0]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
; CHECK-NEXT: ret void
;
;
; CHECK-SAME: (<3 x float>* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef, align 16
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <3 x float>* [[OUT]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float>
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3F32:%.*]] undef, <3 x float> [[TMP2]], 0
-; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3F32]] [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3F32:%.*]] undef, <4 x i32> [[LOAD]], 0
+; CHECK-NEXT: ret [[BITCAST_POINTER_V4I32_V3F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32
; CHECK-SAME: (<3 x float>* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3F32:%.*]] @bitcast_pointer_v4i32_v3f32.body(<3 x float>* undef)
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3F32]] [[TMP2]], 0
-; CHECK-NEXT: store <3 x float> [[TMP3]], <3 x float>* [[TMP0]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <3 x float>* [[TMP0]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
; CHECK-NEXT: ret void
;
;
; CHECK-SAME: (float* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float* [[OUT]] to i32*
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[LOAD]] to float
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[BITCAST_POINTER_I32_F32:%.*]] undef, float [[TMP1]], 0
-; CHECK-NEXT: ret [[BITCAST_POINTER_I32_F32]] [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F32:%.*]] undef, i32 [[LOAD]], 0
+; CHECK-NEXT: ret [[BITCAST_POINTER_I32_F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32
; CHECK-SAME: (float* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F32:%.*]] @bitcast_pointer_i32_f32.body(float* undef)
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F32]] [[TMP2]], 0
-; CHECK-NEXT: store float [[TMP3]], float* [[TMP0]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP0]] to i32*
+; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
;
-; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16
+; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16.body
; CHECK-SAME: (half* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast half* [[OUT]] to i32*
-; CHECK-NEXT: store i32 [[LOAD]], i32* [[BITCAST]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F16:%.*]] undef, i32 [[LOAD]], 0
+; CHECK-NEXT: ret [[BITCAST_POINTER_I32_F16]] [[TMP1]]
+;
+;
+; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16
+; CHECK-SAME: (half* [[TMP0:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F16:%.*]] @bitcast_pointer_i32_f16.body(half* undef)
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F16]] [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast half* [[TMP0]] to i32*
+; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP4]], align 4
; CHECK-NEXT: ret void
;
;
-; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32
+; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32.body
; CHECK-SAME: (i32* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[LOAD:%.*]] = load volatile half, half addrspace(1)* undef, align 2
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[OUT]] to half*
-; CHECK-NEXT: store half [[LOAD]], half* [[BITCAST]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_F16_I32:%.*]] undef, half [[LOAD]], 0
+; CHECK-NEXT: ret [[BITCAST_POINTER_F16_I32]] [[TMP1]]
+;
+;
+; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32
+; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP2:%.*]] = call [[BITCAST_POINTER_F16_I32:%.*]] @bitcast_pointer_f16_i32.body(i32* undef)
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_F16_I32]] [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to half*
+; CHECK-NEXT: store half [[TMP3]], half* [[TMP4]], align 2
; CHECK-NEXT: ret void
;
;
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[EXTRACTVEC]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_V3F32:%.*]] undef, <3 x float> [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, [[STRUCT_V3F32]] [[TMP2]], 0
-; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
+; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32
; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3F32:%.*]] @bitcast_struct_v3f32_v3f32.body(%struct.v3f32* undef, <3 x float> [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP3]], 0
-; CHECK-NEXT: store [[STRUCT_V3F32:%.*]] [[TMP4]], %struct.v3f32* [[TMP0]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT: ret void
;
;
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x i32> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[VALUE]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[EXTRACTVEC]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float>
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[STRUCT_V3F32:%.*]] undef, <3 x float> [[TMP2]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3I32:%.*]] undef, [[STRUCT_V3F32]] [[TMP3]], 0
-; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP4]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3I32:%.*]] undef, <4 x i32> [[EXTRACTVEC]], 0
+; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32
; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <3 x i32> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3I32:%.*]] @bitcast_struct_v3f32_v3i32.body(%struct.v3f32* undef, <3 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP3]], 0
-; CHECK-NEXT: store [[STRUCT_V3F32:%.*]] [[TMP4]], %struct.v3f32* [[TMP0]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 16
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32.body
; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[STRUCT_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V4F32:%.*]] undef, [[STRUCT_V4F32]] [[TMP1]], 0
-; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
+; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32
; CHECK-SAME: (%struct.v4f32* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V4F32:%.*]] @bitcast_struct_v4f32_v4f32.body(%struct.v4f32* undef, <4 x float> [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP3]], 0
-; CHECK-NEXT: store [[STRUCT_V4F32:%.*]] [[TMP4]], %struct.v4f32* [[TMP0]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v4f32* [[TMP0]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32.body
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <4 x i32> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VALUE]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float>
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[STRUCT_V3F32:%.*]] undef, <3 x float> [[TMP2]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V4I32:%.*]] undef, [[STRUCT_V3F32]] [[TMP3]], 0
-; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP4]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V4I32:%.*]] undef, <4 x i32> [[VALUE]], 0
+; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32
; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <4 x i32> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V4I32:%.*]] @bitcast_struct_v3f32_v4i32.body(%struct.v3f32* undef, <4 x i32> [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP3]], 0
-; CHECK-NEXT: store [[STRUCT_V3F32:%.*]] [[TMP4]], %struct.v3f32* [[TMP0]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 16
; CHECK-NEXT: ret void
;
;
; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[STRUCT_V4F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V3F32:%.*]] undef, [[STRUCT_V4F32]] [[TMP1]], 0
-; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
+; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32
; CHECK-SAME: (%struct.v4f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V3F32:%.*]] @bitcast_struct_v4f32_v3f32.body(%struct.v4f32* undef, <3 x float> [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP3]], 0
-; CHECK-NEXT: store [[STRUCT_V4F32:%.*]] [[TMP4]], %struct.v4f32* [[TMP0]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v4f32* [[TMP0]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT: ret void
;
;
-; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32
+; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32.body
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <2 x float>*
-; CHECK-NEXT: store <2 x float> [[VALUE]], <2 x float>* [[CAST]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V2F32:%.*]] undef, <2 x float> [[VALUE]], 0
+; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP1]]
+;
+;
+; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32
+; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <2 x float> [[TMP1:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V2F32:%.*]] @bitcast_struct_v3f32_v2f32.body(%struct.v3f32* undef, <2 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP3]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <2 x float>*
+; CHECK-NEXT: store <2 x float> [[TMP4]], <2 x float>* [[TMP5]], align 8
; CHECK-NEXT: ret void
;
;
-; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32
+; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32.body
; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], <4 x float>* [[CAST]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
+; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP1]]
+;
+;
+; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32
+; CHECK-SAME: (%struct.v3f32.f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] @bitcast_struct_v3f32_f32_v3f32.body(%struct.v3f32.f32* undef, <3 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP3]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32.f32* [[TMP0]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT: ret void
;
;
-; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32
+; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32.body
; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT: store <4 x float> [[VALUE]], <4 x float>* [[CAST]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
+; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP1]]
+;
+;
+; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32
+; CHECK-SAME: (%struct.v3f32.f32* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] @bitcast_struct_v3f32_f32_v4f32.body(%struct.v3f32.f32* undef, <4 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP3]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.v3f32.f32* [[TMP0]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32.body
; CHECK-SAME: (%struct.i128* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.i128* [[OUT]] to <4 x float>*
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[VALUE]] to i128
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_I128:%.*]] undef, i128 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertvalue [[BITCAST_STRUCT_I128_V4F32:%.*]] undef, [[STRUCT_I128]] [[TMP2]], 0
-; CHECK-NEXT: ret [[BITCAST_STRUCT_I128_V4F32]] [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_I128_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
+; CHECK-NEXT: ret [[BITCAST_STRUCT_I128_V4F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32
; CHECK-SAME: (%struct.i128* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_STRUCT_I128_V4F32:%.*]] @bitcast_struct_i128_v4f32.body(%struct.i128* undef, <4 x float> [[TMP1]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_I128_V4F32]] [[TMP3]], 0
-; CHECK-NEXT: store [[STRUCT_I128:%.*]] [[TMP4]], %struct.i128* [[TMP0]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.i128* [[TMP0]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT: ret void
;
;
-; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32
+; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32.body
; CHECK-SAME: ([4 x i32]* [[OUT:%.*]], [4 x float] [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CAST:%.*]] = bitcast [4 x i32]* [[OUT]] to [4 x float]*
-; CHECK-NEXT: store [4 x float] [[VALUE]], [4 x float]* [[CAST]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_ARRAY_V4I32_V4F32:%.*]] undef, [4 x float] [[VALUE]], 0
+; CHECK-NEXT: ret [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP1]]
+;
+;
+; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32
+; CHECK-SAME: ([4 x i32]* [[TMP0:%.*]], [4 x float] [[TMP1:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_ARRAY_V4I32_V4F32:%.*]] @bitcast_array_v4i32_v4f32.body([4 x i32]* undef, [4 x float] [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP3]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast [4 x i32]* [[TMP0]] to [4 x float]*
+; CHECK-NEXT: store [4 x float] [[TMP4]], [4 x float]* [[TMP5]], align 4
; CHECK-NEXT: ret void
;
;
; CHECK: ret0:
; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: [[CAST0:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[EXTRACTVEC]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[STRUCT_V3F32:%.*]] undef, <3 x float> [[TMP0]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, [[STRUCT_V3F32]] [[TMP1]], 0
-; CHECK-NEXT: ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP2]]
+; CHECK-NEXT: [[TMP0:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
+; CHECK-NEXT: ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP0]]
; CHECK: ret1:
; CHECK-NEXT: [[CAST1:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float> addrspace(1)* undef, align 16
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[LOAD]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[STRUCT_V3F32]] undef, <3 x float> [[TMP3]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] undef, [[STRUCT_V3F32]] [[TMP4]], 0
-; CHECK-NEXT: ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP5]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] undef, <4 x float> [[LOAD]], 0
+; CHECK-NEXT: ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@multi_return_bitcast_struct_v3f32_v3f32
; CHECK-SAME: (i1 [[TMP0:%.*]], %struct.v3f32* [[TMP1:%.*]], <3 x float> [[TMP2:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: [[TMP4:%.*]] = call [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] @multi_return_bitcast_struct_v3f32_v3f32.body(i1 [[TMP0]], %struct.v3f32* undef, <3 x float> [[TMP2]])
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP4]], 0
-; CHECK-NEXT: store [[STRUCT_V3F32:%.*]] [[TMP5]], %struct.v3f32* [[TMP1]], align 16
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast %struct.v3f32* [[TMP1]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[TMP5]], <4 x float>* [[TMP6]], align 16
; CHECK-NEXT: ret void
;
;
-; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32
+; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32.body
; CHECK-SAME: (<3 x float>* [[OUT:%.*]], [[STRUCT_V3F32:%.*]] [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[CAST:%.*]] = bitcast <3 x float>* [[OUT]] to %struct.v3f32*
-; CHECK-NEXT: store [[STRUCT_V3F32]] [[VALUE]], %struct.v3f32* [[CAST]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_V3F32_STRUCT_V3F32:%.*]] undef, [[STRUCT_V3F32]] [[VALUE]], 0
+; CHECK-NEXT: ret [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP1]]
+;
+;
+; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32
+; CHECK-SAME: (<3 x float>* [[TMP0:%.*]], [[STRUCT_V3F32:%.*]] [[TMP1:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP3:%.*]] = call [[BITCAST_V3F32_STRUCT_V3F32:%.*]] @bitcast_v3f32_struct_v3f32.body(<3 x float>* undef, [[STRUCT_V3F32]] [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP3]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <3 x float>* [[TMP0]] to %struct.v3f32*
+; CHECK-NEXT: store [[STRUCT_V3F32]] [[TMP4]], %struct.v3f32* [[TMP5]], align 16
; CHECK-NEXT: ret void
;