if (auto *BinOp = dyn_cast<BinaryOperator>(Inst))
Changed |= VisitBinaryOperator(BinOp);
if (match(Inst, m_Load(m_Value(Op1))))
- Changed |= VisitLoad(Inst, Op1, Builder);
+ Changed |= VisitLoad(cast<LoadInst>(Inst), Op1, Builder);
else if (match(Inst, m_Store(m_Value(Op1), m_Value(Op2))))
- Changed |= VisitStore(Inst, Op1, Op2, Builder);
+ Changed |= VisitStore(cast<StoreInst>(Inst), Op1, Op2, Builder);
}
RemarkGenerator RemarkGen(Inst2ColumnMatrix, ORE, Func);
return Changed;
}
- LoadInst *createVectorLoad(Value *ColumnPtr, Type *EltType,
+ LoadInst *createVectorLoad(Value *ColumnPtr, Type *EltType, bool IsVolatile,
IRBuilder<> &Builder) {
- return Builder.CreateAlignedLoad(
- ColumnPtr, Align(DL.getABITypeAlignment(EltType)), "col.load");
+ return Builder.CreateAlignedLoad(ColumnPtr,
+ Align(DL.getABITypeAlignment(EltType)),
+ IsVolatile, "col.load");
}
StoreInst *createVectorStore(Value *ColumnValue, Value *ColumnPtr,
- Type *EltType, IRBuilder<> &Builder) {
+ Type *EltType, bool IsVolatile,
+ IRBuilder<> &Builder) {
return Builder.CreateAlignedStore(ColumnValue, ColumnPtr,
- DL.getABITypeAlign(EltType));
+ DL.getABITypeAlign(EltType), IsVolatile);
}
/// Turns \p BasePtr into an elementwise pointer to \p EltType.
/// Load a matrix with \p Shape starting at \p Ptr and using \p Stride between
/// vectors.
- MatrixTy loadMatrix(Type *Ty, Value *Ptr, Value *Stride, ShapeInfo Shape,
- IRBuilder<> &Builder) {
+ MatrixTy loadMatrix(Type *Ty, Value *Ptr, Value *Stride, bool IsVolatile,
+ ShapeInfo Shape, IRBuilder<> &Builder) {
auto VType = cast<VectorType>(Ty);
Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder);
MatrixTy Result;
Value *GEP = computeVectorAddr(EltPtr, Builder.getInt64(I), Stride,
Shape.getStride(), VType->getElementType(),
Builder);
- Value *Vector = createVectorLoad(GEP, VType->getElementType(), Builder);
+ Value *Vector =
+ createVectorLoad(GEP, VType->getElementType(), IsVolatile, Builder);
Result.addVector(Vector);
}
return Result.addNumLoads(getNumOps(Result.getVectorTy()) *
                          Result.getNumVectors());
/// Loads a sub-matrix with shape \p ResultShape from a \p R x \p C matrix,
/// starting at \p MatrixPtr[I][J].
- MatrixTy loadMatrix(Value *MatrixPtr, ShapeInfo MatrixShape, Value *I,
- Value *J, ShapeInfo ResultShape, Type *EltTy,
+ MatrixTy loadMatrix(Value *MatrixPtr, bool IsVolatile, ShapeInfo MatrixShape,
+ Value *I, Value *J, ShapeInfo ResultShape, Type *EltTy,
IRBuilder<> &Builder) {
Value *Offset = Builder.CreateAdd(
Builder.CreatePointerCast(TileStart, TilePtrTy, "col.cast");
return loadMatrix(TileTy, TilePtr,
- Builder.getInt64(MatrixShape.getStride()), ResultShape,
- Builder);
+ Builder.getInt64(MatrixShape.getStride()), IsVolatile,
+ ResultShape, Builder);
}
/// Lower a load instruction with shape information.
- void LowerLoad(Instruction *Inst, Value *Ptr, Value *Stride,
+ void LowerLoad(Instruction *Inst, Value *Ptr, Value *Stride, bool IsVolatile,
ShapeInfo Shape) {
IRBuilder<> Builder(Inst);
- finalizeLowering(Inst,
- loadMatrix(Inst->getType(), Ptr, Stride, Shape, Builder),
- Builder);
+ finalizeLowering(
+ Inst,
+ loadMatrix(Inst->getType(), Ptr, Stride, IsVolatile, Shape, Builder),
+ Builder);
}
/// Lowers llvm.matrix.column.major.load.
Value *Ptr = Inst->getArgOperand(0);
Value *Stride = Inst->getArgOperand(1);
LowerLoad(Inst, Ptr, Stride,
+ cast<ConstantInt>(Inst->getArgOperand(2))->isOne(),
{Inst->getArgOperand(3), Inst->getArgOperand(4)});
}
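For reference, the volatile flag read above is the intrinsic's third operand (index 2), after the pointer and stride and before the dimensions. A minimal illustrative call in the same form as the column-major store call exercised by the tests later in this patch; the value names and the unmangled intrinsic name are only for illustration:

; illustrative only: operands are pointer, stride, i1 volatile flag, rows, columns
%m = call <6 x i32> @llvm.matrix.column.major.load(i32* %ptr, i64 5, i1 true, i32 3, i32 2)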
/// Stores a sub-matrix \p StoreVal into the \p R x \p C matrix starting at \p
/// MatrixPtr[I][J].
- void storeMatrix(const MatrixTy &StoreVal, Value *MatrixPtr,
+ void storeMatrix(const MatrixTy &StoreVal, Value *MatrixPtr, bool IsVolatile,
ShapeInfo MatrixShape, Value *I, Value *J, Type *EltTy,
IRBuilder<> &Builder) {
Value *Offset = Builder.CreateAdd(
Builder.CreatePointerCast(TileStart, TilePtrTy, "col.cast");
storeMatrix(TileTy, StoreVal, TilePtr,
- Builder.getInt64(MatrixShape.getStride()), Builder);
+ Builder.getInt64(MatrixShape.getStride()), IsVolatile, Builder);
}
/// Store matrix \p StoreVal starting at \p Ptr and using \p Stride between
/// vectors.
MatrixTy storeMatrix(Type *Ty, MatrixTy StoreVal, Value *Ptr, Value *Stride,
- IRBuilder<> &Builder) {
+ bool IsVolatile, IRBuilder<> &Builder) {
auto VType = cast<VectorType>(Ty);
Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder);
for (auto Vec : enumerate(StoreVal.vectors())) {
Value *GEP = computeVectorAddr(EltPtr, Builder.getInt64(Vec.index()),
Stride, StoreVal.getStride(),
VType->getElementType(), Builder);
- createVectorStore(Vec.value(), GEP, VType->getElementType(), Builder);
+ createVectorStore(Vec.value(), GEP, VType->getElementType(), IsVolatile,
+ Builder);
}
return MatrixTy().addNumStores(getNumOps(StoreVal.getVectorTy()) *
StoreVal.getNumVectors());
/// Lower a store instruction with shape information.
void LowerStore(Instruction *Inst, Value *Matrix, Value *Ptr, Value *Stride,
- ShapeInfo Shape) {
+ bool IsVolatile, ShapeInfo Shape) {
IRBuilder<> Builder(Inst);
auto StoreVal = getMatrix(Matrix, Shape, Builder);
- finalizeLowering(
- Inst, storeMatrix(Matrix->getType(), StoreVal, Ptr, Stride, Builder),
- Builder);
+ finalizeLowering(Inst,
+ storeMatrix(Matrix->getType(), StoreVal, Ptr, Stride,
+ IsVolatile, Builder),
+ Builder);
}
/// Lowers llvm.matrix.column.major.store.
Value *Ptr = Inst->getArgOperand(1);
Value *Stride = Inst->getArgOperand(2);
LowerStore(Inst, Matrix, Ptr, Stride,
+ cast<ConstantInt>(Inst->getArgOperand(3))->isOne(),
{Inst->getArgOperand(4), Inst->getArgOperand(5)});
}
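Correspondingly, the store intrinsic carries the volatile flag as its fourth operand (index 3), which is what the getArgOperand(3) read above extracts. The test call used later in this patch shows the operand order:

; operands are value, pointer, stride, i1 volatile flag, rows, columns
call void @llvm.matrix.column.major.store(<6 x i32> %in, i32* %out, i64 5, i1 true, i32 3, i32 2)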
for (unsigned K = 0; K < M; K += TileSize) {
const unsigned TileM = std::min(M - K, unsigned(TileSize));
- MatrixTy A =
- loadMatrix(APtr, LShape, Builder.getInt64(I), Builder.getInt64(K),
- {TileR, TileM}, EltType, Builder);
- MatrixTy B =
- loadMatrix(BPtr, RShape, Builder.getInt64(K), Builder.getInt64(J),
- {TileM, TileC}, EltType, Builder);
+ MatrixTy A = loadMatrix(APtr, LoadOp0->isVolatile(), LShape,
+ Builder.getInt64(I), Builder.getInt64(K),
+ {TileR, TileM}, EltType, Builder);
+ MatrixTy B = loadMatrix(BPtr, LoadOp1->isVolatile(), RShape,
+ Builder.getInt64(K), Builder.getInt64(J),
+ {TileM, TileC}, EltType, Builder);
emitMatrixMultiply(Res, A, B, AllowContract, Builder, true);
}
- storeMatrix(Res, CPtr, {R, M}, Builder.getInt64(I), Builder.getInt64(J),
- EltType, Builder);
+ storeMatrix(Res, CPtr, Store->isVolatile(), {R, M}, Builder.getInt64(I),
+ Builder.getInt64(J), EltType, Builder);
}
// Mark eliminated instructions as fused and remove them.
}
/// Lower load instructions, if shape information is available.
- bool VisitLoad(Instruction *Inst, Value *Ptr, IRBuilder<> &Builder) {
+ bool VisitLoad(LoadInst *Inst, Value *Ptr, IRBuilder<> &Builder) {
auto I = ShapeMap.find(Inst);
if (I == ShapeMap.end())
return false;
- LowerLoad(Inst, Ptr, Builder.getInt64(I->second.getStride()), I->second);
+ LowerLoad(Inst, Ptr, Builder.getInt64(I->second.getStride()),
+ Inst->isVolatile(), I->second);
return true;
}
- bool VisitStore(Instruction *Inst, Value *StoredVal, Value *Ptr,
+ bool VisitStore(StoreInst *Inst, Value *StoredVal, Value *Ptr,
IRBuilder<> &Builder) {
auto I = ShapeMap.find(StoredVal);
if (I == ShapeMap.end())
return false;
LowerStore(Inst, StoredVal, Ptr, Builder.getInt64(I->second.getStride()),
- I->second);
+ Inst->isVolatile(), I->second);
return true;
}
; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP1]] to <4 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x double>* [[COL_CAST]] to double*
; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP2]] to <2 x double>*
-; CHECK-NEXT: load <2 x double>, <2 x double>* [[VEC_CAST]], align 8
+; CHECK-NEXT: load volatile <2 x double>, <2 x double>* [[VEC_CAST]], align 8
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP2]], i64 2
; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>*
-; CHECK-NEXT: load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8
+; CHECK-NEXT: load volatile <2 x double>, <2 x double>* [[VEC_CAST1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x double>* [[B:%.*]] to double*
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr double, double* [[TMP3]], i64 0
; CHECK-NEXT: [[COL_CAST3:%.*]] = bitcast double* [[TMP4]] to <4 x double>*
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x double>* [[COL_CAST3]] to double*
; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP5]] to <2 x double>*
-; CHECK-NEXT: load <2 x double>, <2 x double>* [[VEC_CAST4]], align 8
+; CHECK-NEXT: load volatile <2 x double>, <2 x double>* [[VEC_CAST4]], align 8
; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP5]], i64 2
; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>*
-; CHECK-NEXT: load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8
+; CHECK-NEXT: load volatile <2 x double>, <2 x double>* [[VEC_CAST7]], align 8
; CHECK: [[TMP18:%.*]] = bitcast <4 x double>* [[C:%.*]] to double*
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr double, double* [[TMP18]], i64 0
; CHECK-NEXT: [[COL_CAST18:%.*]] = bitcast double* [[TMP19]] to <4 x double>*
; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x double>* [[COL_CAST18]] to double*
; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[TMP20]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> {{.*}}, <2 x double>* [[VEC_CAST19]], align 8
+; CHECK-NEXT: store volatile <2 x double> {{.*}}, <2 x double>* [[VEC_CAST19]], align 8
; CHECK-NEXT: [[VEC_GEP20:%.*]] = getelementptr double, double* [[TMP20]], i64 2
; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[VEC_GEP20]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> {{.*}}, <2 x double>* [[VEC_CAST21]], align 8
+; CHECK-NEXT: store volatile <2 x double> {{.*}}, <2 x double>* [[VEC_CAST21]], align 8
; CHECK-NEXT: ret void
;
; CHECK-NEXT: [[COL_CAST:%.*]] = bitcast double* [[TMP1]] to <4 x double>*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x double>* [[COL_CAST]] to double*
; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast double* [[TMP2]] to <2 x double>*
-; CHECK-NEXT: load <2 x double>, <2 x double>* [[VEC_CAST]], align 8
+; CHECK-NEXT: load volatile <2 x double>, <2 x double>* [[VEC_CAST]], align 8
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP2]], i64 2
; CHECK-NEXT: [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>*
-; CHECK-NEXT: load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8
+; CHECK-NEXT: load volatile <2 x double>, <2 x double>* [[VEC_CAST1]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x double>* [[B:%.*]] to double*
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr double, double* [[TMP3]], i64 0
; CHECK-NEXT: [[COL_CAST3:%.*]] = bitcast double* [[TMP4]] to <4 x double>*
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x double>* [[COL_CAST3]] to double*
; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast double* [[TMP5]] to <2 x double>*
-; CHECK-NEXT: load <2 x double>, <2 x double>* [[VEC_CAST4]], align 8
+; CHECK-NEXT: load volatile <2 x double>, <2 x double>* [[VEC_CAST4]], align 8
; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP5]], i64 2
; CHECK-NEXT: [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>*
-; CHECK-NEXT: load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8
+; CHECK-NEXT: load volatile <2 x double>, <2 x double>* [[VEC_CAST7]], align 8
; CHECK: [[TMP18:%.*]] = bitcast <4 x double>* [[C:%.*]] to double*
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr double, double* [[TMP18]], i64 0
; CHECK-NEXT: [[COL_CAST18:%.*]] = bitcast double* [[TMP19]] to <4 x double>*
; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x double>* [[COL_CAST18]] to double*
; CHECK-NEXT: [[VEC_CAST19:%.*]] = bitcast double* [[TMP20]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> {{.*}}, <2 x double>* [[VEC_CAST19]], align 8
+; CHECK-NEXT: store volatile <2 x double> {{.*}}, <2 x double>* [[VEC_CAST19]], align 8
; CHECK-NEXT: [[VEC_GEP20:%.*]] = getelementptr double, double* [[TMP20]], i64 2
; CHECK-NEXT: [[VEC_CAST21:%.*]] = bitcast double* [[VEC_GEP20]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> {{.*}}, <2 x double>* [[VEC_CAST21]], align 8
+; CHECK-NEXT: store volatile <2 x double> {{.*}}, <2 x double>* [[VEC_CAST21]], align 8
; CHECK-NEXT: ret void
;
entry:
; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <6 x i32> [[IN:%.*]], <6 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <6 x i32> [[IN]], <6 x i32> undef, <3 x i32> <i32 3, i32 4, i32 5>
; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast i32* [[OUT:%.*]] to <3 x i32>*
-; CHECK-NEXT: store <3 x i32> [[SPLIT]], <3 x i32>* [[VEC_CAST]], align 4
+; CHECK-NEXT: store volatile <3 x i32> [[SPLIT]], <3 x i32>* [[VEC_CAST]], align 4
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i32, i32* [[OUT]], i64 5
; CHECK-NEXT: [[VEC_CAST2:%.*]] = bitcast i32* [[VEC_GEP]] to <3 x i32>*
-; CHECK-NEXT: store <3 x i32> [[SPLIT1]], <3 x i32>* [[VEC_CAST2]], align 4
+; CHECK-NEXT: store volatile <3 x i32> [[SPLIT1]], <3 x i32>* [[VEC_CAST2]], align 4
; CHECK-NEXT: ret void
;
call void @llvm.matrix.column.major.store(<6 x i32> %in, i32* %out, i64 5, i1 true, i32 3, i32 2)
; CHECK-LABEL: @multiply_store_volatile(
; CHECK: [[TMP29:%.*]] = bitcast <4 x i32>* %out to i32*
; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast i32* [[TMP29]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> {{.*}}, <2 x i32>* [[VEC_CAST]], align 4
+; CHECK-NEXT: store volatile <2 x i32> {{.*}}, <2 x i32>* [[VEC_CAST]], align 4
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i32, i32* [[TMP29]], i64 2
; CHECK-NEXT: [[VEC_CAST25:%.*]] = bitcast i32* [[VEC_GEP]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> {{.*}}, <2 x i32>* [[VEC_CAST25]], align 4
+; CHECK-NEXT: store volatile <2 x i32> {{.*}}, <2 x i32>* [[VEC_CAST25]], align 4
; CHECK-NEXT: ret void
;
%res = call <4 x i32> @llvm.matrix.multiply(<4 x i32> %in, <4 x i32> %in, i32 2, i32 2, i32 2)
; CHECK-NEXT: [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i32, i32* [[OUT:%.*]], i64 [[VEC_START]]
; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast i32* [[VEC_GEP]] to <3 x i32>*
-; CHECK-NEXT: store <3 x i32> [[SPLIT]], <3 x i32>* [[VEC_CAST]], align 4
+; CHECK-NEXT: store volatile <3 x i32> [[SPLIT]], <3 x i32>* [[VEC_CAST]], align 4
; CHECK-NEXT: [[VEC_START2:%.*]] = mul i64 1, [[STRIDE]]
; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr i32, i32* [[OUT]], i64 [[VEC_START2]]
; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast i32* [[VEC_GEP3]] to <3 x i32>*
-; CHECK-NEXT: store <3 x i32> [[SPLIT1]], <3 x i32>* [[VEC_CAST4]], align 4
+; CHECK-NEXT: store volatile <3 x i32> [[SPLIT1]], <3 x i32>* [[VEC_CAST4]], align 4
; CHECK-NEXT: ret void
;
call void @llvm.matrix.column.major.store(<6 x i32> %in, i32* align 32 %out, i64 %stride, i1 true, i32 3, i32 2)
; CHECK-NEXT: [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i32, i32* [[OUT:%.*]], i64 [[VEC_START]]
; CHECK-NEXT: [[VEC_CAST:%.*]] = bitcast i32* [[VEC_GEP]] to <3 x i32>*
-; CHECK-NEXT: store <3 x i32> [[SPLIT]], <3 x i32>* [[VEC_CAST]], align 4
+; CHECK-NEXT: store volatile <3 x i32> [[SPLIT]], <3 x i32>* [[VEC_CAST]], align 4
; CHECK-NEXT: [[VEC_START2:%.*]] = mul i64 1, [[STRIDE]]
; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr i32, i32* [[OUT]], i64 [[VEC_START2]]
; CHECK-NEXT: [[VEC_CAST4:%.*]] = bitcast i32* [[VEC_GEP3]] to <3 x i32>*
-; CHECK-NEXT: store <3 x i32> [[SPLIT1]], <3 x i32>* [[VEC_CAST4]], align 4
+; CHECK-NEXT: store volatile <3 x i32> [[SPLIT1]], <3 x i32>* [[VEC_CAST4]], align 4
; CHECK-NEXT: ret void
;
call void @llvm.matrix.column.major.store(<6 x i32> %in, i32* align 2 %out, i64 %stride, i1 true, i32 3, i32 2)