return inBoundsCondition;
}
+/// Emits a `std_alloca` of type `memRefMinorVectorType` at the start of the
+/// first block of the FuncOp that encloses `op`, tagged with an i64 alignment
+/// attribute of 128, and returns the allocated value.
+///
+/// The builder's insertion point is only moved for the duration of this call;
+/// the insertion guard puts it back when the function returns.
+///
+/// NOTE(review): the result of `getParentOfType<FuncOp>()` is used without a
+/// null check, so this presumably expects `op` to always be nested inside a
+/// FuncOp -- confirm against callers.
+// TODO: Parallelism and threadlocal considerations.
+static Value setAllocAtFunctionEntry(MemRefType memRefMinorVectorType,
+                                     Operation *op) {
+  auto &builder = ScopedContext::getBuilderRef();
+  // Must be created before the insertion point is changed below.
+  OpBuilder::InsertionGuard restoreInsertionPoint(builder);
+  FuncOp enclosingFunc = op->getParentOfType<FuncOp>();
+  builder.setInsertionPointToStart(&enclosingFunc.front());
+  auto alignmentAttr = builder.getI64IntegerAttr(128);
+  Value buffer = std_alloca(memRefMinorVectorType, ValueRange{}, alignmentAttr);
+  return buffer;
+}
+
template <>
LogicalResult NDTransferOpHelper<TransferReadOp>::doReplace() {
Value alloc, result;
if (options.unroll)
result = std_splat(vectorType, xferOp.padding());
else
- alloc = std_alloc(memRefMinorVectorType);
+ alloc = setAllocAtFunctionEntry(memRefMinorVectorType, op);
emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets,
ValueRange majorOffsets, ValueRange minorOffsets,
LogicalResult NDTransferOpHelper<TransferWriteOp>::doReplace() {
Value alloc;
if (!options.unroll) {
- alloc = std_alloc(memRefMinorVectorType);
+ alloc = setAllocAtFunctionEntry(memRefMinorVectorType, op);
std_store(xferOp.vector(),
vector_type_cast(MemRefType::get({}, vectorType), alloc));
}
%f7 = constant 7.0: f32
// CHECK-DAG: %[[splat:.*]] = constant dense<7.000000e+00> : vector<15xf32>
- // CHECK-DAG: %[[alloc:.*]] = alloc() : memref<3xvector<15xf32>>
+ // CHECK-DAG: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
// CHECK-DAG: %[[dim:.*]] = dim %[[A]], 0 : memref<?x?xf32>
// CHECK: affine.for %[[I:.*]] = 0 to 3 {
// CHECK: %[[add:.*]] = affine.apply #[[MAP0]](%[[I]])[%[[base]]]
// FULL-UNROLL-SAME: %[[base:[a-zA-Z0-9]+]]: index,
// FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
func @transfer_write_progressive(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
- // CHECK: %[[alloc:.*]] = alloc() : memref<3xvector<15xf32>>
+ // CHECK: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
// CHECK: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
// CHECK: store %[[vec]], %[[vmemref]][] : memref<vector<3x15xf32>>
// CHECK: %[[dim:.*]] = dim %[[A]], 0 : memref<?x?xf32>
// FULL-UNROLL-SAME: %[[vec:[a-zA-Z0-9]+]]: vector<3x15xf32>
func @transfer_write_progressive_not_masked(%A : memref<?x?xf32>, %base: index, %vec: vector<3x15xf32>) {
// CHECK-NOT: scf.if
- // CHECK-NEXT: %[[alloc:.*]] = alloc() : memref<3xvector<15xf32>>
+ // CHECK-NEXT: %[[alloc:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<15xf32>>
// CHECK-NEXT: %[[vmemref:.*]] = vector.type_cast %[[alloc]] : memref<3xvector<15xf32>> to memref<vector<3x15xf32>>
// CHECK-NEXT: store %[[vec]], %[[vmemref]][] : memref<vector<3x15xf32>>
// CHECK-NEXT: affine.for %[[I:.*]] = 0 to 3 {