From 55014813e33a3ce8757e899dc9d39f5d394798ec Mon Sep 17 00:00:00 2001
From: Andy Davis
Date: Thu, 4 Apr 2019 10:35:51 -0700
Subject: [PATCH] Adds dependence analysis support for iteration domains with
 local variables (enables dependence analysis of loops with non-unit step).

--
PiperOrigin-RevId: 241957023
---
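Illustration (condensed from the new test @strided_loop_with_dependence_at_depth2
added to memref-dependence-check.mlir below; %0 and %cf0 are the alloc'd memref
and constant from that test): the loop has a non-unit step, so its iteration
domain is only expressible with a local variable (roughly %i0 = 2 * q), which the
dependence constraint system now carries in its
[src-dim-ids, dst-dim-ids, symbol-ids, local-ids, const_term] columns. With this
change the store->load dependence at depth 2 is computed for it:

  affine.for %i0 = 0 to 8 step 2 {
    store %cf0, %0[%i0] : memref<10xf32>  // Writes elements 0, 2, 4, 6.
    %v0 = load %0[%i0] : memref<10xf32>   // Reads the element just written.
  }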

 mlir/lib/Analysis/AffineAnalysis.cpp              | 105 ++++++++++++----
 mlir/lib/Analysis/AffineStructures.cpp            |   3 +
 mlir/test/Transforms/memref-dependence-check.mlir |  68 ++++++++++++++
 3 files changed, 127 insertions(+), 49 deletions(-)

diff --git a/mlir/lib/Analysis/AffineAnalysis.cpp b/mlir/lib/Analysis/AffineAnalysis.cpp
index 9fac3c8..c7d992c 100644
--- a/mlir/lib/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Analysis/AffineAnalysis.cpp
@@ -234,9 +234,8 @@ static void buildDimAndSymbolPositionMaps(
   };

   SmallVector srcValues, destValues;
-  srcDomain.getAllIdValues(&srcValues);
-  dstDomain.getAllIdValues(&destValues);
-
+  srcDomain.getIdValues(0, srcDomain.getNumDimAndSymbolIds(), &srcValues);
+  dstDomain.getIdValues(0, dstDomain.getNumDimAndSymbolIds(), &destValues);
   // Update value position map with identifiers from src iteration domain.
   updateValuePosMap(srcValues, /*isSrc=*/true);
   // Update value position map with identifiers from dst iteration domain.
@@ -248,7 +247,7 @@
 }

 // Sets up dependence constraints columns appropriately, in the format:
-// [src-dim-identifiers, dst-dim-identifiers, symbol-identifiers, const_term]
+// [src-dim-ids, dst-dim-ids, symbol-ids, local-ids, const_term]
 void initDependenceConstraints(const FlatAffineConstraints &srcDomain,
                                const FlatAffineConstraints &dstDomain,
                                const AffineValueMap &srcAccessMap,
@@ -264,12 +263,13 @@ void initDependenceConstraints(const FlatAffineConstraints &srcDomain,
   unsigned numEq = srcMap.getNumResults();
   unsigned numDims = srcDomain.getNumDimIds() + dstDomain.getNumDimIds();
   unsigned numSymbols = valuePosMap.getNumSymbols();
-  unsigned numIds = numDims + numSymbols;
+  unsigned numLocals = srcDomain.getNumLocalIds() + dstDomain.getNumLocalIds();
+  unsigned numIds = numDims + numSymbols + numLocals;
   unsigned numCols = numIds + 1;

   // Set flat affine constraints sizes and reserving space for constraints.
   dependenceConstraints->reset(numIneq, numEq, numCols, numDims, numSymbols,
-                               /*numLocals=*/0);
+                               numLocals);

   // Set values corresponding to dependence constraint identifiers.
   SmallVector srcLoopIVs, dstLoopIVs;
@@ -314,43 +314,53 @@ static void addDomainConstraints(const FlatAffineConstraints &srcDomain,
                                  const FlatAffineConstraints &dstDomain,
                                  const ValuePositionMap &valuePosMap,
                                  FlatAffineConstraints *dependenceDomain) {
-  unsigned srcNumIneq = srcDomain.getNumInequalities();
-  unsigned srcNumDims = srcDomain.getNumDimIds();
-  unsigned srcNumSymbols = srcDomain.getNumSymbolIds();
-  unsigned srcNumIds = srcNumDims + srcNumSymbols;
-
-  unsigned dstNumIneq = dstDomain.getNumInequalities();
-  unsigned dstNumDims = dstDomain.getNumDimIds();
-  unsigned dstNumSymbols = dstDomain.getNumSymbolIds();
-  unsigned dstNumIds = dstNumDims + dstNumSymbols;
+  unsigned depNumDimsAndSymbolIds = dependenceDomain->getNumDimAndSymbolIds();
+
+  SmallVector cst(dependenceDomain->getNumCols());
+
+  auto addDomain = [&](bool isSrc, bool isEq, unsigned localOffset) {
+    const FlatAffineConstraints &domain = isSrc ? srcDomain : dstDomain;
+    unsigned numCsts =
+        isEq ? domain.getNumEqualities() : domain.getNumInequalities();
+    unsigned numDimAndSymbolIds = domain.getNumDimAndSymbolIds();
+    auto at = [&](unsigned i, unsigned j) -> int64_t {
+      return isEq ? domain.atEq(i, j) : domain.atIneq(i, j);
+    };
+    auto map = [&](unsigned i) -> int64_t {
+      return isSrc ? valuePosMap.getSrcDimOrSymPos(domain.getIdValue(i))
+                   : valuePosMap.getDstDimOrSymPos(domain.getIdValue(i));
+    };
+
+    for (unsigned i = 0; i < numCsts; ++i) {
+      // Zero fill.
+      std::fill(cst.begin(), cst.end(), 0);
+      // Set coefficients for identifiers corresponding to domain.
+      for (unsigned j = 0; j < numDimAndSymbolIds; ++j)
+        cst[map(j)] = at(i, j);
+      // Local terms.
+      for (unsigned j = 0, e = domain.getNumLocalIds(); j < e; j++)
+        cst[depNumDimsAndSymbolIds + localOffset + j] =
+            at(i, numDimAndSymbolIds + j);
+      // Set constant term.
+      cst[cst.size() - 1] = at(i, domain.getNumCols() - 1);
+      // Add constraint.
+      if (isEq)
+        dependenceDomain->addEquality(cst);
+      else
+        dependenceDomain->addInequality(cst);
+    }
+  };

-  SmallVector ineq(dependenceDomain->getNumCols());
+  // Add equalities from src domain.
+  addDomain(/*isSrc=*/true, /*isEq=*/true, /*localOffset=*/0);
   // Add inequalities from src domain.
-  for (unsigned i = 0; i < srcNumIneq; ++i) {
-    // Zero fill.
-    std::fill(ineq.begin(), ineq.end(), 0);
-    // Set coefficients for identifiers corresponding to src domain.
-    for (unsigned j = 0; j < srcNumIds; ++j)
-      ineq[valuePosMap.getSrcDimOrSymPos(srcDomain.getIdValue(j))] =
-          srcDomain.atIneq(i, j);
-    // Set constant term.
-    ineq[ineq.size() - 1] = srcDomain.atIneq(i, srcNumIds);
-    // Add inequality constraint.
-    dependenceDomain->addInequality(ineq);
-  }
+  addDomain(/*isSrc=*/true, /*isEq=*/false, /*localOffset=*/0);
+  // Add equalities from dst domain.
+  addDomain(/*isSrc=*/false, /*isEq=*/true,
+            /*localOffset=*/srcDomain.getNumLocalIds());
   // Add inequalities from dst domain.
-  for (unsigned i = 0; i < dstNumIneq; ++i) {
-    // Zero fill.
-    std::fill(ineq.begin(), ineq.end(), 0);
-    // Set coefficients for identifiers corresponding to dst domain.
-    for (unsigned j = 0; j < dstNumIds; ++j)
-      ineq[valuePosMap.getDstDimOrSymPos(dstDomain.getIdValue(j))] =
-          dstDomain.atIneq(i, j);
-    // Set constant term.
-    ineq[ineq.size() - 1] = dstDomain.atIneq(i, dstNumIds);
-    // Add inequality constraint.
-    dependenceDomain->addInequality(ineq);
-  }
+  addDomain(/*isSrc=*/false, /*isEq=*/false,
+            /*localOffset=*/srcDomain.getNumLocalIds());
 }

 // Adds equality constraints that equate src and dst access functions
@@ -376,15 +386,11 @@ static void addDomainConstraints(const FlatAffineConstraints &srcDomain,
 //
 // Returns failure if any AffineExpr cannot be flattened (due to it being
 // semi-affine). Returns success otherwise.
-// TODO(bondhugula): assumes that dependenceDomain doesn't have local
-// variables already. Fix this soon.
 static LogicalResult
 addMemRefAccessConstraints(const AffineValueMap &srcAccessMap,
                            const AffineValueMap &dstAccessMap,
                            const ValuePositionMap &valuePosMap,
                            FlatAffineConstraints *dependenceDomain) {
-  if (dependenceDomain->getNumLocalIds() != 0)
-    return failure();
   AffineMap srcMap = srcAccessMap.getAffineMap();
   AffineMap dstMap = dstAccessMap.getAffineMap();
   assert(srcMap.getNumResults() == dstMap.getNumResults());
@@ -404,6 +410,7 @@ addMemRefAccessConstraints(const AffineValueMap &srcAccessMap,
       failed(getFlattenedAffineExprs(dstMap, &destFlatExprs, &destLocalVarCst)))
     return failure();

+  unsigned domNumLocalIds = dependenceDomain->getNumLocalIds();
   unsigned srcNumLocalIds = srcLocalVarCst.getNumLocalIds();
   unsigned dstNumLocalIds = destLocalVarCst.getNumLocalIds();
   unsigned numLocalIdsToAdd = srcNumLocalIds + dstNumLocalIds;
@@ -414,6 +421,7 @@ addMemRefAccessConstraints(const AffineValueMap &srcAccessMap,
   unsigned numDims = dependenceDomain->getNumDimIds();
   unsigned numSymbols = dependenceDomain->getNumSymbolIds();
   unsigned numSrcLocalIds = srcLocalVarCst.getNumLocalIds();
+  unsigned newLocalIdOffset = numDims + numSymbols + domNumLocalIds;

   // Equality to add.
   SmallVector eq(dependenceDomain->getNumCols());
@@ -428,7 +436,7 @@ addMemRefAccessConstraints(const AffineValueMap &srcAccessMap,
       eq[valuePosMap.getSrcDimOrSymPos(srcOperands[j])] = srcFlatExpr[j];
     // Local terms.
     for (unsigned j = 0, e = srcNumLocalIds; j < e; j++)
-      eq[numDims + numSymbols + j] = srcFlatExpr[srcNumIds + j];
+      eq[newLocalIdOffset + j] = srcFlatExpr[srcNumIds + j];
     // Set constant term.
     eq[eq.size() - 1] = srcFlatExpr[srcFlatExpr.size() - 1];

@@ -439,8 +447,7 @@ addMemRefAccessConstraints(const AffineValueMap &srcAccessMap,
       eq[valuePosMap.getDstDimOrSymPos(dstOperands[j])] -= destFlatExpr[j];
     // Local terms.
     for (unsigned j = 0, e = dstNumLocalIds; j < e; j++)
-      eq[numDims + numSymbols + numSrcLocalIds + j] =
-          -destFlatExpr[dstNumIds + j];
+      eq[newLocalIdOffset + numSrcLocalIds + j] = -destFlatExpr[dstNumIds + j];
     // Set constant term.
     eq[eq.size() - 1] -= destFlatExpr[destFlatExpr.size() - 1];

@@ -486,7 +493,7 @@ addMemRefAccessConstraints(const AffineValueMap &srcAccessMap,
           srcLocalVarCst.atIneq(r, j);
     // Local terms.
     for (unsigned j = 0, e = srcNumLocalIds; j < e; j++)
-      ineq[numDims + numSymbols + j] = srcLocalVarCst.atIneq(r, srcNumIds + j);
+      ineq[newLocalIdOffset + j] = srcLocalVarCst.atIneq(r, srcNumIds + j);
     // Set constant term.
     ineq[ineq.size() - 1] =
         srcLocalVarCst.atIneq(r, srcLocalVarCst.getNumCols() - 1);
@@ -501,7 +508,7 @@ addMemRefAccessConstraints(const AffineValueMap &srcAccessMap,
          destLocalVarCst.atIneq(r, j);
     // Local terms.
     for (unsigned j = 0, e = dstNumLocalIds; j < e; j++)
-      ineq[numDims + numSymbols + numSrcLocalIds + j] =
+      ineq[newLocalIdOffset + numSrcLocalIds + j] =
          destLocalVarCst.atIneq(r, dstNumIds + j);
     // Set constant term.
     ineq[ineq.size() - 1] =
diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp
index 483e69f..4553c3a 100644
--- a/mlir/lib/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Analysis/AffineStructures.cpp
@@ -1171,6 +1171,7 @@ unsigned FlatAffineConstraints::gaussianEliminateIds(unsigned posStart,
       normalizeConstraintByGCD(this, i);
     }
     removeEquality(pivotRow);
+    GCDTightenInequalities();
   }
   // Update position limit based on number eliminated.
   posLimit = pivotCol;
@@ -2512,6 +2513,8 @@ void FlatAffineConstraints::FourierMotzkinEliminate(
     }
   }

+  LLVM_DEBUG(llvm::dbgs() << "FM isResultIntegerExact: " << (lcmProducts == 1)
+                          << "\n");
   if (lcmProducts == 1 && isResultIntegerExact)
     *isResultIntegerExact = 1;

diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir
index 00d0e73..e287af9 100644
--- a/mlir/test/Transforms/memref-dependence-check.mlir
+++ b/mlir/test/Transforms/memref-dependence-check.mlir
@@ -782,3 +782,71 @@ func @delinearize_mod_floordiv() {
 }

 // TODO(bondhugula): add more test cases involving mod's/div's.
+
+// -----
+
+// Load and store ops access the same elements in strided loop.
+// CHECK-LABEL: func @strided_loop_with_dependence_at_depth2
+func @strided_loop_with_dependence_at_depth2() {
+  %0 = alloc() : memref<10xf32>
+  %cf0 = constant 0.0 : f32
+  affine.for %i0 = 0 to 8 step 2 {
+    store %cf0, %0[%i0] : memref<10xf32>
+    // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
+    // expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
+    // expected-note@-3 {{dependence from 0 to 1 at depth 1 = false}}
+    // expected-note@-4 {{dependence from 0 to 1 at depth 2 = true}}
+    %v0 = load %0[%i0] : memref<10xf32>
+    // expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}}
+    // expected-note@-2 {{dependence from 1 to 0 at depth 2 = false}}
+    // expected-note@-3 {{dependence from 1 to 1 at depth 1 = false}}
+    // expected-note@-4 {{dependence from 1 to 1 at depth 2 = false}}
+  }
+  return
+}
+
+// -----
+
+// Load and store ops access alternating memref elements: no dependence.
+// CHECK-LABEL: func @strided_loop_with_no_dependence
+func @strided_loop_with_no_dependence() {
+  %0 = alloc() : memref<10xf32>
+  %cf0 = constant 0.0 : f32
+  affine.for %i0 = 0 to 8 step 2 {
+    %a0 = affine.apply (d0) -> (d0 + 1)(%i0)
+    store %cf0, %0[%a0] : memref<10xf32>
+    // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
+    // expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
+    // expected-note@-3 {{dependence from 0 to 1 at depth 1 = false}}
+    // expected-note@-4 {{dependence from 0 to 1 at depth 2 = false}}
+    %v0 = load %0[%i0] : memref<10xf32>
+    // expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}}
+    // expected-note@-2 {{dependence from 1 to 0 at depth 2 = false}}
+    // expected-note@-3 {{dependence from 1 to 1 at depth 1 = false}}
+    // expected-note@-4 {{dependence from 1 to 1 at depth 2 = false}}
+  }
+  return
+}
+
+// -----
+
+// Store op accesses memref elements at offset causing loop-carried dependence.
+// CHECK-LABEL: func @strided_loop_with_loop_carried_dependence_at_depth1
+func @strided_loop_with_loop_carried_dependence_at_depth1() {
+  %0 = alloc() : memref<10xf32>
+  %cf0 = constant 0.0 : f32
+  affine.for %i0 = 0 to 8 step 2 {
+    %a0 = affine.apply (d0) -> (d0 + 4)(%i0)
+    store %cf0, %0[%a0] : memref<10xf32>
+    // expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
+    // expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
+    // expected-note@-3 {{dependence from 0 to 1 at depth 1 = [4, 4]}}
+    // expected-note@-4 {{dependence from 0 to 1 at depth 2 = false}}
+    %v0 = load %0[%i0] : memref<10xf32>
+    // expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}}
+    // expected-note@-2 {{dependence from 1 to 0 at depth 2 = false}}
+    // expected-note@-3 {{dependence from 1 to 1 at depth 1 = false}}
+    // expected-note@-4 {{dependence from 1 to 1 at depth 2 = false}}
+  }
+  return
+}
--
2.7.4