commit 556c30af1c797be294edde0ce621884f5acf11f0
Author: Keno Fischer
Date: Wed Aug 1 20:45:11 2018 -0400

RFC: [SCEV] Add explicit representations of umin/smin

Summary: Currently we express umin as `~umax(~x, ~y)`. However, this becomes a problem for operands in non-integral pointer spaces, because `~x` is not something we can compute for `x` non-integral. However, since comparisons are generally still allowed, we are actually able to express `umin(x, y)` directly as long as we don't try to express it as a umax. Support this by adding an explicit umin/smin representation to SCEV. We do this by factoring the existing getUMax/getSMax functions into a single function that handles all four operations. The previous two functions were largely identical, except that the SMax variant used `isKnownPredicate` while the UMax variant used `isKnownViaNonRecursiveReasoning`. Trying to make the UMax variant also use `isKnownPredicate` leads to infinite recursion, while trying to make the SMax variant use `isKnownViaNonRecursiveReasoning` causes `Transforms/IndVarSimplify/backedge-on-min-max.ll` to fail. I would appreciate any insight into which predicate is correct here. (A small standalone sketch contrasting the old and new umin formulations follows the diff.)

Reviewers: reames, sanjoy

Subscribers: javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D50167

diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 21b72f3e13c..9fd6794395c 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -582,12 +582,15 @@ public: /// \p IndexExprs The expressions for the indices. const SCEV *getGEPExpr(GEPOperator *GEP, const SmallVectorImpl<const SCEV *> &IndexExprs); + const SCEV *getUSMinMaxExpr(unsigned Kind, SmallVectorImpl<const SCEV *> &Operands); const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands); const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getUMaxExpr(SmallVectorImpl<const SCEV *> &Operands); const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getSMinExpr(SmallVectorImpl<const SCEV *> &Operands); const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getUMinExpr(SmallVectorImpl<const SCEV *> &Operands); const SCEV *getUnknown(Value *V); const SCEV *getCouldNotCompute(); diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index 3df04e98bd2..9e407c63abc 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -367,6 +367,10 @@ namespace llvm { Value *visitUMaxExpr(const SCEVUMaxExpr *S); + Value *visitSMinExpr(const SCEVSMinExpr *S); + + Value *visitUMinExpr(const SCEVUMinExpr *S); + Value *visitUnknown(const SCEVUnknown *S) { return S->getValue(); } diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index acf83455cdc..0d20a1bcdcc 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -40,7 +40,7 @@ class Type; // These should be ordered in terms of increasing complexity to make the // folders simpler.
scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr, - scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, + scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUMinExpr, scSMinExpr, scUnknown, scCouldNotCompute }; @@ -187,6 +187,8 @@ class Type; S->getSCEVType() == scMulExpr || S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr || + S->getSCEVType() == scSMinExpr || + S->getSCEVType() == scUMinExpr || S->getSCEVType() == scAddRecExpr; } }; @@ -204,7 +206,9 @@ class Type; return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr || S->getSCEVType() == scSMaxExpr || - S->getSCEVType() == scUMaxExpr; + S->getSCEVType() == scUMaxExpr || + S->getSCEVType() == scSMinExpr || + S->getSCEVType() == scUMinExpr; } /// Set flags for a non-recurrence without clearing previously set flags. @@ -396,6 +400,42 @@ class Type; } }; + /// This class represents a signed minimum selection. + class SCEVSMinExpr : public SCEVCommutativeExpr { + friend class ScalarEvolution; + + SCEVSMinExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, scSMinExpr, O, N) { + // Min never overflows. + setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); + } + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scSMinExpr; + } + }; + + /// This class represents an unsigned minimum selection. + class SCEVUMinExpr : public SCEVCommutativeExpr { + friend class ScalarEvolution; + + SCEVUMinExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, scUMinExpr, O, N) { + // Min never overflows. + setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); + } + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scUMinExpr; + } + }; + /// This means that we are dealing with an entirely unknown SCEV /// value, and only represent it as its LLVM Value. This is the /// "bottom" value for the analysis. @@ -468,6 +508,10 @@ class Type; return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S); case scUMaxExpr: return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S); + case scSMinExpr: + return ((SC*)this)->visitSMinExpr((const SCEVSMinExpr*)S); + case scUMinExpr: + return ((SC*)this)->visitUMinExpr((const SCEVUMinExpr*)S); case scUnknown: return ((SC*)this)->visitUnknown((const SCEVUnknown*)S); case scCouldNotCompute: @@ -521,6 +565,8 @@ class Type; case scMulExpr: case scSMaxExpr: case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: case scAddRecExpr: for (const auto *Op : cast(S)->operands()) push(Op); @@ -683,6 +729,26 @@ class Type; return !Changed ? Expr : SE.getUMaxExpr(Operands); } + const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) { + SmallVector Operands; + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC *)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getSMinExpr(Operands); + } + + const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) { + SmallVector Operands; + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC*)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? 
Expr : SE.getUMinExpr(Operands); + } + const SCEV *visitUnknown(const SCEVUnknown *Expr) { return Expr; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index bfff7afb5b4..750c1fdfdfb 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -271,7 +271,9 @@ void SCEV::print(raw_ostream &OS) const { case scAddExpr: case scMulExpr: case scUMaxExpr: - case scSMaxExpr: { + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: { const SCEVNAryExpr *NAry = cast(this); const char *OpStr = nullptr; switch (NAry->getSCEVType()) { @@ -279,6 +281,8 @@ void SCEV::print(raw_ostream &OS) const { case scMulExpr: OpStr = " * "; break; case scUMaxExpr: OpStr = " umax "; break; case scSMaxExpr: OpStr = " smax "; break; + case scUMinExpr: OpStr = " umin "; break; + case scSMinExpr: OpStr = " smin "; break; } OS << "("; for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); @@ -347,6 +351,8 @@ Type *SCEV::getType() const { case scMulExpr: case scUMaxExpr: case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: return cast(this)->getType(); case scAddExpr: return cast(this)->getType(); @@ -718,7 +724,9 @@ static int CompareSCEVComplexity( case scAddExpr: case scMulExpr: case scSMaxExpr: - case scUMaxExpr: { + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: { const SCEVNAryExpr *LC = cast(LHS); const SCEVNAryExpr *RC = cast(RHS); @@ -922,6 +930,8 @@ public: void visitUDivExpr(const SCEVUDivExpr *Numerator) {} void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} + void visitSMinExpr(const SCEVSMinExpr *Numerator) {} + void visitUMinExpr(const SCEVUMinExpr *Numerator) {} void visitUnknown(const SCEVUnknown *Numerator) {} void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} @@ -2276,6 +2286,8 @@ bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) { case scMulExpr: case scUMaxExpr: case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: case scUDivExpr: return true; case scUnknown: @@ -3405,23 +3417,20 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, return getAddExpr(BaseExpr, TotalOffset, Wrap); } -const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, - const SCEV *RHS) { - SmallVector Ops = {LHS, RHS}; - return getSMaxExpr(Ops); -} - const SCEV * -ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { - assert(!Ops.empty() && "Cannot get empty smax!"); +ScalarEvolution::getUSMinMaxExpr(unsigned Kind, SmallVectorImpl &Ops) { + assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && - "SCEVSMaxExpr operand types don't match!"); + "Operand types don't match!"); #endif + bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr; + bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr; + // Sort by complexity, this groups all similar expression types together. GroupByComplexity(Ops, &LI, DT); @@ -3430,61 +3439,85 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { ++Idx; assert(Idx < Ops.size()); + auto &FoldOp = + Kind == scSMaxExpr ? APIntOps::smax : + Kind == scSMinExpr ? APIntOps::smin : + Kind == scUMaxExpr ? APIntOps::umax : + APIntOps::umin; while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! 
ConstantInt *Fold = ConstantInt::get( - getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt())); + getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; LHSC = cast<SCEVConstant>(Ops[0]); } - // If we are left with a constant minimum-int, strip it off. - if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) { - Ops.erase(Ops.begin()); - --Idx; - } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) { - // If we have an smax with a constant maximum-int, it will always be - // maximum-int. - return Ops[0]; + if (IsMax) { + // If we are left with a constant minimum-int, strip it off. + if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(IsSigned)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(IsSigned)) { + // If we have a max with a constant maximum-int, it will always be + // maximum-int. + return Ops[0]; + } + } else { + // If we are left with a constant maximum-int, strip it off. + if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(IsSigned)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(IsSigned)) { + // If we have a min with a constant minimum-int, it will always be + // minimum-int. + return Ops[0]; + } } if (Ops.size() == 1) return Ops[0]; } - // Find the first SMax - while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) + // Find the first operation of the same kind + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() != Kind) ++Idx; // Check to see if one of the operands is an SMax. If so, expand its operands // onto our operand list, and recurse to simplify. if (Idx < Ops.size()) { - bool DeletedSMax = false; - while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) { + bool DeletedAny = false; + while (Ops[Idx]->getSCEVType() == Kind) { + const SCEVCommutativeExpr *SCE = cast<SCEVCommutativeExpr>(Ops[Idx]); Ops.erase(Ops.begin()+Idx); - Ops.append(SMax->op_begin(), SMax->op_end()); - DeletedSMax = true; + Ops.append(SCE->op_begin(), SCE->op_end()); + DeletedAny = true; } - if (DeletedSMax) - return getSMaxExpr(Ops); + if (DeletedAny) + return getUSMinMaxExpr(Kind, Ops); } // Okay, check to see if the same value occurs in the operand list twice. If // so, delete one. Since we sorted the list, these values are required to // be adjacent. - for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) - // X smax Y smax Y --> X smax Y - // X smax Y --> X, if X is always greater than Y - if (Ops[i] == Ops[i+1] || - isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { - Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); - --i; --e; - } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { - Ops.erase(Ops.begin()+i, Ops.begin()+i+1); - --i; --e; - } + llvm::CmpInst::Predicate GEPred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + llvm::CmpInst::Predicate LEPred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred; + llvm::CmpInst::Predicate SecondPred = IsMax ?
LEPred : GEPred; + for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) { + if (Ops[i] == Ops[i+1] || + isKnownPredicate(FirstPred, Ops[i], Ops[i+1])) { + // X op Y op Y --> X op Y + // X op Y --> X, if we know X, Y are ordered appropriately + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(SecondPred, Ops[i], Ops[i+1])) { + // X op Y --> Y, if we know X, Y are ordered appropriately + Ops.erase(Ops.begin()+i, Ops.begin()+i+1); + --i; --e; + } + } if (Ops.size() == 1) return Ops[0]; @@ -3493,132 +3526,73 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { // Okay, it looks like we really DO need an smax expr. Check to see if we // already have one, otherwise create a new one. FoldingSetNodeID ID; - ID.AddInteger(scSMaxExpr); + ID.AddInteger(Kind); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); - SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), - O, Ops.size()); + SCEV *S = nullptr; + + if (Kind == scSMaxExpr) { + S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + } else if (Kind == scUMaxExpr) { + S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + } else if (Kind == scSMinExpr) { + S = new (SCEVAllocator) SCEVSMinExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + } else { + assert(Kind == scUMinExpr); + S = new (SCEVAllocator) SCEVUMinExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + } + UniqueSCEVs.InsertNode(S, IP); addToLoopUseLists(S); return S; } -const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, +const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { SmallVector Ops = {LHS, RHS}; - return getUMaxExpr(Ops); + return getSMaxExpr(Ops); } -const SCEV * -ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { - assert(!Ops.empty() && "Cannot get empty umax!"); - if (Ops.size() == 1) return Ops[0]; -#ifndef NDEBUG - Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); - for (unsigned i = 1, e = Ops.size(); i != e; ++i) - assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && - "SCEVUMaxExpr operand types don't match!"); -#endif - - // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI, DT); - - // If there are any constants, fold them together. - unsigned Idx = 0; - if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { - ++Idx; - assert(Idx < Ops.size()); - while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { - // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get( - getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt())); - Ops[0] = getConstant(Fold); - Ops.erase(Ops.begin()+1); // Erase the folded element - if (Ops.size() == 1) return Ops[0]; - LHSC = cast(Ops[0]); - } - - // If we are left with a constant minimum-int, strip it off. - if (cast(Ops[0])->getValue()->isMinValue(false)) { - Ops.erase(Ops.begin()); - --Idx; - } else if (cast(Ops[0])->getValue()->isMaxValue(false)) { - // If we have an umax with a constant maximum-int, it will always be - // maximum-int. - return Ops[0]; - } - - if (Ops.size() == 1) return Ops[0]; - } - - // Find the first UMax - while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) - ++Idx; - - // Check to see if one of the operands is a UMax. 
If so, expand its operands - // onto our operand list, and recurse to simplify. - if (Idx < Ops.size()) { - bool DeletedUMax = false; - while (const SCEVUMaxExpr *UMax = dyn_cast(Ops[Idx])) { - Ops.erase(Ops.begin()+Idx); - Ops.append(UMax->op_begin(), UMax->op_end()); - DeletedUMax = true; - } - - if (DeletedUMax) - return getUMaxExpr(Ops); - } - - // Okay, check to see if the same value occurs in the operand list twice. If - // so, delete one. Since we sorted the list, these values are required to - // be adjacent. - for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) - // X umax Y umax Y --> X umax Y - // X umax Y --> X, if X is always greater than Y - if (Ops[i] == Ops[i+1] || - isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { - Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); - --i; --e; - } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { - Ops.erase(Ops.begin()+i, Ops.begin()+i+1); - --i; --e; - } - - if (Ops.size() == 1) return Ops[0]; +const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { + return getUSMinMaxExpr(scSMaxExpr, Ops); +} - assert(!Ops.empty() && "Reduced umax down to nothing!"); +const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, + const SCEV *RHS) { + SmallVector Ops = {LHS, RHS}; + return getUMaxExpr(Ops); +} - // Okay, it looks like we really DO need a umax expr. Check to see if we - // already have one, otherwise create a new one. - FoldingSetNodeID ID; - ID.AddInteger(scUMaxExpr); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - ID.AddPointer(Ops[i]); - void *IP = nullptr; - if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - const SCEV **O = SCEVAllocator.Allocate(Ops.size()); - std::uninitialized_copy(Ops.begin(), Ops.end(), O); - SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), - O, Ops.size()); - UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); - return S; +const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { + return getUSMinMaxExpr(scUMaxExpr, Ops); } const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, const SCEV *RHS) { - // ~smax(~x, ~y) == smin(x, y). - return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); + SmallVector Ops = { LHS, RHS }; + return getSMinExpr(Ops); +} + +const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl &Ops) { + return getUSMinMaxExpr(scSMinExpr, Ops); } const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, const SCEV *RHS) { - // ~umax(~x, ~y) == umin(x, y) - return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); + SmallVector Ops = { LHS, RHS }; + return getUMinExpr(Ops); +} + +const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl &Ops) { + return getUSMinMaxExpr(scUMinExpr, Ops); } const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { @@ -5002,6 +4976,7 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S, switch (S->getSCEVType()) { case scConstant: case scTruncate: case scZeroExtend: case scSignExtend: case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: + case scUMinExpr: case scSMinExpr: // These expressions are available if their operand(s) is/are. return true; @@ -7885,7 +7860,9 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { } case scSMaxExpr: case scUMaxExpr: - break; // TODO: smax, umax. + case scSMinExpr: + case scUMinExpr: + break; // TODO: smax, umax, smin, umax. 
} return nullptr; } @@ -8015,6 +7992,10 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { return getSMaxExpr(NewOps); if (isa(Comm)) return getUMaxExpr(NewOps); + if (isa(Comm)) + return getSMinExpr(NewOps); + if (isa(Comm)) + return getUMinExpr(NewOps); llvm_unreachable("Unknown commutative SCEV type!"); } } @@ -10998,7 +10979,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { case scAddExpr: case scMulExpr: case scUMaxExpr: - case scSMaxExpr: { + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: { bool HasVarying = false; for (auto *Op : cast(S)->operands()) { LoopDisposition D = getLoopDisposition(Op, L); @@ -11085,7 +11068,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { case scAddExpr: case scMulExpr: case scUMaxExpr: - case scSMaxExpr: { + case scSMaxExpr: + case scUMinExpr: + case scSMinExpr: { const SCEVNAryExpr *NAry = cast(S); bool Proper = true; for (const SCEV *NAryOp : NAry->operands()) { diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 01a8732b0b8..8160a1eaa0b 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1634,14 +1634,15 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. - if (S->getOperand(i)->getType() != Ty) { + Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeFor(S->getOperand(i), Ty); Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); rememberInstruction(ICmp); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); rememberInstruction(Sel); LHS = Sel; } @@ -1658,14 +1659,15 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. - if (S->getOperand(i)->getType() != Ty) { + Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { Ty = SE.getEffectiveSCEVType(Ty); LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeFor(S->getOperand(i), Ty); Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); rememberInstruction(ICmp); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); rememberInstruction(Sel); LHS = Sel; } @@ -1671,6 +1671,56 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { return LHS; } +Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. 
+ Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + +Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + Type *OpTy = S->getOperand(i)->getType(); + if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpULT(LHS, RHS); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, Instruction *IP) { setInsertPoint(IP); diff --git a/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll b/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll new file mode 100644 index 00000000000..a08632f38d1 --- /dev/null +++ b/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll @@ -0,0 +1,50 @@ +; RUN: opt -loop-versioning -S < %s | FileCheck %s + +; NB: addrspaces 10-13 are non-integral +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" + +%jl_value_t = type opaque +%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 } + +define void @"japi1_permutedims!_33509"(%jl_value_t addrspace(10)**) { +; CHECK: [[CMP:%[^ ]*]] = icmp ult double addrspace(13)* [[A:%[^ ]*]], [[B:%[^ ]*]] +; CHECK: [[SELECT:%[^ ]*]] = select i1 %18, double addrspace(13)* [[A]], double addrspace(13)* [[B]] +top: + %1 = alloca [3 x i64], align 8 + %2 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8 + %3 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, i64 1 + %4 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %3, align 8 + %5 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 0 + store i64 1, i64* %5, align 8 + %6 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 1 + %7 = load i64, i64* inttoptr (i64 24 to i64*), align 8 + %8 = addrspacecast %jl_value_t addrspace(10)* %4 to %jl_value_t addrspace(11)* + %9 = bitcast %jl_value_t addrspace(11)* %8 to double addrspace(13)* addrspace(11)* + %10 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %9, align 8 + %11 = addrspacecast %jl_value_t addrspace(10)* %2 to %jl_value_t addrspace(11)* + %12 = bitcast %jl_value_t addrspace(11)* %11 to double addrspace(13)* addrspace(11)* + %13 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %12, align 8 + %14 = load i64, i64* %6, align 8 + br label %L74 + +L74: + %value_phi20 = phi i64 [ 1, %top ], [ %22, %L74 ] + 
%value_phi21 = phi i64 [ 1, %top ], [ %23, %L74 ] + %value_phi22 = phi i64 [ 1, %top ], [ %25, %L74 ] + %15 = add i64 %value_phi21, -1 + %16 = getelementptr inbounds double, double addrspace(13)* %10, i64 %15 + %17 = bitcast double addrspace(13)* %16 to i64 addrspace(13)* + %18 = load i64, i64 addrspace(13)* %17, align 8 + %19 = add i64 %value_phi20, -1 + %20 = getelementptr inbounds double, double addrspace(13)* %13, i64 %19 + %21 = bitcast double addrspace(13)* %20 to i64 addrspace(13)* + store i64 %18, i64 addrspace(13)* %21, align 8 + %22 = add i64 %value_phi20, 1 + %23 = add i64 %14, %value_phi21 + %24 = icmp eq i64 %value_phi22, %7 + %25 = add i64 %value_phi22, 1 + br i1 %24, label %L94, label %L74 + +L94: + ret void +} diff --git a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll index 405a47554e4..4285ef0f117 100644 --- a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll +++ b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll @@ -58,7 +58,7 @@ for.end: ; preds = %for.body ; Here it is not obvious what the limits are, since 'step' could be negative. -; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a))))) +; CHECK: Low: ((60000 + %a) umin (60000 + (-40000 * %step) + %a)) ; CHECK: High: (4 + ((60000 + %a) umax (60000 + (-40000 * %step) + %a))) define void @g(i64 %step) { diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll index 3542ad2a41e..53e024a68fb 100644 --- a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll +++ b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll @@ -22,5 +22,5 @@ afterfor: ; preds = %forinc, %entry ret i32 %j.0.lcssa } -; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %{{[xy]}})) smax (-1 + (-1 * %{{[xy]}})))) +; CHECK: backedge-taken count is (-2147483633 + (-1 * (%x smin %y))) diff --git a/test/Analysis/ScalarEvolution/min-max-exprs.ll b/test/Analysis/ScalarEvolution/min-max-exprs.ll index e8c1e33e095..51f72c643cc 100644 --- a/test/Analysis/ScalarEvolution/min-max-exprs.ll +++ b/test/Analysis/ScalarEvolution/min-max-exprs.ll @@ -33,7 +33,7 @@ bb2: ; preds = %bb1 %tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6 ; min(N, i+3) ; CHECK: select i1 %tmp4, i64 %tmp5, i64 %tmp6 -; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64)))))) +; CHECK-NEXT: --> ((sext i32 {3,+,1}<%bb1> to i64) smin (sext i32 %N to i64)) %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9 %tmp12 = load i32, i32* %tmp11, align 4 %tmp13 = shl nsw i32 %tmp12, 1 diff --git a/test/Analysis/ScalarEvolution/pr28705.ll b/test/Analysis/ScalarEvolution/pr28705.ll index 8fbc08e3ca6..7d797a15bd5 100644 --- a/test/Analysis/ScalarEvolution/pr28705.ll +++ b/test/Analysis/ScalarEvolution/pr28705.ll @@ -5,7 +5,7 @@ ; with "%.sroa.speculated + 1". 
; ; CHECK-LABEL: @foo( -; CHECK: %[[EXIT:.+]] = sub i32 %.sroa.speculated, -1 +; CHECK: %[[EXIT:.+]] = add i32 %.sroa.speculated, 1 ; CHECK: %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %[[EXIT]], %loopexit ] ; define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr { diff --git a/test/Analysis/ScalarEvolution/predicated-trip-count.ll b/test/Analysis/ScalarEvolution/predicated-trip-count.ll index 2db0a8b5777..b07662ed95f 100644 --- a/test/Analysis/ScalarEvolution/predicated-trip-count.ll +++ b/test/Analysis/ScalarEvolution/predicated-trip-count.ll @@ -80,7 +80,7 @@ return: ; preds = %bb5 ; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32) ; CHECK: Loop %bb3: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. -; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M)))) +; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (sext i16 %Start to i32) + (-1 * ((1 + (sext i16 %Start to i32)) smin %M))) ; CHECK-NEXT: Predicates: ; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll index cce0182d649..7f20b4e71be 100644 --- a/test/Analysis/ScalarEvolution/trip-count3.ll +++ b/test/Analysis/ScalarEvolution/trip-count3.ll @@ -4,7 +4,7 @@ ; dividing by the stride will have a remainder. This could theoretically ; be teaching it how to use a more elaborate trip count computation. -; CHECK: Loop %bb3.i: backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64) +; CHECK: Loop %bb3.i: backedge-taken count is ((63 + (-1 * (63 smin %0)) + %0) /u 64) ; CHECK: Loop %bb3.i: max backedge-taken count is 33554431 %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } diff --git a/test/Transforms/IRCE/conjunctive-checks.ll b/test/Transforms/IRCE/conjunctive-checks.ll index f6a909e432c..d9bf485df3a 100644 --- a/test/Transforms/IRCE/conjunctive-checks.ll +++ b/test/Transforms/IRCE/conjunctive-checks.ll @@ -4,16 +4,6 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) { ; CHECK-LABEL: @f_0( ; CHECK: loop.preheader: -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len -; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] -; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] -; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] -; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 -; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 -; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] -; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit - ; CHECK: loop.preheader2: ; CHECK: br label %loop @@ -57,14 +47,10 @@ define void @f_1( ; CHECK-LABEL: @f_1( ; CHECK: loop.preheader: -; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b -; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a -; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] -; CHECK: 
[[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] -; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] -; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] +; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a +; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a +; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n +; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 diff --git a/test/Transforms/IRCE/decrementing-loop.ll b/test/Transforms/IRCE/decrementing-loop.ll index fac873b4a24..30663da9e9f 100644 --- a/test/Transforms/IRCE/decrementing-loop.ll +++ b/test/Transforms/IRCE/decrementing-loop.ll @@ -28,11 +28,8 @@ define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) { ret void ; CHECK: loop.preheader: -; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]] -; CHECK: [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]] -; CHECK: [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]] +; CHECK: [[len_hiclamp_cmp:[^ ]+]] = icmp slt i32 %len, %n +; CHECK: [[len_hiclamp:[^ ]+]] = select i1 [[len_hiclamp_cmp]], i32 %len, i32 %n ; CHECK: [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0 ; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0 ; CHECK: %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1 diff --git a/test/Transforms/IRCE/multiple-access-no-preloop.ll b/test/Transforms/IRCE/multiple-access-no-preloop.ll index 31bfe7881b6..e693b1b8ef4 100644 --- a/test/Transforms/IRCE/multiple-access-no-preloop.ll +++ b/test/Transforms/IRCE/multiple-access-no-preloop.ll @@ -37,14 +37,10 @@ define void @multiple_access_no_preloop( ; CHECK-LABEL: @multiple_access_no_preloop( ; CHECK: loop.preheader: -; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b -; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a -; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] -; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] -; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] -; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] +; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a +; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a +; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n +; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 
[[upper_limit_loclamp]], i32 0 diff --git a/test/Transforms/IRCE/ranges_of_different_types.ll b/test/Transforms/IRCE/ranges_of_different_types.ll index c38ef24bc18..5694906a4c5 100644 --- a/test/Transforms/IRCE/ranges_of_different_types.ll +++ b/test/Transforms/IRCE/ranges_of_different_types.ll @@ -22,12 +22,11 @@ define void @test_01(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 -; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13 +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 +; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit ; CHECK: loop @@ -82,13 +81,11 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 -; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 -; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] -; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 +; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0 ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: loop.preloop: ; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ] @@ -150,14 +147,11 @@ define void @test_03(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 -; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 -; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 -; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 +; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 
101 ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit ; CHECK: postloop: @@ -207,10 +201,9 @@ define void @test_04(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK-LABEL: test_04( ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 -; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13 +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: in.bounds.preloop: ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop @@ -251,12 +244,11 @@ define void @test_05(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 -; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13 +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 +; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit ; CHECK: loop @@ -296,13 +288,11 @@ define void @test_06(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 -; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 -; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] -; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 -; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 +; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 +; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101 +; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0 ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: in.bounds.preloop: ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop @@ -343,14 +333,11 @@ define void @test_07(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK-NOT: preloop ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len -; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = 
select i1 [[CMP1]], i32 [[SUB2]], i32 -14 -; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] -; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 -; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 -; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13 +; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 +; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] +; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101 ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit ; CHECK: loop @@ -387,10 +374,9 @@ define void @test_08(i32* %arr, i32* %a_len_ptr) #0 { ; CHECK-LABEL: test_08( ; CHECK: entry: ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len -; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 -; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 -; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] +; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13 +; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 +; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader ; CHECK: in.bounds.preloop: ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop diff --git a/test/Transforms/IRCE/single-access-no-preloop.ll b/test/Transforms/IRCE/single-access-no-preloop.ll index 53f430d0ba3..cbbdf81d46c 100644 --- a/test/Transforms/IRCE/single-access-no-preloop.ll +++ b/test/Transforms/IRCE/single-access-no-preloop.ll @@ -85,11 +85,9 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3 ; CHECK-LABEL: @single_access_no_preloop_with_offset( ; CHECK: loop.preheader: -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len -; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] -; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] -; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] +; CHECK: [[safe_range_end:[^ ]+]] = add i32 %len, -4 +; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]] +; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[safe_range_end]] ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] diff --git a/test/Transforms/IRCE/single-access-with-preloop.ll b/test/Transforms/IRCE/single-access-with-preloop.ll index 4b93122b6e7..3e2395dd100 100644 --- a/test/Transforms/IRCE/single-access-with-preloop.ll +++ b/test/Transforms/IRCE/single-access-with-preloop.ll @@ -33,11 +33,9 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 ; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647 ; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647 ; 
If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. -; CHECK: [[not_safe_start:[^ ]+]] = add i32 [[safe_offset_preloop]], -1 -; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]] -; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]] -; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]] +; CHECK: [[safe_start:[^ ]+]] = sub i32 0, [[safe_offset_preloop]] +; CHECK: [[exit_preloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_start]] +; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = select i1 [[exit_preloop_at_cond_loclamp]], i32 %n, i32 [[safe_start]] ; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0 ; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0 @@ -45,17 +43,15 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 ; CHECK: [[len_minus_sint_max:[^ ]+]] = add i32 %len, -2147483647 ; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]] ; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]] -; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 [[safe_offset_mainloop]], -1 ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. -; CHECK: [[not_safe_upper_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len -; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_n]] -; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_n]] +; CHECK: [[safe_upper_end:[^ ]+]] = sub i32 %len, [[safe_offset_mainloop]] +; CHECK: [[exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_upper_end]] +; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_loclamp]], i32 %n, i32 [[safe_upper_end]] ; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0 ; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0 -; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 [[safe_offset_mainloop_2]], -2147483648 -; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_safe_lower_end]] -; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_safe_lower_end]] -; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_hiclamp]] +; CHECK: [[safe_lower_end:[^ ]+]] = sub i32 2147483647, [[safe_offset_mainloop_2]] +; CHECK: [[exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp slt i32 [[exit_mainloop_at_loclamp]], [[safe_lower_end]] +; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_hiclamp]], i32 [[exit_mainloop_at_loclamp]], i32 [[safe_lower_end]] ; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_hiclamp]], 0 ; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_hiclamp]], i32 0 diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll index ea3f6077231..d5232e1874c 100644 --- 
a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll +++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll @@ -14,8 +14,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; current LSR cost model. ; CHECK-NOT: = ptrtoint i8* undef to i64 ; CHECK: .lr.ph -; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1 -; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}} ; CHECK: ret void define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 { bb:
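
Illustrative sketch referenced in the summary: the old lowering relied on the algebraic identity umin(x, y) == ~umax(~x, ~y), which requires computing `~x` and is therefore unavailable for values in non-integral pointer address spaces, while the new SCEVUMinExpr is expanded with a plain comparison and select (icmp ult + select), which remains legal for such pointers. The following is a minimal standalone C++ sketch of the two formulations on fixed-width unsigned integers; it is illustrative only, not part of the patch, and the function names are made up.

#include <cassert>
#include <cstdint>

// Old formulation: umin expressed via umax and bitwise-not. Only meaningful
// when ~x is computable, which it is not for non-integral pointers.
uint64_t umin_via_umax(uint64_t x, uint64_t y) {
  uint64_t nx = ~x, ny = ~y;
  uint64_t nmax = nx > ny ? nx : ny; // umax(~x, ~y)
  return ~nmax;                      // ~umax(~x, ~y) == umin(x, y)
}

// New formulation: umin expressed directly with an unsigned comparison and a
// select, mirroring what SCEVExpander::visitUMinExpr emits (icmp ult + select).
uint64_t umin_direct(uint64_t x, uint64_t y) {
  return x < y ? x : y;
}

int main() {
  // The two formulations agree on fixed-width unsigned integers.
  for (uint64_t x : {0ull, 1ull, 42ull, 0xffffffffffffffffull})
    for (uint64_t y : {0ull, 7ull, 0xffffffffffffffffull})
      assert(umin_via_umax(x, y) == umin_direct(x, y));
  return 0;
}

The smin/smax cases are analogous, using signed comparisons.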