From 478642224bca43ebe2c878159fb5b7989ed2641e Mon Sep 17 00:00:00 2001 From: Arnur Nigmetov Date: Thu, 6 Apr 2017 11:54:54 +0200 Subject: Heuristic for bottleneck: estimate on sampled diagram Sample input diagrams and use the distance between samples as initial guess in binary search. Relevant variables: bottleneck_dist.cpp:10 useSamplingHeur (if false, heuristic is never applied) bottleneck_dist.cpp:11 heurThreshold (heuristic is not applid to diagrams with fewer points) TODO: add command-line parameters instead Sampling strategy: sort points by multiplicity, set cutting value to be the multiplicity where the increase of the next one is maximal and only keep points whose multiplicity is less then the cutting value --- geom_bottleneck/bottleneck/include/bottleneck.h | 15 +++ geom_bottleneck/bottleneck/src/bottleneck.cpp | 122 ++++++++++++++++++++++++ geom_bottleneck/example/bottleneck_dist.cpp | 11 ++- 3 files changed, 147 insertions(+), 1 deletion(-) (limited to 'geom_bottleneck') diff --git a/geom_bottleneck/bottleneck/include/bottleneck.h b/geom_bottleneck/bottleneck/include/bottleneck.h index 2078965..75c902f 100644 --- a/geom_bottleneck/bottleneck/include/bottleneck.h +++ b/geom_bottleneck/bottleneck/include/bottleneck.h @@ -46,6 +46,10 @@ typedef std::pair> DistVerticesPair; // b) if the interval is not (0,0), then (distMax - distMin) / distMin < epsilon std::pair bottleneckDistApproxInterval(DiagramPointSet& A, DiagramPointSet& B, const double epsilon); + +// heuristic (sample diagram to estimate the distance) +std::pair bottleneckDistApproxIntervalHeur(DiagramPointSet& A, DiagramPointSet& B, const double epsilon); + // get approximate distance, // see bottleneckDistApproxInterval double bottleneckDistApprox(DiagramPointSet& A, DiagramPointSet& B, const double epsilon); @@ -74,6 +78,17 @@ std::pair bottleneckDistApproxInterval(PairContainer& A, PairCon return bottleneckDistApproxInterval(a, b, epsilon); } + +template +double bottleneckDistApproxHeur(PairContainer& A, PairContainer& B, const double epsilon) +{ + DiagramPointSet a(A.begin(), A.end()); + DiagramPointSet b(B.begin(), B.end()); + std::pair resPair = bottleneckDistApproxIntervalHeur(a, b, epsilon); + return resPair.second; +} + + // get approximate distance, // see bottleneckDistApproxInterval template diff --git a/geom_bottleneck/bottleneck/src/bottleneck.cpp b/geom_bottleneck/bottleneck/src/bottleneck.cpp index 8365878..e41e131 100644 --- a/geom_bottleneck/bottleneck/src/bottleneck.cpp +++ b/geom_bottleneck/bottleneck/src/bottleneck.cpp @@ -91,6 +91,128 @@ std::pair bottleneckDistApproxInterval(DiagramPointSet& A, Diagr return result; } +void sampleDiagramForHeur(const DiagramPointSet& dgmIn, DiagramPointSet& dgmOut) +{ + struct pair_hash { + std::size_t operator()(const std::pair p) const + { + return std::hash()(p.first) ^ std::hash()(p.second); + } + }; + std::unordered_map, int, pair_hash> m; + for(auto ptIter = dgmIn.cbegin(); ptIter != dgmIn.cend(); ++ptIter) { + m[std::make_pair(ptIter->getRealX(), ptIter->getRealY())]++; + } + if (m.size() < 2) { + dgmOut = dgmIn; + return; + } + std::vector v; + for(const auto& ptQtyPair : m) { + v.push_back(ptQtyPair.second); + } + std::sort(v.begin(), v.end()); + int maxLeap = v[1] - v[0]; + int cutVal = v[0]; + for(int i = 1; i < v.size() - 1; ++i) { + int currLeap = v[i+1] - v[i]; + if (currLeap > maxLeap) { + maxLeap = currLeap; + cutVal = v[i]; + } + } + std::vector> vv; + for(const auto& ptQty : m) { + if (ptQty.second < cutVal) { + for(int i = 0; i < ptQty.second; ++i) { + vv.push_back(std::make_pair(ptQty.first.first, ptQty.first.second)); + } + } + } + // keep points whose multiplicites are at most cutVal + dgmOut.clear(); + dgmOut = DiagramPointSet(vv.begin(), vv.end()); +} + + +// return the interval (distMin, distMax) such that: +// a) actual bottleneck distance between A and B is contained in the interval +// b) if the interval is not (0,0), then (distMax - distMin) / distMin < epsilon +std::pair bottleneckDistApproxIntervalWithInitial(DiagramPointSet& A, DiagramPointSet& B, const double epsilon, const std::pair initialGuess) +{ + // empty diagrams are not considered as error + if (A.empty() and B.empty()) + return std::make_pair(0.0, 0.0); + + // link diagrams A and B by adding projections + addProjections(A, B); + + constexpr double epsThreshold { 1.0e-10 }; + std::pair result { 0.0, 0.0 }; + bool useRangeSearch { true }; + // construct an oracle + BoundMatchOracle oracle(A, B, epsThreshold, useRangeSearch); + double& distMin {result.first}; + double& distMax {result.second}; + + // initialize search interval from initialGuess + distMin = initialGuess.first; + distMax = initialGuess.second; + + assert(distMin <= distMax); + + // make sure that distMin is a lower bound + while(oracle.isMatchLess(distMin)) { + // distMin is in fact an upper bound, so assign it to distMax + distMax = distMin; + // and decrease distMin by 5 % + distMin = 0.95 * distMin; + } + + // make sure that distMax is an upper bound + while(not oracle.isMatchLess(distMax)) { + // distMax is in fact a lower bound, so assign it to distMin + distMin = distMax; + // and increase distMax by 5 % + distMax = 1.05 * distMax; + } + + // bounds are found, perform binary search + //std::cout << "Bounds found, distMin = " << distMin << ", distMax = " << distMax << ", ratio = " << ( distMax - distMin ) / distMin << std::endl ; + double distProbe = ( distMin + distMax ) / 2.0; + while ( ( distMax - distMin ) / distMin >= epsilon ) { + if (oracle.isMatchLess(distProbe)) { + distMax = distProbe; + } else { + distMin = distProbe; + } + distProbe = ( distMin + distMax ) / 2.0; + } + return result; +} + +// return the interval (distMin, distMax) such that: +// a) actual bottleneck distance between A and B is contained in the interval +// b) if the interval is not (0,0), then (distMax - distMin) / distMin < epsilon +// use heuristic: initial estimate on sampled diagrams +std::pair bottleneckDistApproxIntervalHeur(DiagramPointSet& A, DiagramPointSet& B, const double epsilon) +{ + // empty diagrams are not considered as error + if (A.empty() and B.empty()) + return std::make_pair(0.0, 0.0); + + DiagramPointSet sampledA, sampledB; + sampleDiagramForHeur(A, sampledA); + sampleDiagramForHeur(B, sampledB); + //std::cout << "A : " << A.size() << ", sampled: " << sampledA.size() << std::endl; + //std::cout << "B : " << B.size() << ", sampled: " << sampledB.size() << std::endl; + std::pair initGuess = bottleneckDistApproxInterval(sampledA, sampledB, epsilon); + //std::cout << "initial guess: " << initGuess.first << ", " << initGuess.second << std::endl; + return bottleneckDistApproxIntervalWithInitial(A, B, epsilon, initGuess); +} + + + // get approximate distance, // see bottleneckDistApproxInterval double bottleneckDistApprox(DiagramPointSet& A, DiagramPointSet& B, const double epsilon) diff --git a/geom_bottleneck/example/bottleneck_dist.cpp b/geom_bottleneck/example/bottleneck_dist.cpp index 655de44..b27bdbb 100644 --- a/geom_bottleneck/example/bottleneck_dist.cpp +++ b/geom_bottleneck/example/bottleneck_dist.cpp @@ -6,6 +6,11 @@ typedef std::vector> PairVector; +// estimate initial guess on sampled diagram? +constexpr bool useSamplingHeur = false; +// if diagrams contain fewer points, don't use heuristic +constexpr int heurThreshold = 30000; + int main(int argc, char* argv[]) { if (argc < 3 ) { @@ -29,7 +34,11 @@ int main(int argc, char* argv[]) // return approximate distance (faster) double approxEpsilon = atof(argv[3]); if (approxEpsilon > 0.0) { - res = geom_bt::bottleneckDistApprox(diagramA, diagramB, approxEpsilon); + if (useSamplingHeur && diagramA.size() > heurThreshold && diagramB.size() > heurThreshold) { + res = geom_bt::bottleneckDistApproxHeur(diagramA, diagramB, approxEpsilon); + } else { + res = geom_bt::bottleneckDistApprox(diagramA, diagramB, approxEpsilon); + } } else if (approxEpsilon == 0.0) { res = geom_bt::bottleneckDistExact(diagramA, diagramB, decPrecision); } else { -- cgit v1.2.3