From 75cf0745e95a37c8d65e7a283a11cd500ab6edc2 Mon Sep 17 00:00:00 2001 From: Arnur Nigmetov Date: Sat, 19 May 2018 20:17:53 +0200 Subject: Points at infinity handled correctly. 1. Reader copied from wasserstein code, can parse inf, moved to hera namespace. 2. addProjections removes points at infinity. 3. Points at infinity handled separately by getInfinityCost(). --- geom_bottleneck/example/bottleneck_dist.cpp | 16 +- geom_bottleneck/include/basic_defs_bt.h | 43 +- geom_bottleneck/include/bottleneck.h | 6 +- geom_bottleneck/include/bottleneck_detail.hpp | 580 +++++++------------------- geom_bottleneck/include/diagram_reader.h | 217 ++++++---- 5 files changed, 316 insertions(+), 546 deletions(-) diff --git a/geom_bottleneck/example/bottleneck_dist.cpp b/geom_bottleneck/example/bottleneck_dist.cpp index b66e7bc..91c4190 100644 --- a/geom_bottleneck/example/bottleneck_dist.cpp +++ b/geom_bottleneck/example/bottleneck_dist.cpp @@ -48,11 +48,11 @@ int main(int argc, char* argv[]) PairVector diagramA, diagramB; int decPrecision { 0 }; - if (!hera::bt::readDiagramPointSet(argv[1], diagramA, decPrecision)) { + if (!hera::readDiagramPointSet(argv[1], diagramA, decPrecision)) { std::exit(1); } - if (!hera::bt::readDiagramPointSet(argv[2], diagramB, decPrecision)) { + if (!hera::readDiagramPointSet(argv[2], diagramB, decPrecision)) { std::exit(1); } @@ -63,20 +63,11 @@ int main(int argc, char* argv[]) double delta = atof(argv[3]); if (delta > 0.0) { if (useSamplingHeur && diagramA.size() > heurThreshold && diagramB.size() > heurThreshold) { -#ifdef VERBOSE_BOTTLENECK - std::cout << "using sampling heuristic" << std::endl; -#endif res = hera::bottleneckDistApproxHeur(diagramA, diagramB, delta); } else { -#ifdef VERBOSE_BOTTLENECK - std::cout << "NOT using sampling heuristic" << std::endl; -#endif res = hera::bottleneckDistApprox(diagramA, diagramB, delta); } } else if (delta == 0.0) { -#ifdef VERBOSE_BOTTLENECK - std::cout << "NOT using sampling heuristic, computing EXACT answer" << std::endl; -#endif res = hera::bottleneckDistExact(diagramA, diagramB, decPrecision); } else { std::cerr << "The third parameter (relative error) must be positive!" << std::endl; @@ -84,9 +75,6 @@ int main(int argc, char* argv[]) } } else { // only filenames have been supplied, return exact distance -#ifdef VERBOSE_BOTTLENECK - std::cout << "NOT using sampling heuristic, computing EXACT answer" << std::endl; -#endif res = hera::bottleneckDistExact(diagramA, diagramB, decPrecision); } std::cout << std::setprecision(15) << res << std::endl; diff --git a/geom_bottleneck/include/basic_defs_bt.h b/geom_bottleneck/include/basic_defs_bt.h index ad09986..69dc709 100644 --- a/geom_bottleneck/include/basic_defs_bt.h +++ b/geom_bottleneck/include/basic_defs_bt.h @@ -128,9 +128,17 @@ public: } - bool isDiagonal(void) const { return type == DIAG; } + bool isDiagonal() const { return type == DIAG; } - bool isNormal(void) const { return type == NORMAL; } + bool isNormal() const { return type == NORMAL; } + + bool isInfinity() const + { + return x == std::numeric_limits::infinity() or + x == -std::numeric_limits::infinity() or + y == std::numeric_limits::infinity() or + y == -std::numeric_limits::infinity(); + } Real inline getRealX() const // return the x-coord { @@ -177,17 +185,6 @@ inline void hash_combine(std::size_t & seed, const T & v) seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); } -//template -//struct PointHash { -// size_t operator()(const Point &p) const -// { -// size_t seed = 0; -// hash_combine(seed, p.x); -// hash_combine(seed, p.y); -// return seed; -// } -//}; - template struct DiagramPointHash { size_t operator()(const DiagramPoint &p) const @@ -306,11 +303,6 @@ public: return points.end(); } - const_iterator find(const DgmPoint &p) const - { - return points.find(p); - } - const_iterator cbegin() const { return points.cbegin(); @@ -321,6 +313,12 @@ public: return points.cend(); } + + const_iterator find(const DgmPoint &p) const + { + return points.find(p); + } + #ifndef FOR_R_TDA template friend std::ostream& operator<<(std::ostream& output, const DiagramPointSet& ps) @@ -405,7 +403,8 @@ Real getFurthestDistance3Approx(DiagPointContainer& A, DiagPointContainer& B) { } // preprocess diagrams A and B by adding projections onto diagonal of points of -// A to B and vice versa. NB: ids of points will be changed! +// A to B and vice versa. Also removes points at infinity! +// NB: ids of points will be changed! template void addProjections(DiagramPointSet& A, DiagramPointSet& B) { @@ -420,7 +419,8 @@ void addProjections(DiagramPointSet& A, DiagramPointSet& B) // copy normal points from A to newA // add projections to newB for(auto& pA : A) { - if (pA.isNormal()) { + if (pA.isNormal() and not pA.isInfinity()) { + // add pA's projection to B DgmPoint dpA {pA.getRealX(), pA.getRealY(), DgmPoint::NORMAL, uniqueId++}; DgmPoint dpB {(pA.getRealX() +pA.getRealY())/2, (pA.getRealX() +pA.getRealY())/2, DgmPoint::DIAG, uniqueId++}; newA.insert(dpA); @@ -429,7 +429,8 @@ void addProjections(DiagramPointSet& A, DiagramPointSet& B) } for(auto& pB : B) { - if (pB.isNormal()) { + if (pB.isNormal() and not pB.isInfinity()) { + // add pB's projection to A DgmPoint dpB {pB.getRealX(), pB.getRealY(), DgmPoint::NORMAL, uniqueId++}; DgmPoint dpA {(pB.getRealX() +pB.getRealY())/2, (pB.getRealX() +pB.getRealY())/2, DgmPoint::DIAG, uniqueId++}; newB.insert(dpB); diff --git a/geom_bottleneck/include/bottleneck.h b/geom_bottleneck/include/bottleneck.h index d0d82b6..0d4e1ed 100644 --- a/geom_bottleneck/include/bottleneck.h +++ b/geom_bottleneck/include/bottleneck.h @@ -50,6 +50,8 @@ namespace hera { // PairContainer class must support iteration of the form // for(it = pairContainer.begin(); it != pairContainer.end(); ++it) +// all functions in this header are wrappers around +// functions from hera::bt namespace // get exact bottleneck distance, template @@ -86,7 +88,8 @@ bottleneckDistApproxInterval(PairContainer& dgm_A, PairContainer& dgm_B, const t return hera::bt::bottleneckDistApproxInterval(a, b, delta); } - +// use sampling heuristic: discard most of the points with small persistency +// to get a good initial approximation of the bottleneck distance template typename DiagramTraits::RealType bottleneckDistApproxHeur(PairContainer& dgm_A, PairContainer& dgm_B, const typename DiagramTraits::RealType delta) @@ -98,7 +101,6 @@ bottleneckDistApproxHeur(PairContainer& dgm_A, PairContainer& dgm_B, const typen return resPair.second; } - // get approximate distance, // see bottleneckDistApproxInterval template diff --git a/geom_bottleneck/include/bottleneck_detail.hpp b/geom_bottleneck/include/bottleneck_detail.hpp index 64c6696..24cb725 100644 --- a/geom_bottleneck/include/bottleneck_detail.hpp +++ b/geom_bottleneck/include/bottleneck_detail.hpp @@ -46,16 +46,144 @@ derivative works thereof, in binary and source code form. namespace hera { namespace bt { +template +void binarySearch(const Real epsilon, + std::pair& result, + BoundMatchOracle& oracle, + const Real infinityCost, + bool isResultInitializedCorrectly, + const Real distProbeInit) +{ + // aliases for result components + Real& distMin = result.first; + Real& distMax = result.second; + + distMin = std::max(distMin, infinityCost); + distMax = std::max(distMax, infinityCost); + + Real distProbe; + + if (not isResultInitializedCorrectly) { + distProbe = distProbeInit; + if (oracle.isMatchLess(distProbe)) { + // distProbe is an upper bound, + // find lower bound with binary search + do { + distMax = distProbe; + distProbe /= 2.0; + } while (oracle.isMatchLess(distProbe)); + distMin = distProbe; + } else { + // distProbe is a lower bound, + // find upper bound with exponential search + do { + distMin = distProbe; + distProbe *= 2.0; + } while (!oracle.isMatchLess(distProbe)); + distMax = distProbe; + } + } + // bounds are correct , perform binary search + distProbe = ( distMin + distMax ) / 2.0; + while (( distMax - distMin ) / distMin >= epsilon ) { + + if (distMax < infinityCost) { + distMin = infinityCost; + distMax = infinityCost; + break; + } + + if (oracle.isMatchLess(distProbe)) { + distMax = distProbe; + } else { + distMin = distProbe; + } + + distProbe = ( distMin + distMax ) / 2.0; + } + + distMin = std::max(distMin, infinityCost); + distMax = std::max(distMax, infinityCost); +} + +template +inline Real getOneDimensionalCost(std::vector &set_A, std::vector &set_B) +{ + if (set_A.size() != set_B.size()) { + return std::numeric_limits::infinity(); + } + + if (set_A.empty()) { + return Real(0.0); + } + + std::sort(set_A.begin(), set_A.end()); + std::sort(set_B.begin(), set_B.end()); + + Real result = 0.0; + for(size_t i = 0; i < set_A.size(); ++i) { + result = std::max(result, (std::fabs(set_A[i] - set_B[i]))); + } + + return result; +} + +template +inline Real getInfinityCost(const DiagramPointSet &A, const DiagramPointSet &B) +{ + std::vector x_plus_A, x_minus_A, y_plus_A, y_minus_A; + std::vector x_plus_B, x_minus_B, y_plus_B, y_minus_B; + + for(auto iter_A = A.cbegin(); iter_A != A.cend(); ++iter_A) { + Real x = iter_A->getRealX(); + Real y = iter_A->getRealY(); + if ( x == std::numeric_limits::infinity()) { + y_plus_A.push_back(y); + } else if (x == -std::numeric_limits::infinity()) { + y_minus_A.push_back(y); + } else if (y == std::numeric_limits::infinity()) { + x_plus_A.push_back(x); + } else if (y == -std::numeric_limits::infinity()) { + x_minus_A.push_back(x); + } + } + + for(auto iter_B = B.cbegin(); iter_B != B.cend(); ++iter_B) { + Real x = iter_B->getRealX(); + Real y = iter_B->getRealY(); + if (x == std::numeric_limits::infinity()) { + y_plus_B.push_back(y); + } else if (x == -std::numeric_limits::infinity()) { + y_minus_B.push_back(y); + } else if (y == std::numeric_limits::infinity()) { + x_plus_B.push_back(x); + } else if (y == -std::numeric_limits::infinity()) { + x_minus_B.push_back(x); + } + } + + Real infinity_cost = getOneDimensionalCost(x_plus_A, x_plus_B); + infinity_cost = std::max(infinity_cost, getOneDimensionalCost(x_minus_A, x_minus_B)); + infinity_cost = std::max(infinity_cost, getOneDimensionalCost(y_plus_A, y_plus_B)); + infinity_cost = std::max(infinity_cost, getOneDimensionalCost(y_minus_A, y_minus_B)); + + return infinity_cost; +} + // return the interval (distMin, distMax) such that: // a) actual bottleneck distance between A and B is contained in the interval // b) if the interval is not (0,0), then (distMax - distMin) / distMin < epsilon template -std::pair bottleneckDistApproxInterval(DiagramPointSet& A, DiagramPointSet& B, const Real epsilon) +inline std::pair bottleneckDistApproxInterval(DiagramPointSet& A, DiagramPointSet& B, const Real epsilon) { // empty diagrams are not considered as error if (A.empty() and B.empty()) return std::make_pair(0.0, 0.0); + Real infinity_cost = getInfinityCost(A, B); + if (infinity_cost == std::numeric_limits::infinity()) + return std::make_pair(infinity_cost, infinity_cost); + // link diagrams A and B by adding projections addProjections(A, B); @@ -74,47 +202,13 @@ std::pair bottleneckDistApproxInterval(DiagramPointSet& A, Dia // get a 3-approximation of maximal distance between A and B // as a starting value for probe distance Real distProbe { getFurthestDistance3Approx>(A, B) }; - // aliases for result components - Real& distMin {result.first}; - Real& distMax {result.second}; - - if ( oracle.isMatchLess(distProbe) ) { - // distProbe is an upper bound, - // find lower bound with binary search - do { - distMax = distProbe; - distProbe /= 2.0; - } while (oracle.isMatchLess(distProbe)); - distMin = distProbe; - } else { - // distProbe is a lower bound, - // find upper bound with exponential search - do { - distMin = distProbe; - distProbe *= 2.0; - } while (!oracle.isMatchLess(distProbe)); - distMax = distProbe; - } - // bounds are found, perform binary search - //std::cout << "Bounds found, distMin = " << distMin << ", distMax = " << distMax << ", ratio = " << ( distMax - distMin ) / distMin << std::endl ; - distProbe = ( distMin + distMax ) / 2.0; - while ( ( distMax - distMin ) / distMin >= epsilon ) { - if (oracle.isMatchLess(distProbe)) { - distMax = distProbe; - } else { - distMin = distProbe; - } - distProbe = ( distMin + distMax ) / 2.0; - } + binarySearch(epsilon, result, oracle, infinity_cost, false, distProbe); return result; } template void sampleDiagramForHeur(const DiagramPointSet& dgmIn, DiagramPointSet& dgmOut) { -#ifdef VERBOSE_BOTTLENECK - std::cout << "Entered sampleDiagramForHeur, dgmIn.size = " << dgmIn.size() << std::endl; -#endif struct pair_hash { std::size_t operator()(const std::pair p) const { @@ -123,14 +217,11 @@ void sampleDiagramForHeur(const DiagramPointSet& dgmIn, DiagramPointSet, int, pair_hash> m; for(auto ptIter = dgmIn.cbegin(); ptIter != dgmIn.cend(); ++ptIter) { - if (ptIter->isNormal()) { + if (ptIter->isNormal() and not ptIter->isInfinity()) { m[std::make_pair(ptIter->getRealX(), ptIter->getRealY())]++; } } -#ifdef VERBOSE_BOTTLENECK - std::cout << "map filled in, m.size = " << m.size() << std::endl; -#endif - if (m.size() < 2) { + if (m.size() < 2) { dgmOut = dgmIn; return; } @@ -138,13 +229,7 @@ void sampleDiagramForHeur(const DiagramPointSet& dgmIn, DiagramPointSet(v.size())- 1; ++i) { @@ -154,10 +239,7 @@ void sampleDiagramForHeur(const DiagramPointSet& dgmIn, DiagramPointSet initGuess = bottleneckDistApproxInterval(sampledA, sampledB, epsilon); -#ifdef VERBOSE_BOTTLENECK - std::cout << "initial guess with sampling: " << initGuess.first << ", " << initGuess.second << std::endl; - std::cout << "running on the original diagrams" << std::endl; -#endif - return bottleneckDistApproxIntervalWithInitial(A, B, epsilon, initGuess); + + initGuess.first = std::max(initGuess.first, infinity_cost); + initGuess.second = std::max(initGuess.second, infinity_cost); + + return bottleneckDistApproxIntervalWithInitial(A, B, epsilon, initGuess, infinity_cost); } @@ -277,9 +350,6 @@ Real bottleneckDistApprox(DiagramPointSet& A, DiagramPointSet& B, co template Real bottleneckDistExactFromSortedPwDist(DiagramPointSet&A, DiagramPointSet& B, std::vector& pairwiseDist, const int decPrecision) { - //for(size_t k = 0; k < pairwiseDist.size(); ++k) { - //std::cout << "pairwiseDist[" << k << "] = " << std::setprecision(15) << pairwiseDist[k] << std::endl; - //} // trivial case: we have only one candidate if (pairwiseDist.size() == 1) return pairwiseDist[0]; @@ -292,13 +362,11 @@ Real bottleneckDistExactFromSortedPwDist(DiagramPointSet&A, DiagramPointSe } for(size_t k = 0; k < pairwiseDist.size() - 2; ++k) { auto diff = pairwiseDist[k+1]- pairwiseDist[k]; - //std::cout << "diff = " << diff << ", pairwiseDist[k] = " << pairwiseDist[k] << std::endl; if ( diff > diffThreshold and diff < distEpsilon ) { distEpsilon = diff; } } distEpsilon /= 3.0; - //std::cout << "decPrecision = " << decPrecision << ", distEpsilon = " << distEpsilon << std::endl; BoundMatchOracle oracle(A, B, distEpsilon, useRangeSearch); // binary search @@ -307,17 +375,13 @@ Real bottleneckDistExactFromSortedPwDist(DiagramPointSet&A, DiagramPointSe size_t idxMid; while(idxMax > idxMin) { idxMid = static_cast(floor(idxMin + idxMax) / 2.0); - //std::cout << "while begin: min = " << idxMin << ", idxMax = " << idxMax << ", idxMid = " << idxMid << ", testing d = " << std::setprecision(15) << pairwiseDist[idxMid] << std::endl; iterNum++; // not A[imid] < dist <=> A[imid] >= dist <=> A[imid[ >= dist + eps if (oracle.isMatchLess(pairwiseDist[idxMid] + distEpsilon / 2.0)) { - //std::cout << "isMatchLess = true" << std::endl; idxMax = idxMid; } else { - //std::cout << "isMatchLess = false " << std::endl; idxMin = idxMid + 1; } - //std::cout << "while end: idxMin = " << idxMin << ", idxMax = " << idxMax << ", idxMid = " << idxMid << std::endl; } idxMid = static_cast(floor(idxMin + idxMax) / 2.0); return pairwiseDist[idxMid]; @@ -337,11 +401,12 @@ Real bottleneckDistExact(DiagramPointSet& A, DiagramPointSet& B, con constexpr Real epsilon = 0.001; auto interval = bottleneckDistApproxInterval(A, B, epsilon); + if (interval.first == interval.second) + return interval.first; const Real delta = 0.50001 * (interval.second - interval.first); const Real approxDist = 0.5 * ( interval.first + interval.second); const Real minDist = interval.first; const Real maxDist = interval.second; - //std::cout << std::setprecision(15) << "minDist = " << minDist << ", maxDist = " << maxDist << std::endl; if ( delta == 0 ) { return interval.first; } @@ -353,21 +418,6 @@ Real bottleneckDistExact(DiagramPointSet& A, DiagramPointSet& B, con pointsA.push_back(ptA); } - //std::vector killdist; - //for(auto pta : a) { - //for(auto ptb : b) { - //if ( distlinf(pta, ptb) > mindist and distlinf(pta, ptb) < maxdist) { - //killdist.push_back(distlinf(pta, ptb)); - //std::cout << pta << ", " << ptb << std::endl; - //} - //} - //} - //std::sort(killdist.begin(), killdist.end()); - //for(auto d : killdist) { - //std::cout << d << std::endl; - //} - //std::cout << "*************" << std::endl; - // in this vector we store the distances between the points // that are candidates to realize std::vector pairwiseDist; @@ -385,13 +435,6 @@ Real bottleneckDistExact(DiagramPointSet& A, DiagramPointSet& B, con { return a.first < b.first; }; std::sort(xCentersVec.begin(), xCentersVec.end(), compLambda); - //std::cout << "xCentersVec.size = " << xCentersVec.size() << std::endl; - //for(auto p = xCentersVec.begin(); p!= xCentersVec.end(); ++p) { - //if (p->second.id == 200) { - //std::cout << "index of 200: " << p - xCentersVec.begin() << std::endl; - //} - //} - //std::vector // todo: sort points in B, reduce search range in lower and upper bounds for(auto ptB : B) { // iterator to the first stripe such that ptB lies to the left @@ -400,30 +443,17 @@ Real bottleneckDistExact(DiagramPointSet& A, DiagramPointSet& B, con xCentersVec.end(), std::make_pair(ptB.getRealX() - delta, ptB), compLambda); - //if (ptB.id == 236) { - //std::cout << itStart - xCentersVec.begin() << std::endl; - //} for(auto iterA = itStart; iterA < xCentersVec.end(); ++iterA) { - //if (ptB.id == 236) { - //std::cout << "consider " << iterA->second << std::endl; - //} if ( ptB.getRealX() < iterA->first - delta) { // from that moment x_B >= x_j - delta // is violated: x_B no longer lies to right from the left // boundary of current stripe - //if (ptB.id == 236) { - //std::cout << "break" << std::endl; - //} break; } // we're here => ptB lies in vertical stripe, // check if distance fits into the interval we've found Real pwDist = distLInf(iterA->second, ptB); - //if (ptB.id == 236) { - //std::cout << pwDist << std::endl; - //} - //std::cout << 1000*minDist << " <= " << 1000*pwDist << " <= " << 1000*maxDist << std::endl; if (pwDist >= minDist and pwDist <= maxDist) { pairwiseDist.push_back(pwDist); } @@ -447,13 +477,6 @@ Real bottleneckDistExact(DiagramPointSet& A, DiagramPointSet& B, con std::sort(yCentersVec.begin(), yCentersVec.end(), compLambda); - // std::cout << "Sorted vector of y-centers:" << std::endl; - //for(auto coordPtPair : yCentersVec) { - //std::cout << coordPtPair.first << ", id = " << coordPtPair.second.id << std::endl; - //} - /*std::cout << "End of sorted vector of y-centers:" << std::endl;*/ - - //std::vector // todo: sort points in B, reduce search range in lower and upper bounds for(auto ptB : B) { auto itStart = std::lower_bound(yCentersVec.begin(), @@ -467,7 +490,6 @@ Real bottleneckDistExact(DiagramPointSet& A, DiagramPointSet& B, con break; } Real pwDist = distLInf(iterA->second, ptB); - //std::cout << 1000*minDist << " <= " << 1000*pwDist << " <= " << 1000*maxDist << std::endl; if (pwDist >= minDist and pwDist <= maxDist) { pairwiseDist.push_back(pwDist); } @@ -475,311 +497,11 @@ Real bottleneckDistExact(DiagramPointSet& A, DiagramPointSet& B, con } } - //std::cout << "pairwiseDist.size = " << pairwiseDist.size() << " out of " << A.size() * A.size() << std::endl; std::sort(pairwiseDist.begin(), pairwiseDist.end()); - //for(auto ddd : pairwiseDist) { - //std::cout << std::setprecision(15) << ddd << std::endl; - //} return bottleneckDistExactFromSortedPwDist(A, B, pairwiseDist, decPrecision); } -template -Real bottleneckDistSlow(DiagramPointSet& A, DiagramPointSet& B) -{ - using DistVerticesPair = std::pair>; - - // use range search when building the layer graph - bool useRangeSearch { true }; - // find maximum of min. distances for each point, - // use this value as lower bound for bottleneck distance - bool useHeurMinIdx { true }; - - // find matching in a greedy manner to - // get an upper bound for a bottleneck distance - bool useHeurGreedyMatching { false }; - - // use successive multiplication of idxMin with 2 to get idxMax - bool goUpToFindIdxMax { false }; - // - goUpToFindIdxMax = goUpToFindIdxMax and !useHeurGreedyMatching; - - if (!useHeurGreedyMatching) { - long int N = 3 * (A.size() / 2 ) * (B.size() / 2); - std::vector pairwiseDist; - pairwiseDist.reserve(N); - Real maxMinDist {0.0}; - for(auto& p_A : A) { - Real minDist { std::numeric_limits::max() }; - for(auto& p_B : B) { - if (p_A.isNormal() or p_B.isNormal()) { - Real d = distLInf(p_A, p_B); - pairwiseDist.push_back(d); - if (useHeurMinIdx and p_A.isNormal()) { - if (d < minDist) - minDist = d; - } - } - } - if (useHeurMinIdx and p_A.isNormal() and minDist > maxMinDist) { - maxMinDist = minDist; - } - } - - std::sort(pairwiseDist.begin(), pairwiseDist.end()); - - Real distEpsilon = std::numeric_limits::max(); - for(size_t k = 0; k < pairwiseDist.size() - 2; ++k) { - auto diff = pairwiseDist[k+1]- pairwiseDist[k]; - if ( diff > 1.0e-10 and diff < distEpsilon ) { - distEpsilon = diff; - } - } - distEpsilon /= 3.0; - - BoundMatchOracle oracle(A, B, distEpsilon, useRangeSearch); - // binary search - size_t iterNum {0}; - size_t idxMin {0}, idxMax {pairwiseDist.size() - 1}; - if (useHeurMinIdx) { - auto maxMinIter = std::equal_range(pairwiseDist.begin(), pairwiseDist.end(), maxMinDist); - assert(maxMinIter.first != pairwiseDist.end()); - idxMin = maxMinIter.first - pairwiseDist.begin(); - //std::cout << "maxMinDist = " << maxMinDist << ", idxMin = " << idxMin << ", d = " << pairwiseDist[idxMin] << std::endl; - } - - if (goUpToFindIdxMax) { - if ( pairwiseDist.size() == 1) { - return pairwiseDist[0]; - } - - idxMax = std::max(idxMin, 1); - while (!oracle.isMatchLess(pairwiseDist[idxMax])) { - //std::cout << "entered while" << std::endl; - idxMin = idxMax; - if (2*idxMax > pairwiseDist.size() -1) { - idxMax = pairwiseDist.size() - 1; - break; - } else { - idxMax *= 2; - } - } - //std::cout << "size = " << pairwiseDist.size() << ", idxMax = " << idxMax << ", pw[max] = " << pairwiseDist[idxMax] << std::endl; - } - - size_t idxMid { (idxMin + idxMax) / 2 }; - while(idxMax > idxMin) { - iterNum++; - if (oracle.isMatchLess(pairwiseDist[idxMid])) { - idxMax = idxMid; - } else { - if (idxMax - idxMin == 1) - idxMin++; - else - idxMin = idxMid; - } - idxMid = (idxMin + idxMax) / 2; - } - return pairwiseDist[idxMid]; - } else { - // with greeedy matching - long int N = A.size() * B.size(); - std::vector pairwiseDist; - pairwiseDist.reserve(N); - Real maxMinDist {0.0}; - size_t idxA{0}, idxB{0}; - for(auto p_A : A) { - Real minDist { std::numeric_limits::max() }; - idxB = 0; - for(auto p_B : B) { - Real d = distLInf(p_A, p_B); - pairwiseDist.push_back( std::make_pair(d, std::make_pair(idxA, idxB) ) ); - if (useHeurMinIdx and p_A.isNormal()) { - if (d < minDist) - minDist = d; - } - idxB++; - } - if (useHeurMinIdx and p_A.isNormal() and minDist > maxMinDist) { - maxMinDist = minDist; - } - idxA++; - } - - auto compLambda = [](DistVerticesPair a, DistVerticesPair b) - { return a.first < b.first;}; - - std::sort(pairwiseDist.begin(), - pairwiseDist.end(), - compLambda); - - Real distEpsilon = std::numeric_limits::max(); - for(size_t k = 0; k < pairwiseDist.size() - 2; ++k) { - auto diff = pairwiseDist[k+1].first - pairwiseDist[k].first; - if ( diff > 1.0e-10 and diff < distEpsilon ) { - distEpsilon = diff; - } - } - distEpsilon /= 3.0; - - BoundMatchOracle oracle(A, B, distEpsilon, useRangeSearch); - - // construct greedy matching - size_t numVert { A.size() }; - size_t numMatched { 0 }; - std::unordered_set aTobMatched, bToaMatched; - aTobMatched.reserve(numVert); - bToaMatched.reserve(numVert); - size_t distVecIdx {0}; - while( numMatched < numVert) { - auto vertPair = pairwiseDist[distVecIdx++].second; - //std::cout << "distVecIdx = " << distVecIdx << ", matched: " << numMatched << " out of " << numVert << std::endl; - //std::cout << "vertex A idx = " << vertPair.first << ", B idx: " << vertPair.second << " out of " << numVert << std::endl; - if ( aTobMatched.count(vertPair.first) == 0 and - bToaMatched.count(vertPair.second) == 0 ) { - aTobMatched.insert(vertPair.first); - bToaMatched.insert(vertPair.second); - numMatched++; - } - } - size_t idxMax = distVecIdx-1; - //std::cout << "idxMax = " << idxMax << ", size = " << pairwiseDist.size() << std::endl; - // binary search - size_t iterNum {0}; - size_t idxMin {0}; - if (useHeurMinIdx) { - auto maxMinIter = std::equal_range(pairwiseDist.begin(), - pairwiseDist.end(), - std::make_pair(maxMinDist, std::make_pair(0,0)), - compLambda); - assert(maxMinIter.first != pairwiseDist.end()); - idxMin = maxMinIter.first - pairwiseDist.begin(); - //std::cout << "maxMinDist = " << maxMinDist << ", idxMin = " << idxMin << ", d = " << pairwiseDist[idxMin].first << std::endl; - } - size_t idxMid { (idxMin + idxMax) / 2 }; - while(idxMax > idxMin) { - iterNum++; - if (oracle.isMatchLess(pairwiseDist[idxMid].first)) { - idxMax = idxMid; - } else { - if (idxMax - idxMin == 1) - idxMin++; - else - idxMin = idxMid; - } - idxMid = (idxMin + idxMax) / 2; - } - return pairwiseDist[idxMid].first; - } - // stats - /* - // count number of edges - // pairwiseDist is sorted, add edges of the same length - int edgeNumber {idxMid}; - while(pairwiseDist[edgeNumber + 1] == pairwiseDist[edgeNumber]) - edgeNumber++; - // add edges between diagonal points - edgeNumber += N / 3; - // output stats - std::cout << idxMid << "\t" << N; - std::cout << "\t" << iterNum; - std::cout << "\t" << A.size() + B.size(); - std::cout << "\t" << edgeNumber << "\t"; - std::cout << (Real)(edgeNumber) / (Real)(A.size() + B.size()) << std::endl; - */ -} - -// wrappers -template -bool readDiagramPointSet(const std::string& fname, std::vector>& result) -{ - int decPrecision; - return readDiagramPointSet(fname.c_str(), result, decPrecision); -} - -template -bool readDiagramPointSet(const char* fname, std::vector>& result) -{ - int decPrecision; - return readDiagramPointSet(fname, result, decPrecision); -} - -template -bool readDiagramPointSet(const std::string& fname, std::vector>& result, int& decPrecision) -{ - return readDiagramPointSet(fname.c_str(), result, decPrecision); -} - -// reading function -template -bool readDiagramPointSet(const char* fname, std::vector>& result, int& decPrecision) -{ - size_t lineNumber { 0 }; - result.clear(); - std::ifstream f(fname); - if (!f.good()) { -#ifndef FOR_R_TDA - std::cerr << "Cannot open file " << fname << std::endl; -#endif - return false; - } - std::string line; - while(std::getline(f, line)) { - lineNumber++; - // process comments: remove everything after hash - auto hashPos = line.find_first_of("#", 0); - if( std::string::npos != hashPos) { - line = std::string(line.begin(), line.begin() + hashPos); - } - if (line.empty()) { - continue; - } - // trim whitespaces - auto whiteSpaceFront = std::find_if_not(line.begin(),line.end(),isspace); - auto whiteSpaceBack = std::find_if_not(line.rbegin(),line.rend(),isspace).base(); - if (whiteSpaceBack <= whiteSpaceFront) { - // line consists of spaces only - move to the next line - continue; - } - line = std::string(whiteSpaceFront,whiteSpaceBack); - bool fracPart = false; - int currDecPrecision = 0; - for(auto c : line) { - if (c == '.') { - fracPart = true; - } else if (fracPart) { - if (isdigit(c)) { - currDecPrecision++; - } else { - fracPart = false; - if (currDecPrecision > decPrecision) - decPrecision = currDecPrecision; - currDecPrecision = 0; - } - } - } - Real x, y; - std::istringstream iss(line); - if (not(iss >> x >> y)) { -#ifndef FOR_R_TDA - std::cerr << "Error in file " << fname << ", line number " << lineNumber << ": cannot parse \"" << line << "\"" << std::endl; -#endif - return false; - } - if ( x != y ) { - result.push_back(std::make_pair(x,y)); - } else { -#ifndef FOR_R_TDA -#ifndef VERBOSE_BOTTLENECK - std::cerr << "Warning: in file " << fname << ", line number " << lineNumber << ", zero persistence point ignored: \"" << line << "\"" << std::endl; -#endif -#endif - } - } - f.close(); - return true; -} - } // end namespace bt } // end namespace hera #endif // HERA_BOTTLENECK_HPP diff --git a/geom_bottleneck/include/diagram_reader.h b/geom_bottleneck/include/diagram_reader.h index 5bf106d..08d9e2b 100644 --- a/geom_bottleneck/include/diagram_reader.h +++ b/geom_bottleneck/include/diagram_reader.h @@ -1,5 +1,4 @@ /* - Copyright (c) 2015, M. Kerber, D. Morozov, A. Nigmetov All rights reserved. @@ -39,101 +38,159 @@ derivative works thereof, in binary and source code form. #include namespace hera { + +// cannot choose stod, stof or stold based on RealType, +// lazy solution: partial specialization + template + inline RealType parse_real_from_str(const std::string& s); + + template <> + inline double parse_real_from_str(const std::string& s) + { + return std::stod(s); + } + + + template <> + inline long double parse_real_from_str(const std::string& s) + { + return std::stold(s); + } + + + template <> + inline float parse_real_from_str(const std::string& s) + { + return std::stof(s); + } + + + template + inline RealType parse_real_from_str(const std::string& s) + { + static_assert(sizeof(RealType) != sizeof(RealType), "Must be specialized for each type you want to use, see above"); + } + // fill in result with points from file fname // return false if file can't be opened // or error occurred while reading // decPrecision is the maximal decimal precision in the input, // it is zero if all coordinates in the input are integers -template>> -bool readDiagramPointSet(const char* fname, ContType_& result, int& decPrecision) -{ - size_t lineNumber { 0 }; - result.clear(); - std::ifstream f(fname); - if (!f.good()) { + + template>> + inline bool readDiagramPointSet(const char* fname, ContType_& result, int& decPrecision) + { + using RealType = RealType_; + + size_t lineNumber { 0 }; + result.clear(); + std::ifstream f(fname); + if (!f.good()) { #ifndef FOR_R_TDA - std::cerr << "Cannot open file " << fname << std::endl; + std::cerr << "Cannot open file " << fname << std::endl; #endif - return false; - } - std::string line; - while(std::getline(f, line)) { - lineNumber++; - // process comments: remove everything after hash - auto hashPos = line.find_first_of("#", 0); - if( std::string::npos != hashPos) { - line = std::string(line.begin(), line.begin() + hashPos); - } - if (line.empty()) { - continue; - } - // trim whitespaces - auto whiteSpaceFront = std::find_if_not(line.begin(),line.end(),isspace); - auto whiteSpaceBack = std::find_if_not(line.rbegin(),line.rend(),isspace).base(); - if (whiteSpaceBack <= whiteSpaceFront) { - // line consists of spaces only - move to the next line - continue; + return false; } - line = std::string(whiteSpaceFront,whiteSpaceBack); - bool fracPart = false; - int currDecPrecision = 0; - for(auto c : line) { - if (c == '.') { - fracPart = true; - } else if (fracPart) { - if (isdigit(c)) { - currDecPrecision++; + std::locale loc; + std::string line; + while(std::getline(f, line)) { + lineNumber++; + // process comments: remove everything after hash + auto hashPos = line.find_first_of("#", 0); + if( std::string::npos != hashPos) { + line = std::string(line.begin(), line.begin() + hashPos); + } + if (line.empty()) { + continue; + } + // trim whitespaces + auto whiteSpaceFront = std::find_if_not(line.begin(),line.end(),isspace); + auto whiteSpaceBack = std::find_if_not(line.rbegin(),line.rend(),isspace).base(); + if (whiteSpaceBack <= whiteSpaceFront) { + // line consists of spaces only - move to the next line + continue; + } + line = std::string(whiteSpaceFront,whiteSpaceBack); + + // transform line to lower case + // to parse Infinity + for(auto& c : line) { + c = std::tolower(c, loc); + } + + bool fracPart = false; + int currDecPrecision = 0; + for(auto c : line) { + if (c == '.') { + fracPart = true; + } else if (fracPart) { + if (isdigit(c)) { + currDecPrecision++; + } else { + fracPart = false; + if (currDecPrecision > decPrecision) + decPrecision = currDecPrecision; + currDecPrecision = 0; + } + } + } + + RealType x, y; + std::string str_x, str_y; + std::istringstream iss(line); + try { + iss >> str_x >> str_y; + + x = parse_real_from_str(str_x); + y = parse_real_from_str(str_y); + + if (x != y) { + result.push_back(std::make_pair(x, y)); } else { - fracPart = false; - if (currDecPrecision > decPrecision) - decPrecision = currDecPrecision; - currDecPrecision = 0; +#ifndef FOR_R_TDA + std::cerr << "Warning: point with 0 persistence ignored in " << fname << ":" << lineNumber << "\n"; +#endif } } - } - RealType_ x, y; - std::istringstream iss(line); - if (not(iss >> x >> y)) { + catch (const std::invalid_argument& e) { #ifndef FOR_R_TDA - std::cerr << "Error in file " << fname << ", line number " << lineNumber << ": cannot parse \"" << line << "\"" << std::endl; + std::cerr << "Error in file " << fname << ", line number " << lineNumber << ": cannot parse \"" << line << "\"" << std::endl; #endif - return false; - } - if ( x != y ) { - result.push_back(std::make_pair(x,y)); - } else { -#ifdef VERBOSE_BOTTLENECK - std::cerr << "Warning: in file " << fname << ", line number " << lineNumber << ", zero persistence point ignored: \"" << line << "\"" << std::endl; + return false; + } + catch (const std::out_of_range&) { +#ifndef FOR_R_TDA + std::cerr << "Error while reading file " << fname << ", line number " << lineNumber << ": value too large in \"" << line << "\"" << std::endl; #endif + return false; + } } + f.close(); + return true; + } + + // wrappers + template>> + inline bool readDiagramPointSet(const std::string& fname, ContType_& result, int& decPrecision) + { + return readDiagramPointSet(fname.c_str(), result, decPrecision); + } + + // these two functions are now just wrappers for the previous ones, + // in case someone needs them; decPrecision is ignored + template>> + inline bool readDiagramPointSet(const char* fname, ContType_& result) + { + int decPrecision; + return readDiagramPointSet(fname, result, decPrecision); + } + + template>> + inline bool readDiagramPointSet(const std::string& fname, ContType_& result) + { + int decPrecision; + return readDiagramPointSet(fname.c_str(), result, decPrecision); } - f.close(); - return true; -} - - -// wrappers -template>> -bool readDiagramPointSet(const std::string& fname, ContType_& result, int& decPrecision) -{ - return readDiagramPointSet(fname.c_str(), result, decPrecision); -} - -// these two functions are now just wrappers for the previous ones, -// in case someone needs them; decPrecision is ignored -template>> -bool readDiagramPointSet(const char* fname, ContType_& result) -{ - int decPrecision; - return readDiagramPointSet(fname, result, decPrecision); -} - -template>> -bool readDiagramPointSet(const std::string& fname, ContType_& result) -{ - int decPrecision; - return readDiagramPointSet(fname.c_str(), result, decPrecision); -} } // end namespace hera #endif // HERA_DIAGRAM_READER_H -- cgit v1.2.3