From ce3a42a493f63eb5b23e8a293bf995abaa1cb9e8 Mon Sep 17 00:00:00 2001 From: Arnur Nigmetov Date: Tue, 14 Jan 2020 16:17:43 +0100 Subject: Interim: matching distance for modules 1. Templatize DistanceCalculator (DiagramProvider) 2. Add BifiltrationProxy with the same interface as ModulePresentation (dimension fixed). 3. Call Hera with relative error. 4. Add class ModulePresentation. To-Do: reading module presentations from Rivet format. --- matching/include/bifiltration.h | 31 +- matching/include/common_util.h | 43 +- matching/include/matching_distance.h | 59 +- matching/include/matching_distance.hpp | 784 +++++++++++++++++++++ matching/include/persistence_module.h | 63 +- matching/include/phat/compute_persistence_pairs.h | 11 +- matching/include/phat/helpers/misc.h | 3 + matching/src/bifiltration.cpp | 84 ++- matching/src/dual_box.cpp | 2 +- matching/src/matching_distance.cpp | 795 +--------------------- matching/src/persistence_module.cpp | 239 ++++--- matching/src/simplex.cpp | 3 +- matching/src/tests/test_matching_distance.cpp | 5 +- 13 files changed, 1209 insertions(+), 913 deletions(-) create mode 100644 matching/include/matching_distance.hpp diff --git a/matching/include/bifiltration.h b/matching/include/bifiltration.h index c9b64f1..2b4950f 100644 --- a/matching/include/bifiltration.h +++ b/matching/include/bifiltration.h @@ -38,7 +38,7 @@ namespace md { init(); } - DiagramKeeper weighted_slice_diagram(const DualPoint& line, int dim) const; + Diagram weighted_slice_diagram(const DualPoint& line, int dim) const; SimplexVector simplices() const { return simplices_; } @@ -100,8 +100,37 @@ namespace md { }; std::ostream& operator<<(std::ostream& os, const Bifiltration& bif); + + class BifiltrationProxy { + public: + BifiltrationProxy(const Bifiltration& bif, int dim = 0); + // return critical values of simplices that are important for current dimension (dim and dim+1) + PointVec positions() const; + // set current dimension + int set_dim(int new_dim); + + // wrappers of Bifiltration + int maximal_dim() const; + void translate(Real a); + Real minimal_coordinate() const; + Box bounding_box() const; + Real max_x() const; + Real max_y() const; + Real min_x() const; + Real min_y() const; + Diagram weighted_slice_diagram(const DualPoint& slice) const; + + private: + int dim_ { 0 }; + mutable PointVec cached_positions_; + Bifiltration bif_; + + void cache_positions() const; + }; } + + #endif //MATCHING_DISTANCE_BIFILTRATION_H //// The value type of OutputIterator is Simplex_in_2D_filtration diff --git a/matching/include/common_util.h b/matching/include/common_util.h index 674783b..b2ee22d 100644 --- a/matching/include/common_util.h +++ b/matching/include/common_util.h @@ -11,18 +11,51 @@ #include #include "common_defs.h" +#include "phat/helpers/misc.h" namespace md { using Real = double; - using Index = long; + using RealVec = std::vector; + using Index = phat::index; + using IndexVec = std::vector; static constexpr Real pi = M_PI; using Column = std::vector; + struct IntPoint { + Index x; + Index y; + + IntPoint(Index x, Index y) :x(x), y(y) { } + + IntPoint() :x(0), y(0) { } + + inline bool operator==(const IntPoint& v) const + { + return this->x == v.x && this->y == v.y; + } + + // compare both coordinates, as needed in persistence + // do not overload operator<, requirements are not satisfied + inline bool is_less(const IntPoint& other, bool strict = false) const + { + if (x <= other.x and y <= other.y) { + if (strict) { + return x != other.x or y != other.y; + } + else { + return true; + } + } + return false; + } + }; + + struct Point { Real x; Real y; @@ -39,6 +72,12 @@ namespace md { y /= nor; } + inline void translate(Real a) + { + x += a; + y += a; + } + inline bool operator==(const Point& v) const { return this->x == v.x && this->y == v.y; @@ -60,6 +99,8 @@ namespace md { } }; + using PointVec = std::vector; + Point operator+(const Point& u, const Point& v); Point operator-(const Point& u, const Point& v); diff --git a/matching/include/matching_distance.h b/matching/include/matching_distance.h index 603b535..72743a4 100644 --- a/matching/include/matching_distance.h +++ b/matching/include/matching_distance.h @@ -6,6 +6,7 @@ #include #include "spdlog/spdlog.h" +#include "spdlog/fmt/ostr.h" #include "common_defs.h" #include "cell_with_value.h" @@ -53,7 +54,7 @@ namespace md { struct CalculationParams { static constexpr int ALL_DIMENSIONS = -1; - Real hera_epsilon {0.01}; // relative error in hera call + Real hera_epsilon {0.001}; // relative error in hera call Real delta {0.1}; // relative error for matching distance int max_depth {6}; // maximal number of refinenemnts int initialization_depth {3}; @@ -76,9 +77,11 @@ namespace md { #endif }; + template class DistanceCalculator { - using DiagramProvider = md::Bifiltration; +// using DiagramProvider = md::Bifiltration; +// using DiagramProvider = md::ModulePresentation; using DualBox = md::DualBox; using CellValueVector = std::vector; @@ -90,29 +93,17 @@ namespace md { Real distance(); - int get_hera_calls_number() const - { - int result; - for(auto c : n_hera_calls_) result += c.second; - return result; - } - - int get_hera_calls_number(int dim) const; - - void clear_cache(); - -// for tests - make everything p + int get_hera_calls_number() const; +// for tests - make everything public // private: DiagramProvider module_a_; DiagramProvider module_b_; CalculationParams& params_; - int maximal_dim_ {0}; - - std::map n_hera_calls_; + int n_hera_calls_; std::map n_hera_calls_per_level_; - std::vector distances_; // indexed by dim + Real distance_; // if calculate_on_intermediate, then weighted distance // will be calculated on centers of each grid in between @@ -126,18 +117,18 @@ namespace md { Real get_max_y(int module) const; - void set_cell_central_value(CellWithValue& dual_cell, int dim); + void set_cell_central_value(CellWithValue& dual_cell); - Real distance_in_dimension(int dim); + Real get_distance(); - Real distance_in_dimension_pq(int dim); + Real get_distance_pq(); // temporary, to try priority queue Real get_max_possible_value(const CellWithValue* first_cell_ptr, int n_cells); - Real get_upper_bound(const CellWithValue& dual_cell, int dim, Real good_enough_upper_bound) const; + Real get_upper_bound(const CellWithValue& dual_cell, Real good_enough_upper_bound) const; - Real get_single_dgm_bound(const CellWithValue& dual_cell, ValuePoint vp, int module, int dim, + Real get_single_dgm_bound(const CellWithValue& dual_cell, ValuePoint vp, int module, Real good_enough_value) const; // this bound depends only on dual box @@ -155,14 +146,30 @@ namespace md { Real get_max_displacement_single_point(const CellWithValue& dual_cell, ValuePoint value_point, const Point& p) const; - void check_upper_bound(const CellWithValue& dual_cell, int dim) const; + void check_upper_bound(const CellWithValue& dual_cell) const; - Real distance_on_line(int dim, DualPoint line); - Real distance_on_line_const(int dim, DualPoint line) const; + Real distance_on_line(DualPoint line); + Real distance_on_line_const(DualPoint line) const; Real current_error(Real lower_bound, Real upper_bound); }; Real matching_distance(const Bifiltration& bif_a, const Bifiltration& bif_b, CalculationParams& params); + Real matching_distance(const ModulePresentation& mod_a, const ModulePresentation& mod_b, CalculationParams& params); + + // for upper bound experiment + struct UbExperimentRecord { + Real error; + Real lower_bound; + Real upper_bound; + CellWithValue cell; + long long int time; + long long int n_hera_calls; + }; + + std::ostream& operator<<(std::ostream& os, const UbExperimentRecord& r); + } + +#include "matching_distance.hpp" diff --git a/matching/include/matching_distance.hpp b/matching/include/matching_distance.hpp new file mode 100644 index 0000000..218f33b --- /dev/null +++ b/matching/include/matching_distance.hpp @@ -0,0 +1,784 @@ +namespace md { + + template + void print_map(const std::map& dic) + { + for(const auto kv : dic) { + fmt::print("{} -> {}\n", kv.first, kv.second); + } + } + + template + void DistanceCalculator::check_upper_bound(const CellWithValue& dual_cell) const + { + spd::debug("Enter check_get_max_delta_on_cell"); + const int n_samples_lambda = 100; + const int n_samples_mu = 100; + DualBox db = dual_cell.dual_box(); + Real min_lambda = db.lambda_min(); + Real max_lambda = db.lambda_max(); + Real min_mu = db.mu_min(); + Real max_mu = db.mu_max(); + + Real h_lambda = (max_lambda - min_lambda) / n_samples_lambda; + Real h_mu = (max_mu - min_mu) / n_samples_mu; + for(int i = 1; i < n_samples_lambda; ++i) { + for(int j = 1; j < n_samples_mu; ++j) { + Real lambda = min_lambda + i * h_lambda; + Real mu = min_mu + j * h_mu; + DualPoint l(db.axis_type(), db.angle_type(), lambda, mu); + Real other_result = distance_on_line_const(l); + Real diff = fabs(dual_cell.stored_upper_bound() - other_result); + if (other_result > dual_cell.stored_upper_bound()) { + spd::error( + "in check_upper_bound, upper_bound = {}, other_result = {}, diff = {}\ndual_cell = {}", + dual_cell.stored_upper_bound(), other_result, diff, dual_cell); + throw std::runtime_error("Wrong delta estimate"); + } + } + } + spd::debug("Exit check_get_max_delta_on_cell"); + } + + // for all lines l, l' inside dual box, + // find the upper bound on the difference of weighted pushes of p + template + Real + DistanceCalculator::get_max_displacement_single_point(const CellWithValue& dual_cell, ValuePoint vp, + const Point& p) const + { + assert(p.x >= 0 && p.y >= 0); + +#ifdef MD_DEBUG + std::vector debug_ids = {3, 13, 54, 218, 350, 382, 484, 795, 2040, 8415, 44076}; + bool debug = false; // std::find(debug_ids.begin(), debug_ids.end(), dual_cell.id) != debug_ids.end(); +#endif + DualPoint line = dual_cell.value_point(vp); + const Real base_value = line.weighted_push(p); + + spd::debug("Enter get_max_displacement_single_point, p = {},\ndual_cell = {},\nline = {}, base_value = {}\n", p, + dual_cell, line, base_value); + + Real result = 0.0; + for(DualPoint dp : dual_cell.dual_box().critical_points(p)) { + Real dp_value = dp.weighted_push(p); + spd::debug( + "In get_max_displacement_single_point, p = {}, critical dp = {},\ndp_value = {}, diff = {},\ndual_cell = {}\n", + p, dp, dp_value, fabs(base_value - dp_value), dual_cell); + result = std::max(result, fabs(base_value - dp_value)); + } + +#ifdef MD_DO_FULL_CHECK + DualBox db = dual_cell.dual_box(); + std::uniform_real_distribution dlambda(db.lambda_min(), db.lambda_max()); + std::uniform_real_distribution dmu(db.mu_min(), db.mu_max()); + std::mt19937 gen(1); + for(int i = 0; i < 1000; ++i) { + Real lambda = dlambda(gen); + Real mu = dmu(gen); + DualPoint dp_random { db.axis_type(), db.angle_type(), lambda, mu }; + Real dp_value = dp_random.weighted_push(p); + if (fabs(base_value - dp_value) > result) { + spd::error("in get_max_displacement_single_point, p = {}, vp = {}\ndb = {}\nresult = {}, base_value = {}, dp_value = {}, dp_random = {}", + p, vp, db, result, base_value, dp_value, dp_random); + throw std::runtime_error("error in get_max_displacement_single_value"); + } + } +#endif + + return result; + } + + template + typename DistanceCalculator::CellValueVector DistanceCalculator::get_initial_dual_grid(Real& lower_bound) + { + CellValueVector result = get_refined_grid(params_.initialization_depth, false, true); + + lower_bound = -1.0; + for(const auto& dc : result) { + lower_bound = std::max(lower_bound, dc.max_corner_value()); + } + + assert(lower_bound >= 0); + + for(auto& dual_cell : result) { + Real good_enough_ub = get_good_enough_upper_bound(lower_bound); + Real max_value_on_cell = get_upper_bound(dual_cell, good_enough_ub); + dual_cell.set_max_possible_value(max_value_on_cell); + +#ifdef MD_DO_FULL_CHECK + check_upper_bound(dual_cell); +#endif + + spd::debug("DEBUG INIT: added cell {}", dual_cell); + } + + return result; + } + + template + typename DistanceCalculator::CellValueVector + DistanceCalculator::get_refined_grid(int init_depth, bool calculate_on_intermediate, bool calculate_on_last) + { + const Real y_max = std::max(module_a_.max_y(), module_b_.max_y()); + const Real x_max = std::max(module_a_.max_x(), module_b_.max_x()); + + const Real lambda_min = 0; + const Real lambda_max = 1; + + const Real mu_min = 0; + + DualBox x_flat(DualPoint(AxisType::x_type, AngleType::flat, lambda_min, mu_min), + DualPoint(AxisType::x_type, AngleType::flat, lambda_max, x_max)); + + DualBox x_steep(DualPoint(AxisType::x_type, AngleType::steep, lambda_min, mu_min), + DualPoint(AxisType::x_type, AngleType::steep, lambda_max, x_max)); + + DualBox y_flat(DualPoint(AxisType::y_type, AngleType::flat, lambda_min, mu_min), + DualPoint(AxisType::y_type, AngleType::flat, lambda_max, y_max)); + + DualBox y_steep(DualPoint(AxisType::y_type, AngleType::steep, lambda_min, mu_min), + DualPoint(AxisType::y_type, AngleType::steep, lambda_max, y_max)); + + CellWithValue x_flat_cell(x_flat, 0); + CellWithValue x_steep_cell(x_steep, 0); + CellWithValue y_flat_cell(y_flat, 0); + CellWithValue y_steep_cell(y_steep, 0); + + if (init_depth == 0) { + DualPoint diagonal_x_flat(AxisType::x_type, AngleType::flat, 1, 0); + + Real diagonal_value = distance_on_line(diagonal_x_flat); + n_hera_calls_per_level_[0]++; + + x_flat_cell.set_value_at(ValuePoint::lower_right, diagonal_value); + y_flat_cell.set_value_at(ValuePoint::lower_right, diagonal_value); + x_steep_cell.set_value_at(ValuePoint::lower_right, diagonal_value); + y_steep_cell.set_value_at(ValuePoint::lower_right, diagonal_value); + } + +#ifdef MD_DEBUG + x_flat_cell.id = 1; + x_steep_cell.id = 2; + y_flat_cell.id = 3; + y_steep_cell.id = 4; + CellWithValue::max_id = 4; +#endif + + CellValueVector result {x_flat_cell, x_steep_cell, y_flat_cell, y_steep_cell}; + + if (init_depth == 0) { + return result; + } + + CellValueVector refined_result; + + for(int i = 1; i <= init_depth; ++i) { + refined_result.clear(); + for(const auto& dual_cell : result) { + for(auto refined_cell : dual_cell.get_refined_cells()) { + // we calculate for init_dept - 1, not init_depth, + // because we want the cells to have value at a corner + if ((i == init_depth - 1 and calculate_on_last) or calculate_on_intermediate) + set_cell_central_value(refined_cell); + refined_result.push_back(refined_cell); + } + } + result = std::move(refined_result); + } + return result; + } + + template + DistanceCalculator::DistanceCalculator(const T& a, + const T& b, + CalculationParams& params) + : + module_a_(a), + module_b_(b), + params_(params) + { + // make all coordinates non-negative + auto min_coord = std::min(module_a_.minimal_coordinate(), + module_b_.minimal_coordinate()); + if (min_coord < 0) { + module_a_.translate(-min_coord); + module_b_.translate(-min_coord); + } + + assert(std::min({module_a_.min_x(), module_b_.min_x(), module_a_.min_y(), + module_b_.min_y()}) >= 0); + + spd::info("DistanceCalculator constructed, module_a: max_x = {}, max_y = {}, module_b: max_x = {}, max_y = {}", + module_a_.max_x(), module_a_.max_y(), module_b_.max_x(), module_b_.max_y()); + } + + template + Real DistanceCalculator::get_max_x(int module) const + { + return (module == 0) ? module_a_.max_x() : module_b_.max_x(); + } + + template + Real DistanceCalculator::get_max_y(int module) const + { + return (module == 0) ? module_a_.max_y() : module_b_.max_y(); + } + + template + Real + DistanceCalculator::get_local_refined_bound(const md::DualBox& dual_box) const + { + return get_local_refined_bound(0, dual_box) + get_local_refined_bound(1, dual_box); + } + + template + Real + DistanceCalculator::get_local_refined_bound(int module, const md::DualBox& dual_box) const + { + spd::debug("Enter get_local_refined_bound, dual_box = {}", dual_box); + Real d_lambda = dual_box.lambda_max() - dual_box.lambda_min(); + Real d_mu = dual_box.mu_max() - dual_box.mu_min(); + Real result; + if (dual_box.axis_type() == AxisType::x_type) { + if (dual_box.is_flat()) { + result = dual_box.lambda_max() * d_mu + (get_max_x(module) - dual_box.mu_min()) * d_lambda; + } else { + result = d_mu + get_max_y(module) * d_lambda; + } + } else { + // y-type + if (dual_box.is_flat()) { + result = d_mu + get_max_x(module) * d_lambda; + } else { + // steep + result = dual_box.lambda_max() * d_mu + (get_max_y(module) - dual_box.mu_min()) * d_lambda; + } + } + return result; + } + + template + Real DistanceCalculator::get_local_dual_bound(int module, const md::DualBox& dual_box) const + { + Real dlambda = dual_box.lambda_max() - dual_box.lambda_min(); + Real dmu = dual_box.mu_max() - dual_box.mu_min(); + + if (dual_box.is_flat()) { + return get_max_x(module) * dlambda + dmu; + } else { + return get_max_y(module) * dlambda + dmu; + } + } + + template + Real DistanceCalculator::get_local_dual_bound(const md::DualBox& dual_box) const + { + return get_local_dual_bound(0, dual_box) + get_local_dual_bound(1, dual_box); + } + + template + Real DistanceCalculator::get_upper_bound(const CellWithValue& dual_cell, Real good_enough_ub) const + { + assert(good_enough_ub >= 0); + + switch(params_.bound_strategy) { + case BoundStrategy::bruteforce: + return std::numeric_limits::max(); + + case BoundStrategy::local_dual_bound: + return dual_cell.min_value() + get_local_dual_bound(dual_cell.dual_box()); + + case BoundStrategy::local_dual_bound_refined: + return dual_cell.min_value() + get_local_refined_bound(dual_cell.dual_box()); + + case BoundStrategy::local_combined: { + Real cheap_upper_bound = dual_cell.min_value() + get_local_refined_bound(dual_cell.dual_box()); + if (cheap_upper_bound < good_enough_ub) { + return cheap_upper_bound; + } else { + [[fallthrough]]; + } + } + + case BoundStrategy::local_dual_bound_for_each_point: { + Real result = std::numeric_limits::max(); + for(ValuePoint vp : k_corner_vps) { + if (not dual_cell.has_value_at(vp)) { + continue; + } + + Real base_value = dual_cell.value_at(vp); + Real bound_dgm_a = get_single_dgm_bound(dual_cell, vp, 0, good_enough_ub); + + if (params_.stop_asap and bound_dgm_a + base_value >= good_enough_ub) { + // we want to return a valid upper bound, not just something that will prevent discarding the cell + // and we don't want to compute pushes for points in second bifiltration. + // so just return a constant time bound + return dual_cell.min_value() + get_local_refined_bound(dual_cell.dual_box()); + } + + Real bound_dgm_b = get_single_dgm_bound(dual_cell, vp, 1, + std::max(Real(0), good_enough_ub - bound_dgm_a)); + + result = std::min(result, base_value + bound_dgm_a + bound_dgm_b); + +#ifdef MD_DEBUG + spd::debug("In get_upper_bound, cell = {}", dual_cell); + spd::debug("In get_upper_bound, vp = {}, base_value = {}, bound_dgm_a = {}, bound_dgm_b = {}, result = {}", vp, base_value, bound_dgm_a, bound_dgm_b, result); +#endif + + if (params_.stop_asap and result < good_enough_ub) { + break; + } + } + return result; + } + } + // to suppress compiler warning + return std::numeric_limits::max(); + } + + // find maximal displacement of weighted points of m for all lines in dual_box + template + Real + DistanceCalculator::get_single_dgm_bound(const CellWithValue& dual_cell, + ValuePoint vp, + int module, + [[maybe_unused]] Real good_enough_value) const + { + Real result = 0; + Point max_point; + + spd::debug( + "Enter get_single_dgm_bound, module = {}, dual_cell = {}, vp = {}, good_enough_value = {}, stop_asap = {}\n", + module, dual_cell, vp, good_enough_value, params_.stop_asap); + + const T& m = (module == 0) ? module_a_ : module_b_; + for(const auto& position : m.positions()) { + spd::debug("in get_single_dgm_bound, simplex = {}\n", position); + + Real x = get_max_displacement_single_point(dual_cell, vp, position); + + spd::debug("In get_single_dgm_bound, point = {}, displacement = {}", position, x); + + if (x > result) { + result = x; + max_point = position; + spd::debug("In get_single_dgm_bound, point = {}, result now = displacement = {}", position, x); + } + + if (params_.stop_asap and result > good_enough_value) { + // we want to return a valid upper bound, + // now we just see it is worse than we need, but it may be even more + // just return a valid upper bound + spd::debug("result {} > good_enough_value {}, exit and return refined bound {}", result, + good_enough_value, get_local_refined_bound(dual_cell.dual_box())); + result = get_local_refined_bound(dual_cell.dual_box()); + break; + } + } + + spd::debug("Exit get_single_dgm_bound,\ndual_cell = {}\nmodule = {}, result = {}, max_point = {}", dual_cell, + module, result, max_point); + + return result; + } + + template + Real DistanceCalculator::distance() + { + return get_distance_pq(); + } + + // calculate weighted bottleneneck distance between slices on line + // increments hera calls counter + template + Real DistanceCalculator::distance_on_line(DualPoint line) + { + ++n_hera_calls_; + Real result = distance_on_line_const(line); + return result; + } + + template + Real DistanceCalculator::distance_on_line_const(DualPoint line) const + { + // TODO: think about this - how to call Hera + auto dgm_a = module_a_.weighted_slice_diagram(line); + auto dgm_b = module_b_.weighted_slice_diagram(line); + Real result; + if (params_.hera_epsilon > static_cast(0)) { + result = hera::bottleneckDistApprox(dgm_a, dgm_b, params_.hera_epsilon) / ( params_.hera_epsilon + 1); + } else { + result = hera::bottleneckDistExact(dgm_a, dgm_b); + } + if (n_hera_calls_ % 100 == 1) { + spd::debug("Calling Hera, dgm_a.size = {}, dgm_b.size = {}, line = {}, result = {}", dgm_a.size(), + dgm_b.size(), line, result); + } else { + spd::debug("Calling Hera, dgm_a.size = {}, dgm_b.size = {}, line = {}, result = {}", dgm_a.size(), + dgm_b.size(), line, result); + } + return result; + } + + template + Real DistanceCalculator::get_good_enough_upper_bound(Real lower_bound) const + { + Real result; + // in upper_bound strategy we only prune cells if they cannot improve the lower bound, + // otherwise the experiment is supposed to run indefinitely + if (params_.traverse_strategy == TraverseStrategy::upper_bound) { + result = lower_bound; + } else { + result = (1.0 + params_.delta) * lower_bound; + } + return result; + } + + // helper function + // calculate weighted bt distance on cell center, + // assign distance value to cell, keep it in heat_map, and return + template + void DistanceCalculator::set_cell_central_value(CellWithValue& dual_cell) + { + DualPoint central_line {dual_cell.center()}; + + spd::debug("In set_cell_central_value, processing dual cell = {}, line = {}", dual_cell.dual_box(), + central_line); + Real new_value = distance_on_line(central_line); + n_hera_calls_per_level_[dual_cell.level() + 1]++; + dual_cell.set_value_at(ValuePoint::center, new_value); + params_.actual_max_depth = std::max(params_.actual_max_depth, dual_cell.level() + 1); + +#ifdef PRINT_HEAT_MAP + if (params_.bound_strategy == BoundStrategy::bruteforce) { + spd::debug("In set_cell_central_value, adding to heat_map pair {} - {}", dual_cell.center(), new_value); + if (dual_cell.level() > params_.initialization_depth + 1 + and params_.heat_maps[dual_cell.level()].count(dual_cell.center()) > 0) { + auto existing = params_.heat_maps[dual_cell.level()].find(dual_cell.center()); + spd::debug("EXISTING: {} -> {}", existing->first, existing->second); + } + assert(dual_cell.level() <= params_.initialization_depth + 1 + or params_.heat_maps[dual_cell.level()].count(dual_cell.center()) == 0); + params_.heat_maps[dual_cell.level()][dual_cell.center()] = new_value; + } +#endif + } + + // quick-and-dirty hack to efficiently traverse priority queue with dual cells + // returns maximal possible value on all cells in queue + // assumes that the underlying container is vector! + // cell_ptr: pointer to the first element in queue + // n_cells: queue size + template + Real DistanceCalculator::get_max_possible_value(const CellWithValue* cell_ptr, int n_cells) + { + Real result = (n_cells > 0) ? cell_ptr->stored_upper_bound() : 0; + for(int i = 0; i < n_cells; ++i, ++cell_ptr) { + result = std::max(result, cell_ptr->stored_upper_bound()); + } + return result; + } + + // helper function: + // return current error from lower and upper bounds + // and save it in params_ (hence not const) + template + Real DistanceCalculator::current_error(Real lower_bound, Real upper_bound) + { + Real current_error = (lower_bound > 0.0) ? (upper_bound - lower_bound) / lower_bound + : std::numeric_limits::max(); + + params_.actual_error = current_error; + + if (current_error < params_.delta) { + spd::debug( + "Threshold achieved! bound_strategy = {}, traverse_strategy = {}, upper_bound = {}, current_error = {}", + params_.bound_strategy, params_.traverse_strategy, upper_bound, current_error); + } + return current_error; + } + + // return matching distance + // use priority queue to store dual cells + // comparison function depends on the strategies in params_ + // ressets hera calls counter + template + Real DistanceCalculator::get_distance_pq() + { + std::map n_cells_considered; + std::map n_cells_pushed_into_queue; + long int n_too_deep_cells = 0; + std::map n_cells_discarded; + std::map n_cells_pruned; + + spd::info("Enter get_distance_pq, bound strategy = {}, traverse strategy = {}, stop_asap = {} ", + params_.bound_strategy, params_.traverse_strategy, params_.stop_asap); + + std::chrono::high_resolution_clock timer; + auto start_time = timer.now(); + + n_hera_calls_ = 0; + n_hera_calls_per_level_.clear(); + + + // if cell is too deep and is not pushed into queue, + // we still need to take its max value into account; + // the max over such cells is stored in max_result_on_too_fine_cells + Real upper_bound_on_deep_cells = -1; + + spd::debug("Started iterations in dual space, delta = {}, bound_strategy = {}", params_.delta, + params_.bound_strategy); + // user-defined less lambda function + // to regulate priority queue depending on strategy + auto dual_cell_less = [this](const CellWithValue& a, const CellWithValue& b) { + + int a_level = a.level(); + int b_level = b.level(); + Real a_value = a.max_corner_value(); + Real b_value = b.max_corner_value(); + Real a_ub = a.stored_upper_bound(); + Real b_ub = b.stored_upper_bound(); + if (this->params_.traverse_strategy == TraverseStrategy::upper_bound and + (not a.has_max_possible_value() or not b.has_max_possible_value())) { + throw std::runtime_error("no upper bound on cell"); + } + DualPoint a_lower_left = a.dual_box().lower_left(); + DualPoint b_lower_left = b.dual_box().lower_left(); + + switch(this->params_.traverse_strategy) { + // in both breadth_first searches we want coarser cells + // to be processed first. Cells with smaller level must be larger, + // hence the minus in front of level + case TraverseStrategy::breadth_first: + return std::make_tuple(-a_level, a_lower_left) + < std::make_tuple(-b_level, b_lower_left); + case TraverseStrategy::breadth_first_value: + return std::make_tuple(-a_level, a_value, a_lower_left) + < std::make_tuple(-b_level, b_value, b_lower_left); + case TraverseStrategy::depth_first: + return std::make_tuple(a_value, a_level, a_lower_left) + < std::make_tuple(b_value, b_level, b_lower_left); + case TraverseStrategy::upper_bound: + return std::make_tuple(a_ub, a_level, a_lower_left) + < std::make_tuple(b_ub, b_level, b_lower_left); + default: + throw std::runtime_error("Forgotten case"); + } + }; + + std::priority_queue dual_cells_queue( + dual_cell_less); + + // weighted bt distance on the center of current cell + Real lower_bound = std::numeric_limits::min(); + + // init pq and lower bound + for(auto& init_cell : get_initial_dual_grid(lower_bound)) { + dual_cells_queue.push(init_cell); + } + + Real upper_bound = get_max_possible_value(&dual_cells_queue.top(), dual_cells_queue.size()); + + std::vector ub_experiment_results; + + while(not dual_cells_queue.empty()) { + + CellWithValue dual_cell = dual_cells_queue.top(); + dual_cells_queue.pop(); + assert(dual_cell.has_corner_value() + and dual_cell.has_max_possible_value() + and dual_cell.max_corner_value() <= upper_bound); + + n_cells_considered[dual_cell.level()]++; + + bool discard_cell = false; + + if (not params_.stop_asap) { + // if stop_asap is on, it is safer to never discard a cell + if (params_.bound_strategy == BoundStrategy::bruteforce) { + discard_cell = false; + } else if (params_.traverse_strategy == TraverseStrategy::upper_bound) { + discard_cell = (dual_cell.stored_upper_bound() <= lower_bound); + } else { + discard_cell = (dual_cell.stored_upper_bound() <= (1.0 + params_.delta) * lower_bound); + } + } + + spd::debug( + "CURRENT CELL bound_strategy = {}, traverse_strategy = {}, dual cell: {}, upper_bound = {}, lower_bound = {}, current_error = {}, discard_cell = {}", + params_.bound_strategy, params_.traverse_strategy, dual_cell, upper_bound, lower_bound, + current_error(lower_bound, upper_bound), discard_cell); + + if (discard_cell) { + n_cells_discarded[dual_cell.level()]++; + continue; + } + + // until now, dual_cell knows its value in one of its corners + // new_value will be the weighted distance at its center + set_cell_central_value(dual_cell); + Real new_value = dual_cell.value_at(ValuePoint::center); + lower_bound = std::max(new_value, lower_bound); + + spd::debug("Processed cell = {}, weighted value = {}, lower_bound = {}", dual_cell, new_value, lower_bound); + + assert(upper_bound >= lower_bound); + + if (current_error(lower_bound, upper_bound) < params_.delta) { + break; + } + + // refine cell and push 4 smaller cells into queue + for(auto refined_cell : dual_cell.get_refined_cells()) { + + if (refined_cell.num_values() == 0) + throw std::runtime_error("no value on cell"); + + // if delta is smaller than good_enough_value, it allows to prune cell + Real good_enough_ub = get_good_enough_upper_bound(lower_bound); + + // upper bound of the parent holds for refined_cell + // and can sometimes be smaller! + Real upper_bound_on_refined_cell = std::min(dual_cell.stored_upper_bound(), + get_upper_bound(refined_cell, good_enough_ub)); + + spd::debug("upper_bound_on_refined_cell = {}, dual_cell.stored_upper_bound = {}, get_upper_bound = {}", + upper_bound_on_refined_cell, dual_cell.stored_upper_bound(), + get_upper_bound(refined_cell, good_enough_ub)); + + refined_cell.set_max_possible_value(upper_bound_on_refined_cell); + +#ifdef MD_DO_FULL_CHECK + check_upper_bound(refined_cell); +#endif + + bool prune_cell = false; + + if (refined_cell.level() <= params_.max_depth) { + // cell might be added to queue; if it is not added, its maximal value can be safely ignored + if (params_.traverse_strategy == TraverseStrategy::upper_bound) { + prune_cell = (refined_cell.stored_upper_bound() <= lower_bound); + } else if (params_.bound_strategy != BoundStrategy::bruteforce) { + prune_cell = (refined_cell.stored_upper_bound() <= (1.0 + params_.delta) * lower_bound); + } + if (prune_cell) + n_cells_pruned[refined_cell.level()]++; +// prune_cell = (max_result_on_refined_cell <= lower_bound); + } else { + // cell is too deep, it won't be added to queue + // we must memorize maximal value on this cell, because we won't see it anymore + prune_cell = true; + if (refined_cell.stored_upper_bound() > (1 + params_.delta) * lower_bound) { + n_too_deep_cells++; + } + upper_bound_on_deep_cells = std::max(upper_bound_on_deep_cells, refined_cell.stored_upper_bound()); + } + + spd::debug( + "In get_distance_pq, loop over refined cells, bound_strategy = {}, traverse_strategy = {}, refined cell: {}, max_value_on_cell = {}, upper_bound = {}, current_error = {}, prune_cell = {}", + params_.bound_strategy, params_.traverse_strategy, refined_cell, + refined_cell.stored_upper_bound(), upper_bound, current_error(lower_bound, upper_bound), + prune_cell); + + if (not prune_cell) { + n_cells_pushed_into_queue[refined_cell.level()]++; + dual_cells_queue.push(refined_cell); + } + } // end loop over refined cells + + if (dual_cells_queue.empty()) + upper_bound = std::max(upper_bound, upper_bound_on_deep_cells); + else + upper_bound = std::max(upper_bound_on_deep_cells, + get_max_possible_value(&dual_cells_queue.top(), dual_cells_queue.size())); + + if (params_.traverse_strategy == TraverseStrategy::upper_bound) { + upper_bound = dual_cells_queue.top().stored_upper_bound(); + + if (get_hera_calls_number() < 20 || get_hera_calls_number() % 20 == 0) { + auto elapsed = timer.now() - start_time; + UbExperimentRecord ub_exp_record; + + ub_exp_record.error = current_error(lower_bound, upper_bound); + ub_exp_record.lower_bound = lower_bound; + ub_exp_record.upper_bound = upper_bound; + ub_exp_record.cell = dual_cells_queue.top(); + ub_exp_record.n_hera_calls = n_hera_calls_; + ub_exp_record.time = std::chrono::duration_cast(elapsed).count(); + +#ifdef MD_DO_CHECKS + if (ub_experiment_results.size() > 0) { + auto prev = ub_experiment_results.back(); + if (upper_bound > prev.upper_bound) { + spd::error("ALARM 1, upper_bound = {}, top = {}, prev.ub = {}, prev cell = {}, lower_bound = {}, prev.lower_bound = {}", + upper_bound, ub_exp_record.cell, prev.upper_bound, prev.cell, lower_bound, prev.lower_bound); + throw std::runtime_error("die"); + } + + if (lower_bound < prev.lower_bound) { + spd::error("ALARM 2, lower_bound = {}, prev.lower_bound = {}, top = {}, prev.ub = {}, prev cell = {}", lower_bound, prev.lower_bound, ub_exp_record.cell, prev.upper_bound, prev.cell); + throw std::runtime_error("die"); + } + } +#endif + + ub_experiment_results.emplace_back(ub_exp_record); + + fmt::print(std::cerr, "[UB_EXPERIMENT]\t{}\n", ub_exp_record); + } + } + + assert(upper_bound >= lower_bound); + + if (current_error(lower_bound, upper_bound) < params_.delta) { + break; + } + } + + params_.actual_error = current_error(lower_bound, upper_bound); + + if (n_too_deep_cells > 0) { + spd::warn( + "Error not guaranteed, there were {} too deep cells. Actual error = {}. Increase max_depth or delta", + n_too_deep_cells, params_.actual_error); + } + // otherwise actual_error in params can be larger than delta, + // but this is OK + + spd::info("#############################################################"); + spd::info( + "Exiting get_distance_pq, bound_strategy = {}, traverse_strategy = {}, lower_bound = {}, upper_bound = {}, current_error = {}, actual_max_level = {}", + params_.bound_strategy, params_.traverse_strategy, lower_bound, + upper_bound, params_.actual_error, params_.actual_max_depth); + + spd::info("#############################################################"); + + bool print_stats = true; + if (print_stats) { + fmt::print("EXIT STATS, cells considered:\n"); + print_map(n_cells_considered); + fmt::print("EXIT STATS, cells discarded:\n"); + print_map(n_cells_discarded); + fmt::print("EXIT STATS, cells pruned:\n"); + print_map(n_cells_pruned); + fmt::print("EXIT STATS, cells pushed:\n"); + print_map(n_cells_pushed_into_queue); + fmt::print("EXIT STATS, hera calls:\n"); + print_map(n_hera_calls_per_level_); + + fmt::print("EXIT STATS, too deep cells with high value: {}\n", n_too_deep_cells); + } + + return lower_bound; + } + + template + int DistanceCalculator::get_hera_calls_number() const + { + return n_hera_calls_; + } + +} \ No newline at end of file diff --git a/matching/include/persistence_module.h b/matching/include/persistence_module.h index 50ae8d2..ea9cfd0 100644 --- a/matching/include/persistence_module.h +++ b/matching/include/persistence_module.h @@ -12,37 +12,68 @@ namespace md { - class Relation { + + class ModulePresentation { public: - Relation() {} - Relation(const Point& _pos) - :position_(_pos) { } + struct Relation { - Real get_x() const { return position_.x; } + Point position_; + IntPoint bigrade_; - Real get_y() const { return position_.y; } + Relation() {} + Relation(const Point& _pos) : position_(_pos) { } - private: - Point position_; - }; + Real get_x() const { return position_.x; } + Real get_y() const { return position_.y; } - class PersistenceModule { - public: - using Box = md::Box; + IndexVec components_; - PersistenceModule() { } + }; - PersistenceModule(const std::string& fname); // read from file + ModulePresentation() { } - Diagram slice_diagram(const DualPoint& line); + ModulePresentation(const std::string& fname); // read from file + Diagram weighted_slice_diagram(const DualPoint& line) const; + + // translate all points by vector (a,a) + void translate(Real a); + + // return minimal value of x- and y-coordinates + Real minimal_coordinate() const { return std::min(min_x(), min_x()); } + + // return box that contains all positions of all simplices Box bounding_box() const; + friend std::ostream& operator<<(std::ostream& os, const ModulePresentation& mp); + + Real max_x() const { return max_x_; } + + Real max_y() const { return max_y_; } + + Real min_x() const { return min_x_; } + + Real min_y() const { return min_y_; } + + PointVec positions() const; + private: - std::vector generators_; + + Real max_x_ { std::numeric_limits::max() }; + Real max_y_ { std::numeric_limits::max() }; + Real min_x_ { -std::numeric_limits::max() }; + Real min_y_ { -std::numeric_limits::max() }; + Box bounding_box_; + + PointVec generators_; std::vector relations_; + PointVec positions_; + + void init_boundaries(); + void project_generators(const DualPoint& slice, IndexVec& sorted_indices, RealVec& projections) const; + void project_relations(const DualPoint& slice, IndexVec& sorted_indices, RealVec& projections) const; }; // template diff --git a/matching/include/phat/compute_persistence_pairs.h b/matching/include/phat/compute_persistence_pairs.h index 48be65c..06f5372 100644 --- a/matching/include/phat/compute_persistence_pairs.h +++ b/matching/include/phat/compute_persistence_pairs.h @@ -97,14 +97,23 @@ namespace phat { ReductionAlgorithm reduce; reduce( boundary_matrix ); pairs.clear(); + std::set max_indices; + // finite pairs for( index idx = 0; idx < boundary_matrix.get_num_cols(); idx++ ) { if( !boundary_matrix.is_empty( idx ) ) { index birth = boundary_matrix.get_max_index( idx ); + max_indices.insert(birth); index death = idx; pairs.append_pair( birth, death ); } } - } + // infinite pairs: column idx is 0, and row idx does not contain a lowest one + for( index idx = 0; idx < boundary_matrix.get_num_cols(); idx++ ) { + if(boundary_matrix.is_empty(idx) && max_indices.count(idx) == 0 ) { + pairs.append_pair( idx, k_infinity_index); + } + } + } template< typename ReductionAlgorithm, typename Representation > void compute_persistence_pairs_dualized( persistence_pairs& pairs, boundary_matrix< Representation >& boundary_matrix ) { diff --git a/matching/include/phat/helpers/misc.h b/matching/include/phat/helpers/misc.h index 3e1ed56..5a5c682 100644 --- a/matching/include/phat/helpers/misc.h +++ b/matching/include/phat/helpers/misc.h @@ -35,6 +35,7 @@ #include #include #include +#include // VS2008 and below unfortunately do not support stdint.h #if defined(_MSC_VER)&& _MSC_VER < 1600 @@ -55,6 +56,8 @@ namespace phat { typedef int64_t index; typedef int8_t dimension; typedef std::vector< index > column; + + constexpr index k_infinity_index = std::numeric_limits::max(); } // OpenMP (proxy) functions diff --git a/matching/src/bifiltration.cpp b/matching/src/bifiltration.cpp index 429a9a8..b0d4248 100644 --- a/matching/src/bifiltration.cpp +++ b/matching/src/bifiltration.cpp @@ -31,7 +31,7 @@ namespace md { Bifiltration::Bifiltration(const std::string& fname, BifiltrationFormat input_format) { - std::ifstream ifstr{fname.c_str()}; + std::ifstream ifstr {fname.c_str()}; if (!ifstr.good()) { std::string error_message = fmt::format("Cannot open file {0}", fname); std::cerr << error_message << std::endl; @@ -107,7 +107,7 @@ namespace md { #endif } - DiagramKeeper Bifiltration::weighted_slice_diagram(const DualPoint& line, int /*dim*/) const + Diagram Bifiltration::weighted_slice_diagram(const DualPoint& line, int dim) const { DiagramKeeper dgm; @@ -158,10 +158,12 @@ namespace md { phat::compute_persistence_pairs(phat_persistence_pairs, phat_matrix); dgm.clear(); + constexpr Real real_inf = std::numeric_limits::infinity(); for(long i = 0; i < (long) phat_persistence_pairs.get_num_pairs(); i++) { std::pair new_pair = phat_persistence_pairs.get_pair(i); + bool is_finite_pair = new_pair.second != phat::k_infinity_index; Real birth = simplices.at(new_pair.first).value(); - Real death = simplices.at(new_pair.second).value(); + Real death = is_finite_pair ? simplices.at(new_pair.second).value() : real_inf; int dim = simplices[new_pair.first].dim(); assert(dim + 1 == simplices[new_pair.second].dim()); if (birth != death) { @@ -171,7 +173,7 @@ namespace md { spdlog::debug("Exiting slice_diagram, #dgm[0] = {}", dgm.get_diagram(0).size()); - return dgm; + return dgm.get_diagram(dim); } Box Bifiltration::bounding_box() const @@ -264,13 +266,12 @@ namespace md { } } - void Bifiltration::postprocess_rivet_format() { std::map facets_to_ids; // fill the map - for(Index i = 0; i < (Index)simplices_.size(); ++i) { + for(Index i = 0; i < (Index) simplices_.size(); ++i) { assert(simplices_[i].id() == i); facets_to_ids[simplices_[i].vertices_] = i; } @@ -305,5 +306,76 @@ namespace md { } return os; } + + BifiltrationProxy::BifiltrationProxy(const md::Bifiltration& bif, int dim) + : + dim_(dim), + bif_(bif) + { + cache_positions(); + } + + void BifiltrationProxy::cache_positions() const + { + cached_positions_.clear(); + for(const auto& simplex : bif_.simplices()) { + if (simplex.dim() == dim_ or simplex.dim() == dim_ + 1) + cached_positions_.push_back(simplex.position()); + } + } + + PointVec BifiltrationProxy::positions() const + { + if (cached_positions_.empty()) { + cache_positions(); + } + return cached_positions_; + } + + // translate all points by vector (a,a) + void BifiltrationProxy::translate(Real a) + { + bif_.translate(a); + } + + // return minimal value of x- and y-coordinates + // among all simplices + Real BifiltrationProxy::minimal_coordinate() const + { + return bif_.minimal_coordinate(); + } + + // return box that contains positions of all simplices + Box BifiltrationProxy::bounding_box() const + { + return bif_.bounding_box(); + } + + Real BifiltrationProxy::max_x() const + { + return bif_.max_x(); + } + + Real BifiltrationProxy::max_y() const + { + return bif_.max_y(); + } + + Real BifiltrationProxy::min_x() const + { + return bif_.min_x(); + } + + Real BifiltrationProxy::min_y() const + { + return bif_.min_y(); + } + + + Diagram BifiltrationProxy::weighted_slice_diagram(const DualPoint& slice) const + { + return bif_.weighted_slice_diagram(slice, dim_); + } + } diff --git a/matching/src/dual_box.cpp b/matching/src/dual_box.cpp index f9d2979..ff4d30c 100644 --- a/matching/src/dual_box.cpp +++ b/matching/src/dual_box.cpp @@ -99,7 +99,7 @@ namespace md { return result; } - std::vector DualBox::critical_points(const Point& p) const + std::vector DualBox::critical_points(const Point& /*p*/) const { // maximal difference is attained at corners return corners(); diff --git a/matching/src/matching_distance.cpp b/matching/src/matching_distance.cpp index ac96ba2..daaff5f 100644 --- a/matching/src/matching_distance.cpp +++ b/matching/src/matching_distance.cpp @@ -4,799 +4,42 @@ #include "common_defs.h" -#include "spdlog/fmt/ostr.h" #include "matching_distance.h" namespace md { - template - void print_map(const std::map& dic) - { - for(const auto kv : dic) { - fmt::print("{} -> {}\n", kv.first, kv.second); - } - } - - void DistanceCalculator::check_upper_bound(const CellWithValue& dual_cell, int dim) const - { - spd::debug("Enter check_get_max_delta_on_cell"); - const int n_samples_lambda = 100; - const int n_samples_mu = 100; - DualBox db = dual_cell.dual_box(); - Real min_lambda = db.lambda_min(); - Real max_lambda = db.lambda_max(); - Real min_mu = db.mu_min(); - Real max_mu = db.mu_max(); - - Real h_lambda = (max_lambda - min_lambda) / n_samples_lambda; - Real h_mu = (max_mu - min_mu) / n_samples_mu; - for(int i = 1; i < n_samples_lambda; ++i) { - for(int j = 1; j < n_samples_mu; ++j) { - Real lambda = min_lambda + i * h_lambda; - Real mu = min_mu + j * h_mu; - DualPoint l(db.axis_type(), db.angle_type(), lambda, mu); - Real other_result = distance_on_line_const(dim, l); - Real diff = fabs(dual_cell.stored_upper_bound() - other_result); - if (other_result > dual_cell.stored_upper_bound()) { - spd::error( - "in check_upper_bound, upper_bound = {}, other_result = {}, diff = {}, dim = {}\ndual_cell = {}", - dual_cell.stored_upper_bound(), other_result, diff, dim, dual_cell); - throw std::runtime_error("Wrong delta estimate"); - } - } - } - spd::debug("Exit check_get_max_delta_on_cell"); - } - - // for all lines l, l' inside dual box, - // find the upper bound on the difference of weighted pushes of p - Real - DistanceCalculator::get_max_displacement_single_point(const CellWithValue& dual_cell, ValuePoint vp, - const Point& p) const - { - assert(p.x >= 0 && p.y >= 0); - -#ifdef MD_DEBUG - std::vector debug_ids = {3, 13, 54, 218, 350, 382, 484, 795, 2040, 8415, 44076}; - bool debug = false; // std::find(debug_ids.begin(), debug_ids.end(), dual_cell.id) != debug_ids.end(); -#endif - DualPoint line = dual_cell.value_point(vp); - const Real base_value = line.weighted_push(p); - - spd::debug("Enter get_max_displacement_single_point, p = {},\ndual_cell = {},\nline = {}, base_value = {}\n", p, - dual_cell, line, base_value); - - Real result = 0.0; - for(DualPoint dp : dual_cell.dual_box().critical_points(p)) { - Real dp_value = dp.weighted_push(p); - spd::debug( - "In get_max_displacement_single_point, p = {}, critical dp = {},\ndp_value = {}, diff = {},\ndual_cell = {}\n", - p, dp, dp_value, fabs(base_value - dp_value), dual_cell); - result = std::max(result, fabs(base_value - dp_value)); - } - -#ifdef MD_DO_FULL_CHECK - DualBox db = dual_cell.dual_box(); - std::uniform_real_distribution dlambda(db.lambda_min(), db.lambda_max()); - std::uniform_real_distribution dmu(db.mu_min(), db.mu_max()); - std::mt19937 gen(1); - for(int i = 0; i < 1000; ++i) { - Real lambda = dlambda(gen); - Real mu = dmu(gen); - DualPoint dp_random { db.axis_type(), db.angle_type(), lambda, mu }; - Real dp_value = dp_random.weighted_push(p); - if (fabs(base_value - dp_value) > result) { - spd::error("in get_max_displacement_single_point, p = {}, vp = {}\ndb = {}\nresult = {}, base_value = {}, dp_value = {}, dp_random = {}", - p, vp, db, result, base_value, dp_value, dp_random); - throw std::runtime_error("error in get_max_displacement_single_value"); - } - } -#endif - - return result; - } - - DistanceCalculator::CellValueVector DistanceCalculator::get_initial_dual_grid(Real& lower_bound) - { - CellValueVector result = get_refined_grid(params_.initialization_depth, false, true); - - lower_bound = -1.0; - for(const auto& dc : result) { - lower_bound = std::max(lower_bound, dc.max_corner_value()); - } - - assert(lower_bound >= 0); - - for(auto& dual_cell : result) { - Real good_enough_ub = get_good_enough_upper_bound(lower_bound); - Real max_value_on_cell = get_upper_bound(dual_cell, params_.dim, good_enough_ub); - dual_cell.set_max_possible_value(max_value_on_cell); - -#ifdef MD_DO_FULL_CHECK - check_upper_bound(dual_cell, params_.dim); -#endif - - spd::debug("DEBUG INIT: added cell {}", dual_cell); - } - - - - return result; - } - - DistanceCalculator::CellValueVector - DistanceCalculator::get_refined_grid(int init_depth, bool calculate_on_intermediate, bool calculate_on_last) - { - const Real y_max = std::max(module_a_.max_y(), module_b_.max_y()); - const Real x_max = std::max(module_a_.max_x(), module_b_.max_x()); - - const Real lambda_min = 0; - const Real lambda_max = 1; - - const Real mu_min = 0; - - DualBox x_flat(DualPoint(AxisType::x_type, AngleType::flat, lambda_min, mu_min), - DualPoint(AxisType::x_type, AngleType::flat, lambda_max, x_max)); - - DualBox x_steep(DualPoint(AxisType::x_type, AngleType::steep, lambda_min, mu_min), - DualPoint(AxisType::x_type, AngleType::steep, lambda_max, x_max)); - - DualBox y_flat(DualPoint(AxisType::y_type, AngleType::flat, lambda_min, mu_min), - DualPoint(AxisType::y_type, AngleType::flat, lambda_max, y_max)); - - DualBox y_steep(DualPoint(AxisType::y_type, AngleType::steep, lambda_min, mu_min), - DualPoint(AxisType::y_type, AngleType::steep, lambda_max, y_max)); - - CellWithValue x_flat_cell(x_flat, 0); - CellWithValue x_steep_cell(x_steep, 0); - CellWithValue y_flat_cell(y_flat, 0); - CellWithValue y_steep_cell(y_steep, 0); - - if (init_depth == 0) { - DualPoint diagonal_x_flat(AxisType::x_type, AngleType::flat, 1, 0); - - Real diagonal_value = distance_on_line(params_.dim, diagonal_x_flat); - n_hera_calls_per_level_[0]++; - - x_flat_cell.set_value_at(ValuePoint::lower_right, diagonal_value); - y_flat_cell.set_value_at(ValuePoint::lower_right, diagonal_value); - x_steep_cell.set_value_at(ValuePoint::lower_right, diagonal_value); - y_steep_cell.set_value_at(ValuePoint::lower_right, diagonal_value); - } - -#ifdef MD_DEBUG - x_flat_cell.id = 1; - x_steep_cell.id = 2; - y_flat_cell.id = 3; - y_steep_cell.id = 4; - CellWithValue::max_id = 4; -#endif - - CellValueVector result {x_flat_cell, x_steep_cell, y_flat_cell, y_steep_cell}; - - if (init_depth == 0) { - return result; - } - - CellValueVector refined_result; - - for(int i = 1; i <= init_depth; ++i) { - refined_result.clear(); - for(const auto& dual_cell : result) { - for(auto refined_cell : dual_cell.get_refined_cells()) { - // we calculate for init_dept - 1, not init_depth, - // because we want the cells to have value at a corner - if ((i == init_depth - 1 and calculate_on_last) or calculate_on_intermediate) - set_cell_central_value(refined_cell, params_.dim); - refined_result.push_back(refined_cell); - } - } - result = std::move(refined_result); - } - return result; - } - - DistanceCalculator::DistanceCalculator(const DiagramProvider& a, - const DiagramProvider& b, + Real matching_distance(const Bifiltration& bif_a, const Bifiltration& bif_b, CalculationParams& params) - : - module_a_(a), - module_b_(b), - params_(params), - maximal_dim_(std::max(a.maximal_dim(), b.maximal_dim())), - distances_(1 + std::max(a.maximal_dim(), b.maximal_dim()), Real(-1)) - { - // make all coordinates non-negative - auto min_coord = std::min(module_a_.minimal_coordinate(), - module_b_.minimal_coordinate()); - if (min_coord < 0) { - module_a_.translate(-min_coord); - module_b_.translate(-min_coord); - } - - assert(std::min({module_a_.min_x(), module_b_.min_x(), module_a_.min_y(), - module_b_.min_y()}) >= 0); - - spd::info("DistanceCalculator constructed, module_a: max_x = {}, max_y = {}, module_b: max_x = {}, max_y = {}", - module_a_.max_x(), module_a_.max_y(), module_b_.max_x(), module_b_.max_y()); - } - - void DistanceCalculator::clear_cache() - { - distances_ = std::vector(maximal_dim_, Real(-1)); - } - - Real DistanceCalculator::get_max_x(int module) const - { - return (module == 0) ? module_a_.max_x() : module_b_.max_x(); - } - - Real DistanceCalculator::get_max_y(int module) const - { - return (module == 0) ? module_a_.max_y() : module_b_.max_y(); - } - - Real - DistanceCalculator::get_local_refined_bound(const md::DualBox& dual_box) const - { - return get_local_refined_bound(0, dual_box) + get_local_refined_bound(1, dual_box); - } - - Real - DistanceCalculator::get_local_refined_bound(int module, const md::DualBox& dual_box) const - { - spd::debug("Enter get_local_refined_bound, dual_box = {}", dual_box); - Real d_lambda = dual_box.lambda_max() - dual_box.lambda_min(); - Real d_mu = dual_box.mu_max() - dual_box.mu_min(); - Real result; - if (dual_box.axis_type() == AxisType::x_type) { - if (dual_box.is_flat()) { - result = dual_box.lambda_max() * d_mu + (get_max_x(module) - dual_box.mu_min()) * d_lambda; - } else { - result = d_mu + get_max_y(module) * d_lambda; - } - } else { - // y-type - if (dual_box.is_flat()) { - result = d_mu + get_max_x(module) * d_lambda; - } else { - // steep - result = dual_box.lambda_max() * d_mu + (get_max_y(module) - dual_box.mu_min()) * d_lambda; - } - } - return result; - } - - Real DistanceCalculator::get_local_dual_bound(int module, const md::DualBox& dual_box) const - { - Real dlambda = dual_box.lambda_max() - dual_box.lambda_min(); - Real dmu = dual_box.mu_max() - dual_box.mu_min(); - Real C = std::max(get_max_x(module), get_max_y(module)); - - //return 2 * (C * dlambda + dmu); - - // additional factor of 2 because we mimic Cerri's paper - // where subdivision is on angle spaces, - // and tangent/cotangent is 2-Lipschitz - if (dual_box.is_flat()) { - return get_max_x(module) * dlambda + dmu; - } else { - return get_max_y(module) * dlambda + dmu; - } - } - - Real DistanceCalculator::get_local_dual_bound(const md::DualBox& dual_box) const - { - return get_local_dual_bound(0, dual_box) + get_local_dual_bound(1, dual_box); - } - - Real DistanceCalculator::get_upper_bound(const CellWithValue& dual_cell, int dim, Real good_enough_ub) const - { - assert(good_enough_ub >= 0); - - switch(params_.bound_strategy) { - case BoundStrategy::bruteforce: - return std::numeric_limits::max(); - - case BoundStrategy::local_dual_bound: - return dual_cell.min_value() + get_local_dual_bound(dual_cell.dual_box()); - - case BoundStrategy::local_dual_bound_refined: - return dual_cell.min_value() + get_local_refined_bound(dual_cell.dual_box()); - - case BoundStrategy::local_combined: { - Real cheap_upper_bound = dual_cell.min_value() + get_local_refined_bound(dual_cell.dual_box()); - if (cheap_upper_bound < good_enough_ub) { - return cheap_upper_bound; - } else { - [[fallthrough]]; - } - } - - case BoundStrategy::local_dual_bound_for_each_point: { - Real result = std::numeric_limits::max(); - for(ValuePoint vp : k_corner_vps) { - if (not dual_cell.has_value_at(vp)) { - continue; - } - - Real base_value = dual_cell.value_at(vp); - Real bound_dgm_a = get_single_dgm_bound(dual_cell, vp, 0, dim, good_enough_ub); - - if (params_.stop_asap and bound_dgm_a + base_value >= good_enough_ub) { - // we want to return a valid upper bound, not just something that will prevent discarding the cell - // and we don't want to compute pushes for points in second bifiltration. - // so just return a constant time bound - return dual_cell.min_value() + get_local_refined_bound(dual_cell.dual_box()); - } - - Real bound_dgm_b = get_single_dgm_bound(dual_cell, vp, 1, dim, - std::max(Real(0), good_enough_ub - bound_dgm_a)); - - result = std::min(result, base_value + bound_dgm_a + bound_dgm_b); - -#ifdef MD_DEBUG - spd::debug("In get_upper_bound, cell = {}", dual_cell); - spd::debug("In get_upper_bound, vp = {}, base_value = {}, bound_dgm_a = {}, bound_dgm_b = {}, result = {}", vp, base_value, bound_dgm_a, bound_dgm_b, result); -#endif - - if (params_.stop_asap and result < good_enough_ub) { - break; - } - } - return result; - } - } - // to suppress compiler warning - return std::numeric_limits::max(); - } - - // find maximal displacement of weighted points of m for all lines in dual_box - Real - DistanceCalculator::get_single_dgm_bound(const CellWithValue& dual_cell, - ValuePoint vp, - int module, - int dim, - [[maybe_unused]] Real good_enough_value) const - { - Real result = 0; - Point max_point; - - spd::debug("Enter get_single_dgm_bound, module = {}, dual_cell = {}, vp = {}, good_enough_value = {}, stop_asap = {}\n", module, dual_cell, vp, good_enough_value, params_.stop_asap); - - const DiagramProvider& m = (module == 0) ? module_a_ : module_b_; - for(const auto& simplex : m.simplices()) { - spd::debug("in get_single_dgm_bound, simplex = {}\n", simplex); - if (dim != simplex.dim() and dim + 1 != simplex.dim()) - continue; - - Real x = get_max_displacement_single_point(dual_cell, vp, simplex.position()); - - spd::debug("In get_single_dgm_bound, point = {}, displacement = {}", simplex.position(), x); - - if (x > result) { - result = x; - max_point = simplex.position(); - spd::debug("In get_single_dgm_bound, point = {}, result now = displacement = {}", simplex.position(), x); - } - - if (params_.stop_asap and result > good_enough_value) { - // we want to return a valid upper bound, - // now we just see it is worse than we need, but it may be even more - // just return a valid upper bound - spd::debug("result {} > good_enough_value {}, exit and return refined bound {}", result, good_enough_value, get_local_refined_bound(dual_cell.dual_box())); - result = get_local_refined_bound(dual_cell.dual_box()); - break; - } - } - - spd::debug("Exit get_single_dgm_bound,\ndual_cell = {}\nmodule = {}, dim = {}, result = {}, max_point = {}", dual_cell, module, dim, result, max_point); - - return result; - } - - Real DistanceCalculator::distance() - { - if (params_.dim != CalculationParams::ALL_DIMENSIONS) { - return distance_in_dimension_pq(params_.dim); - } else { - Real result = -1.0; - for(int d = 0; d <= maximal_dim_; ++d) { - result = std::max(result, distance_in_dimension_pq(d)); - } - return result; - } - } - - // calculate weighted bottleneneck distance between slices on line - // in dimension dim - // increments hera calls counter - Real DistanceCalculator::distance_on_line(int dim, DualPoint line) - { - // order matters - distance_on_line_const assumes n_hera_calls_ map has entry for dim - ++n_hera_calls_[dim]; - Real result = distance_on_line_const(dim, line); - return result; - } - - Real DistanceCalculator::distance_on_line_const(int dim, DualPoint line) const - { - // TODO: think about this - how to call Hera - Real hera_epsilon = 0.001; - auto dgm_a = module_a_.weighted_slice_diagram(line, dim).get_diagram(dim); - auto dgm_b = module_b_.weighted_slice_diagram(line, dim).get_diagram(dim); -// Real result = hera::bottleneckDistApprox(dgm_a, dgm_b, hera_epsilon); - Real result = hera::bottleneckDistExact(dgm_a, dgm_b); - if (n_hera_calls_.at(dim) % 100 == 1) { - spd::debug("Calling Hera, dgm_a.size = {}, dgm_b.size = {}, line = {}, result = {}", dgm_a.size(), dgm_b.size(), line, result); - } else { - spd::debug("Calling Hera, dgm_a.size = {}, dgm_b.size = {}, line = {}, result = {}", dgm_a.size(), dgm_b.size(), line, result); - } - return result; - } - - Real DistanceCalculator::get_good_enough_upper_bound(Real lower_bound) const { Real result; - // in upper_bound strategy we only prune cells if they cannot improve the lower bound, - // otherwise the experiment is supposed to run indefinitely - if (params_.traverse_strategy == TraverseStrategy::upper_bound) { - result = lower_bound; + // compute distance only in one dimension + if (params.dim != CalculationParams::ALL_DIMENSIONS) { + BifiltrationProxy bifp_a(bif_a, params.dim); + BifiltrationProxy bifp_b(bif_a, params.dim); + DistanceCalculator runner(bifp_a, bifp_b, params); + result = runner.distance(); + params.n_hera_calls = runner.get_hera_calls_number(); } else { - result = (1.0 + params_.delta) * lower_bound; - } - return result; - } - - // helper function - // calculate weighted bt distance in dim on cell center, - // assign distance value to cell, keep it in heat_map, and return - void DistanceCalculator::set_cell_central_value(CellWithValue& dual_cell, int dim) - { - DualPoint central_line {dual_cell.center()}; - - spd::debug("In set_cell_central_value, processing dual cell = {}, line = {}", dual_cell.dual_box(), - central_line); - Real new_value = distance_on_line(dim, central_line); - n_hera_calls_per_level_[dual_cell.level() + 1]++; - dual_cell.set_value_at(ValuePoint::center, new_value); - params_.actual_max_depth = std::max(params_.actual_max_depth, dual_cell.level() + 1); - -#ifdef PRINT_HEAT_MAP - if (params_.bound_strategy == BoundStrategy::bruteforce) { - spd::debug("In set_cell_central_value, adding to heat_map pair {} - {}", dual_cell.center(), new_value); - if (dual_cell.level() > params_.initialization_depth + 1 - and params_.heat_maps[dual_cell.level()].count(dual_cell.center()) > 0) { - auto existing = params_.heat_maps[dual_cell.level()].find(dual_cell.center()); - spd::debug("EXISTING: {} -> {}", existing->first, existing->second); + // compute distance in all dimensions, return maximal + result = -1; + for(int dim = 0; dim < std::max(bif_a.maximal_dim(), bif_b.maximal_dim()); ++dim) { + BifiltrationProxy bifp_a(bif_a, params.dim); + BifiltrationProxy bifp_b(bif_a, params.dim); + DistanceCalculator runner(bifp_a, bifp_b, params); + result = std::max(result, runner.distance()); + params.n_hera_calls += runner.get_hera_calls_number(); } - assert(dual_cell.level() <= params_.initialization_depth + 1 - or params_.heat_maps[dual_cell.level()].count(dual_cell.center()) == 0); - params_.heat_maps[dual_cell.level()][dual_cell.center()] = new_value; - } -#endif - } - - // quick-and-dirty hack to efficiently traverse priority queue with dual cells - // returns maximal possible value on all cells in queue - // assumes that the underlying container is vector! - // cell_ptr: pointer to the first element in queue - // n_cells: queue size - Real DistanceCalculator::get_max_possible_value(const CellWithValue* cell_ptr, int n_cells) - { - Real result = (n_cells > 0) ? cell_ptr->stored_upper_bound() : 0; - for(int i = 0; i < n_cells; ++i, ++cell_ptr) { - result = std::max(result, cell_ptr->stored_upper_bound()); } return result; } - // helper function: - // return current error from lower and upper bounds - // and save it in params_ (hence not const) - Real DistanceCalculator::current_error(Real lower_bound, Real upper_bound) - { - Real current_error = (lower_bound > 0.0) ? (upper_bound - lower_bound) / lower_bound - : std::numeric_limits::max(); - - params_.actual_error = current_error; - - if (current_error < params_.delta) { - spd::debug( - "Threshold achieved! bound_strategy = {}, traverse_strategy = {}, upper_bound = {}, current_error = {}", - params_.bound_strategy, params_.traverse_strategy, upper_bound, current_error); - } - return current_error; - } - - struct UbExperimentRecord { - Real error; - Real lower_bound; - Real upper_bound; - CellWithValue cell; - long long int time; - long long int n_hera_calls; - }; - - std::ostream& operator<<(std::ostream& os, const UbExperimentRecord& r); - - // return matching distance in dimension dim - // use priority queue to store dual cells - // comparison function depends on the strategies in params_ - // ressets hera calls counter - Real DistanceCalculator::distance_in_dimension_pq(int dim) - { - std::map n_cells_considered; - std::map n_cells_pushed_into_queue; - long int n_too_deep_cells = 0; - std::map n_cells_discarded; - std::map n_cells_pruned; - - spd::info("Enter distance_in_dimension_pq, dim = {}, bound strategy = {}, traverse strategy = {}, stop_asap = {} ", dim, params_.bound_strategy, params_.traverse_strategy, params_.stop_asap); - - std::chrono::high_resolution_clock timer; - auto start_time = timer.now(); - - n_hera_calls_[dim] = 0; - n_hera_calls_per_level_.clear(); - - - // if cell is too deep and is not pushed into queue, - // we still need to take its max value into account; - // the max over such cells is stored in max_result_on_too_fine_cells - Real upper_bound_on_deep_cells = -1; - - spd::debug("Started iterations in dual space, delta = {}, bound_strategy = {}", params_.delta, params_.bound_strategy); - // user-defined less lambda function - // to regulate priority queue depending on strategy - auto dual_cell_less = [this](const CellWithValue& a, const CellWithValue& b) { - - int a_level = a.level(); - int b_level = b.level(); - Real a_value = a.max_corner_value(); - Real b_value = b.max_corner_value(); - Real a_ub = a.stored_upper_bound(); - Real b_ub = b.stored_upper_bound(); - if (this->params_.traverse_strategy == TraverseStrategy::upper_bound and - (not a.has_max_possible_value() or not b.has_max_possible_value())) { - throw std::runtime_error("no upper bound on cell"); - } - DualPoint a_lower_left = a.dual_box().lower_left(); - DualPoint b_lower_left = b.dual_box().lower_left(); - - switch(this->params_.traverse_strategy) { - // in both breadth_first searches we want coarser cells - // to be processed first. Cells with smaller level must be larger, - // hence the minus in front of level - case TraverseStrategy::breadth_first: - return std::make_tuple(-a_level, a_lower_left) - < std::make_tuple(-b_level, b_lower_left); - case TraverseStrategy::breadth_first_value: - return std::make_tuple(-a_level, a_value, a_lower_left) - < std::make_tuple(-b_level, b_value, b_lower_left); - case TraverseStrategy::depth_first: - return std::make_tuple(a_value, a_level, a_lower_left) - < std::make_tuple(b_value, b_level, b_lower_left); - case TraverseStrategy::upper_bound: - return std::make_tuple(a_ub, a_level, a_lower_left) - < std::make_tuple(b_ub, b_level, b_lower_left); - default: - throw std::runtime_error("Forgotten case"); - } - }; - - std::priority_queue dual_cells_queue( - dual_cell_less); - - // weighted bt distance on the center of current cell - Real lower_bound = std::numeric_limits::min(); - - // init pq and lower bound - for(auto& init_cell : get_initial_dual_grid(lower_bound)) { - dual_cells_queue.push(init_cell); - } - - Real upper_bound = get_max_possible_value(&dual_cells_queue.top(), dual_cells_queue.size()); - - std::vector ub_experiment_results; - - while(not dual_cells_queue.empty()) { - - CellWithValue dual_cell = dual_cells_queue.top(); - dual_cells_queue.pop(); - assert(dual_cell.has_corner_value() - and dual_cell.has_max_possible_value() - and dual_cell.max_corner_value() <= upper_bound); - - n_cells_considered[dual_cell.level()]++; - - bool discard_cell = false; - - if (not params_.stop_asap) { - // if stop_asap is on, it is safer to never discard a cell - if (params_.bound_strategy == BoundStrategy::bruteforce) { - discard_cell = false; - } else if (params_.traverse_strategy == TraverseStrategy::upper_bound) { - discard_cell = (dual_cell.stored_upper_bound() <= lower_bound); - } else { - discard_cell = (dual_cell.stored_upper_bound() <= (1.0 + params_.delta) * lower_bound); - } - } - - spd::debug("CURRENT CELL bound_strategy = {}, traverse_strategy = {}, dual cell: {}, upper_bound = {}, lower_bound = {}, current_error = {}, discard_cell = {}", - params_.bound_strategy, params_.traverse_strategy, dual_cell, upper_bound, lower_bound, current_error(lower_bound, upper_bound), discard_cell); - - if (discard_cell) { - n_cells_discarded[dual_cell.level()]++; - continue; - } - - // until now, dual_cell knows its value in one of its corners - // new_value will be the weighted distance at its center - set_cell_central_value(dual_cell, dim); - Real new_value = dual_cell.value_at(ValuePoint::center); - lower_bound = std::max(new_value, lower_bound); - - spd::debug("Processed cell = {}, weighted value = {}, lower_bound = {}", dual_cell, new_value, lower_bound); - - assert(upper_bound >= lower_bound); - - if (current_error(lower_bound, upper_bound) < params_.delta) { - break; - } - - // refine cell and push 4 smaller cells into queue - for(auto refined_cell : dual_cell.get_refined_cells()) { - - if (refined_cell.num_values() == 0) - throw std::runtime_error("no value on cell"); - - // if delta is smaller than good_enough_value, it allows to prune cell - Real good_enough_ub = get_good_enough_upper_bound(lower_bound); - - // upper bound of the parent holds for refined_cell - // and can sometimes be smaller! - Real upper_bound_on_refined_cell = std::min(dual_cell.stored_upper_bound(), - get_upper_bound(refined_cell, dim, good_enough_ub)); - - spd::debug("upper_bound_on_refined_cell = {}, dual_cell.stored_upper_bound = {}, get_upper_bound = {}", - upper_bound_on_refined_cell, dual_cell.stored_upper_bound(), get_upper_bound(refined_cell, dim, good_enough_ub)); - - refined_cell.set_max_possible_value(upper_bound_on_refined_cell); - -#ifdef MD_DO_FULL_CHECK - check_upper_bound(refined_cell, dim); -#endif - - bool prune_cell = false; - - if (refined_cell.level() <= params_.max_depth) { - // cell might be added to queue; if it is not added, its maximal value can be safely ignored - if (params_.traverse_strategy == TraverseStrategy::upper_bound) { - prune_cell = (refined_cell.stored_upper_bound() <= lower_bound); - } else if (params_.bound_strategy != BoundStrategy::bruteforce) { - prune_cell = (refined_cell.stored_upper_bound() <= (1.0 + params_.delta) * lower_bound); - } - if (prune_cell) - n_cells_pruned[refined_cell.level()]++; -// prune_cell = (max_result_on_refined_cell <= lower_bound); - } else { - // cell is too deep, it won't be added to queue - // we must memorize maximal value on this cell, because we won't see it anymore - prune_cell = true; - if (refined_cell.stored_upper_bound() > (1 + params_.delta) * lower_bound) { - n_too_deep_cells++; - } - upper_bound_on_deep_cells = std::max(upper_bound_on_deep_cells, refined_cell.stored_upper_bound()); - } - - spd::debug("In distance_in_dimension_pq, loop over refined cells, bound_strategy = {}, traverse_strategy = {}, refined cell: {}, max_value_on_cell = {}, upper_bound = {}, current_error = {}, prune_cell = {}", - params_.bound_strategy, params_.traverse_strategy, refined_cell, refined_cell.stored_upper_bound(), upper_bound, current_error(lower_bound, upper_bound), prune_cell); - - if (not prune_cell) { - n_cells_pushed_into_queue[refined_cell.level()]++; - dual_cells_queue.push(refined_cell); - } - } // end loop over refined cells - - if (dual_cells_queue.empty()) - upper_bound = std::max(upper_bound, upper_bound_on_deep_cells); - else - upper_bound = std::max(upper_bound_on_deep_cells, - get_max_possible_value(&dual_cells_queue.top(), dual_cells_queue.size())); - - if (params_.traverse_strategy == TraverseStrategy::upper_bound) { - upper_bound = dual_cells_queue.top().stored_upper_bound(); - - if (get_hera_calls_number(params_.dim) < 20 || get_hera_calls_number(params_.dim) % 20 == 0) { - auto elapsed = timer.now() - start_time; - UbExperimentRecord ub_exp_record; - - ub_exp_record.error = current_error(lower_bound, upper_bound); - ub_exp_record.lower_bound = lower_bound; - ub_exp_record.upper_bound = upper_bound; - ub_exp_record.cell = dual_cells_queue.top(); - ub_exp_record.n_hera_calls = n_hera_calls_[dim]; - ub_exp_record.time = std::chrono::duration_cast(elapsed).count(); - -#ifdef MD_DO_CHECKS - if (ub_experiment_results.size() > 0) { - auto prev = ub_experiment_results.back(); - if (upper_bound > prev.upper_bound) { - spd::error("ALARM 1, upper_bound = {}, top = {}, prev.ub = {}, prev cell = {}, lower_bound = {}, prev.lower_bound = {}", - upper_bound, ub_exp_record.cell, prev.upper_bound, prev.cell, lower_bound, prev.lower_bound); - throw std::runtime_error("die"); - } - - if (lower_bound < prev.lower_bound) { - spd::error("ALARM 2, lower_bound = {}, prev.lower_bound = {}, top = {}, prev.ub = {}, prev cell = {}", lower_bound, prev.lower_bound, ub_exp_record.cell, prev.upper_bound, prev.cell); - throw std::runtime_error("die"); - } - } -#endif - - ub_experiment_results.emplace_back(ub_exp_record); - - fmt::print(std::cerr, "[UB_EXPERIMENT]\t{}\n", ub_exp_record); - } - } - - assert(upper_bound >= lower_bound); - - if (current_error(lower_bound, upper_bound) < params_.delta) { - break; - } - } - - params_.actual_error = current_error(lower_bound, upper_bound); - - if (n_too_deep_cells > 0) { - spd::warn("Error not guaranteed, there were {} too deep cells. Actual error = {}. Increase max_depth or delta", n_too_deep_cells, params_.actual_error); - } - // otherwise actual_error in params can be larger than delta, - // but this is OK - - spd::info("#############################################################"); - spd::info("Exiting distance_in_dimension_pq, bound_strategy = {}, traverse_strategy = {}, lower_bound = {}, upper_bound = {}, current_error = {}, actual_max_level = {}", - params_.bound_strategy, params_.traverse_strategy, lower_bound, - upper_bound, params_.actual_error, params_.actual_max_depth); - - spd::info("#############################################################"); - - bool print_stats = true; - if (print_stats) { - fmt::print("EXIT STATS, cells considered:\n"); - print_map(n_cells_considered); - fmt::print("EXIT STATS, cells discarded:\n"); - print_map(n_cells_discarded); - fmt::print("EXIT STATS, cells pruned:\n"); - print_map(n_cells_pruned); - fmt::print("EXIT STATS, cells pushed:\n"); - print_map(n_cells_pushed_into_queue); - fmt::print("EXIT STATS, hera calls:\n"); - print_map(n_hera_calls_per_level_); - - fmt::print("EXIT STATS, too deep cells with high value: {}\n", n_too_deep_cells); - } - - return lower_bound; - } - - int DistanceCalculator::get_hera_calls_number(int dim) const - { - if (dim == CalculationParams::ALL_DIMENSIONS) - return std::accumulate(n_hera_calls_.begin(), n_hera_calls_.end(), 0, - [](auto x, auto y) { return x + y.second; }); - else - return n_hera_calls_.at(dim); - } - Real matching_distance(const Bifiltration& bif_a, const Bifiltration& bif_b, + Real matching_distance(const ModulePresentation& mod_a, const ModulePresentation& mod_b, CalculationParams& params) { - DistanceCalculator runner(bif_a, bif_b, params); + DistanceCalculator runner(mod_a, mod_b, params); Real result = runner.distance(); - params.n_hera_calls = runner.get_hera_calls_number(params.dim); + params.n_hera_calls = runner.get_hera_calls_number(); return result; } diff --git a/matching/src/persistence_module.cpp b/matching/src/persistence_module.cpp index e947925..f759a2b 100644 --- a/matching/src/persistence_module.cpp +++ b/matching/src/persistence_module.cpp @@ -1,104 +1,179 @@ -#include -#include +#include +#include + +#include +#include #include "persistence_module.h" namespace md { - PersistenceModule::PersistenceModule(const std::string& /*fname*/) // read from file + + /** + * + * @param values vector of length n + * @return [a_1,...,a_n] such that + * 1) values[a_1] <= values[a_2] <= ... <= values[a_n] + * 2) a_1,...,a_n is a permutation of 1,..,n + */ + + template + IndexVec get_sorted_indices(const std::vector& values) + { + IndexVec result(values.size()); + std::iota(result.begin(), result.end(), 0); + std::sort(result.begin(), result.end(), + [&values](size_t a, size_t b) { return values[a] < values[b]; }); + return result; + } + + // helper function to initialize const member positions_ in ModulePresentation + PointVec + concat_gen_and_rel_positions(const PointVec& generators, const std::vector& relations) + { + PointVec result(generators); + result.reserve(result.size() + relations.size()); + for(const auto& rel : relations) { + result.push_back(rel.position_); + } + return result; + } + + + void ModulePresentation::init_boundaries() + { + max_x_ = std::numeric_limits::max(); + max_y_ = std::numeric_limits::max(); + min_x_ = -std::numeric_limits::max(); + min_y_ = -std::numeric_limits::max(); + + for(const auto& gen : positions_) { + min_x_ = std::min(gen.x, min_x_); + min_y_ = std::min(gen.y, min_y_); + max_x_ = std::max(gen.x, max_x_); + max_y_ = std::max(gen.y, max_y_); + } + + bounding_box_ = Box(Point(min_x_, min_y_), Point(max_x_, max_y_)); + } + + void ModulePresentation::translate(md::Real a) + { + for(auto& g : generators_) { + g.translate(a); + } + + for(auto& r : relations_) { + r.position_.translate(a); + } + + positions_ = concat_gen_and_rel_positions(generators_, relations_); + init_boundaries(); + } + + + ModulePresentation::ModulePresentation(const std::string& fname) // read from file : generators_(), - relations_() + relations_(), + positions_(concat_gen_and_rel_positions(generators_, relations_)) { + init_boundaries(); } - Diagram PersistenceModule::slice_diagram(const DualPoint& /*line*/) + /** + * + * @param slice line on which generators are projected + * @param sorted_indices [a_1,...,a_n] s.t. wpush(generator[a_1]) <= wpush(generator[a_2]) <= .. + * @param projections sorted weighted pushes of generators + */ + + void + ModulePresentation::project_generators(const DualPoint& slice, IndexVec& sorted_indices, RealVec& projections) const { - //Vector2D b_of_line(L.b, -L.b); - //for (int i = 0; i<(int) F.size(); i++) { - // Simplex_in_2D_filtration& curr_simplex = F[i]; - // Vector2D proj = push(curr_simplex.pos, L); - - // curr_simplex.v = L_2(proj - b_of_line); - // //std::cout << proj << std::endl; - // //std::cout << "v=" << curr_simplex.v << std::endl; - //} - //std::sort(F.begin(), F.end(), sort_functor); - //std::map index_map; - //for (Index i = 0; i<(int) F.size(); i++) { - // index_map[F[i].index] = i; - // //std::cout << F[i].index << " -> " << i << std::endl; - //} - //phat::boundary_matrix<> phat_matrix; - //phat_matrix.set_num_cols(F.size()); - //std::vector bd_in_slice_filtration; - //for (Index i = 0; i<(int) F.size(); i++) { - // phat_matrix.set_dim(i, F[i].dim); - // bd_in_slice_filtration.clear(); - // //std::cout << "new col" << i << std::endl; - // for (int j = 0; j<(int) F[i].bd.size(); j++) { - // // F[i] contains the indices of its facet wrt to the - // // original filtration. We have to express it, however, - // // wrt to the filtration along the slice. That is why - // // we need the index_map - // //std::cout << "Found " << F[i].bd[j] << ", returning " << index_map[F[i].bd[j]] << std::endl; - // bd_in_slice_filtration.push_back(index_map[F[i].bd[j]]); - // } - // std::sort(bd_in_slice_filtration.begin(), bd_in_slice_filtration.end()); - // [> - // for(int j=0;j(phat_persistence_pairs, phat_matrix); - - //dgm.clear(); - //for (long i = 0; i<(long) phat_persistence_pairs.get_num_pairs(); i++) { - // std::pair new_pair = phat_persistence_pairs.get_pair(i); - // double birth = F[new_pair.first].v; - // double death = F[new_pair.second].v; - // if (birth!=death) { - // dgm.push_back(std::make_pair(birth, death)); - // } - //} - - //[> - //std::cout << "Done, created diagram: " << std::endl; - //for(int i=0;i<(int)dgm.size();i++) { - // std::cout << dgm[i].first << " " << dgm[i].second << std::endl; - //} - //*/ - return Diagram(); + size_t num_gens = generators_.size(); + RealVec gen_values; + gen_values.reserve(num_gens); + for(const auto& pos : generators_) { + gen_values.push_back(slice.weighted_push(pos)); + } + sorted_indices = get_sorted_indices(gen_values); + projections.clear(); + projections.reserve(num_gens); + for(auto i : sorted_indices) { + projections.push_back(gen_values[i]); + } } - PersistenceModule::Box PersistenceModule::bounding_box() const + void ModulePresentation::project_relations(const DualPoint& slice, IndexVec& sorted_rel_indices, + RealVec& projections) const { - Real ll_x = std::numeric_limits::max(); - Real ll_y = std::numeric_limits::max(); - Real ur_x = -std::numeric_limits::max(); - Real ur_y = -std::numeric_limits::max(); - - for(const auto& gen : generators_) { - ll_x = std::min(gen.x, ll_x); - ll_y = std::min(gen.y, ll_y); - ur_x = std::max(gen.x, ur_x); - ur_y = std::max(gen.y, ur_y); - } + size_t num_rels = relations_.size(); + RealVec rel_values; + rel_values.reserve(num_rels); for(const auto& rel : relations_) { + rel_values.push_back(slice.weighted_push(rel.position_)); + } + sorted_rel_indices = get_sorted_indices(rel_values); + projections.clear(); + projections.reserve(num_rels); + for(auto i : sorted_rel_indices) { + projections.push_back(rel_values[i]); + } + } + + Diagram ModulePresentation::weighted_slice_diagram(const DualPoint& slice) const + { + IndexVec sorted_gen_indices, sorted_rel_indices; + RealVec gen_projections, rel_projections; + + project_generators(slice, sorted_gen_indices, gen_projections); + project_relations(slice, sorted_rel_indices, rel_projections); + + phat::boundary_matrix<> phat_matrix; + + phat_matrix.set_num_cols(relations_.size()); - ll_x = std::min(rel.get_x(), ll_x); - ll_y = std::min(rel.get_y(), ll_y); + for(Index i = 0; i < (Index) relations_.size(); i++) { + IndexVec current_relation = relations_[sorted_rel_indices[i]].components_; + for(auto& j : current_relation) { + j = sorted_gen_indices[j]; + } + std::sort(current_relation.begin(), current_relation.end()); + phat_matrix.set_dim(i, current_relation.size()); + phat_matrix.set_col(i, current_relation); + } + + phat::persistence_pairs phat_persistence_pairs; + phat::compute_persistence_pairs(phat_persistence_pairs, phat_matrix); + + Diagram dgm; - ur_x = std::max(rel.get_x(), ur_x); - ur_y = std::max(rel.get_y(), ur_y); + constexpr Real real_inf = std::numeric_limits::infinity(); + + for(Index i = 0; i < (Index) phat_persistence_pairs.get_num_pairs(); i++) { + std::pair new_pair = phat_persistence_pairs.get_pair(i); + bool is_finite_pair = new_pair.second != phat::k_infinity_index; + Real birth = gen_projections.at(new_pair.first); + Real death = is_finite_pair ? rel_projections.at(new_pair.second) : real_inf; + if (birth != death) { + dgm.emplace_back(birth, death); + } } - return Box(Point(ll_x, ll_y), Point(ur_x, ur_y)); + return dgm; + } + + PointVec ModulePresentation::positions() const + { + return positions_; + } + + + Box ModulePresentation::bounding_box() const + { + return bounding_box_; } + } diff --git a/matching/src/simplex.cpp b/matching/src/simplex.cpp index c5cdd25..661dcb1 100644 --- a/matching/src/simplex.cpp +++ b/matching/src/simplex.cpp @@ -57,8 +57,7 @@ namespace md { void Simplex::translate(Real a) { - pos_.x += a; - pos_.y += a; + pos_.translate(a); } void Simplex::init_rivet(std::string s) diff --git a/matching/src/tests/test_matching_distance.cpp b/matching/src/tests/test_matching_distance.cpp index a54e18e..caf32cd 100644 --- a/matching/src/tests/test_matching_distance.cpp +++ b/matching/src/tests/test_matching_distance.cpp @@ -37,7 +37,10 @@ TEST_CASE("Different bounds", "[bounds]") CalculationParams params; params.initialization_depth = 2; - DistanceCalculator calc(bif_a, bif_b, params); + BifiltrationProxy bifp_a(bif_a, params.dim); + BifiltrationProxy bifp_b(bif_b, params.dim); + + DistanceCalculator calc(bifp_a, bifp_b, params); // REQUIRE(calc.max_x_ == Approx(max_x)); // REQUIRE(calc.max_y_ == Approx(max_y)); -- cgit v1.2.3