diff options
author | Marc Glisse <marc.glisse@inria.fr> | 2021-04-24 12:11:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-24 12:11:45 +0200 |
commit | 73cbc900975f8925cb1e2f2dc9cdd8820e17c9bd (patch) | |
tree | 6b8c5680791690502a5d78835c1b1a3f6c077b05 /src | |
parent | f21240a4c4e0fad25193dc0d409deee9715e0aae (diff) | |
parent | 33cb5826e62abf8dd84d2adb59d99fc1f54a2aa1 (diff) |
Merge pull request #476 from mglisse/far
Sparse Rips and choose_n_farthest_points, fix #456
Diffstat (limited to 'src')
-rw-r--r-- | src/Rips_complex/include/gudhi/Sparse_rips_complex.h | 115 | ||||
-rw-r--r-- | src/Simplex_tree/include/gudhi/Simplex_tree.h | 19 | ||||
-rw-r--r-- | src/Subsampling/include/gudhi/choose_n_farthest_points.h | 55 | ||||
-rw-r--r-- | src/Subsampling/test/test_choose_n_farthest_points.cpp | 5 | ||||
-rwxr-xr-x | src/python/test/test_rips_complex.py | 21 |
5 files changed, 161 insertions, 54 deletions
diff --git a/src/Rips_complex/include/gudhi/Sparse_rips_complex.h b/src/Rips_complex/include/gudhi/Sparse_rips_complex.h index a5501004..7ae7b317 100644 --- a/src/Rips_complex/include/gudhi/Sparse_rips_complex.h +++ b/src/Rips_complex/include/gudhi/Sparse_rips_complex.h @@ -15,12 +15,71 @@ #include <gudhi/graph_simplicial_complex.h> #include <gudhi/choose_n_farthest_points.h> -#include <boost/graph/adjacency_list.hpp> +#include <boost/graph/graph_traits.hpp> #include <boost/range/metafunctions.hpp> +#include <boost/iterator/counting_iterator.hpp> #include <vector> namespace Gudhi { +namespace rips_complex { +// A custom graph class, because boost::adjacency_list does not conveniently allow to choose vertex descriptors +template <class Vertex_handle, class Filtration_value> +struct Graph { + typedef std::vector<Vertex_handle> VList; + typedef std::vector<std::tuple<Vertex_handle, Vertex_handle, Filtration_value>> EList; + typedef typename VList::const_iterator vertex_iterator; + typedef boost::counting_iterator<std::size_t> edge_iterator; + VList vlist; + EList elist; +}; +template <class Vertex_handle, class Filtration_value> +void add_vertex(Vertex_handle v, Graph<Vertex_handle, Filtration_value>&g) { g.vlist.push_back(v); } +template <class Vertex_handle, class Filtration_value> +void add_edge(Vertex_handle u, Vertex_handle v, Filtration_value f, Graph<Vertex_handle, Filtration_value>&g) { g.elist.emplace_back(u, v, f); } +template <class Vertex_handle, class Filtration_value> +std::size_t num_vertices(Graph<Vertex_handle, Filtration_value> const&g) { return g.vlist.size(); } +template <class Vertex_handle, class Filtration_value> +std::size_t num_edges(Graph<Vertex_handle, Filtration_value> const&g) { return g.elist.size(); } +template <class Vertex_handle, class Filtration_value, class Iter = typename Graph<Vertex_handle, Filtration_value>::vertex_iterator> +std::pair<Iter, Iter> +vertices(Graph<Vertex_handle, Filtration_value> const&g) { + return { g.vlist.begin(), g.vlist.end() }; +} +template <class Vertex_handle, class Filtration_value> +std::pair<boost::counting_iterator<std::size_t>, boost::counting_iterator<std::size_t>> +edges(Graph<Vertex_handle, Filtration_value> const&g) { + typedef boost::counting_iterator<std::size_t> I; + return { I(0), I(g.elist.size()) }; +} +template <class Vertex_handle, class Filtration_value> +Vertex_handle source(std::size_t e, Graph<Vertex_handle, Filtration_value> const&g) { return std::get<0>(g.elist[e]); } +template <class Vertex_handle, class Filtration_value> +Vertex_handle target(std::size_t e, Graph<Vertex_handle, Filtration_value> const&g) { return std::get<1>(g.elist[e]); } +template <class Vertex_handle, class Filtration_value> +Filtration_value get(vertex_filtration_t, Graph<Vertex_handle, Filtration_value> const&, Vertex_handle) { return 0; } +template <class Vertex_handle, class Filtration_value> +Filtration_value get(edge_filtration_t, Graph<Vertex_handle, Filtration_value> const&g, std::size_t e) { return std::get<2>(g.elist[e]); } +} // namespace rips_complex +} // namespace Gudhi +namespace boost { +template <class Vertex_handle, class Filtration_value> +struct graph_traits<Gudhi::rips_complex::Graph<Vertex_handle, Filtration_value>> { + typedef Gudhi::rips_complex::Graph<Vertex_handle, Filtration_value> G; + struct traversal_category : vertex_list_graph_tag, edge_list_graph_tag {}; + typedef Vertex_handle vertex_descriptor; + typedef typename G::vertex_iterator vertex_iterator; + typedef std::size_t vertices_size_type; + typedef std::size_t edge_descriptor; + typedef typename G::edge_iterator edge_iterator; + typedef std::size_t edges_size_type; + typedef directed_tag directed_category; + typedef disallow_parallel_edge_tag edge_parallel_category; +}; +// Etc, since we don't expose this graph to the world, we know we are not going to query property_traits for instance. +} + +namespace Gudhi { namespace rips_complex { @@ -45,12 +104,8 @@ template <typename Filtration_value> class Sparse_rips_complex { private: // TODO(MG): use a different graph where we know we can safely insert in parallel. - typedef typename boost::adjacency_list<boost::vecS, boost::vecS, boost::directedS, - boost::property<vertex_filtration_t, Filtration_value>, - boost::property<edge_filtration_t, Filtration_value>> - Graph; - typedef int Vertex_handle; + typedef rips_complex::Graph<Vertex_handle, Filtration_value> Graph; public: /** \brief Sparse_rips_complex constructor from a list of points. @@ -63,10 +118,11 @@ class Sparse_rips_complex { * */ template <typename RandomAccessPointRange, typename Distance> - Sparse_rips_complex(const RandomAccessPointRange& points, Distance distance, double epsilon, Filtration_value mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value maxi=std::numeric_limits<Filtration_value>::infinity()) + Sparse_rips_complex(const RandomAccessPointRange& points, Distance distance, double const epsilon, Filtration_value const mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value const maxi=std::numeric_limits<Filtration_value>::infinity()) : epsilon_(epsilon) { GUDHI_CHECK(epsilon > 0, "epsilon must be positive"); auto dist_fun = [&](Vertex_handle i, Vertex_handle j) { return distance(points[i], points[j]); }; + // TODO: stop choose_n_farthest_points once it reaches mini or 0? subsampling::choose_n_farthest_points(dist_fun, boost::irange<Vertex_handle>(0, boost::size(points)), -1, -1, std::back_inserter(sorted_points), std::back_inserter(params)); compute_sparse_graph(dist_fun, epsilon, mini, maxi); @@ -83,7 +139,7 @@ class Sparse_rips_complex { * @param[in] maxi Maximal filtration value. Ignore anything above this scale. */ template <typename DistanceMatrix> - Sparse_rips_complex(const DistanceMatrix& distance_matrix, double epsilon, Filtration_value mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value maxi=std::numeric_limits<Filtration_value>::infinity()) + Sparse_rips_complex(const DistanceMatrix& distance_matrix, double const epsilon, Filtration_value const mini=-std::numeric_limits<Filtration_value>::infinity(), Filtration_value const maxi=std::numeric_limits<Filtration_value>::infinity()) : Sparse_rips_complex(boost::irange<Vertex_handle>(0, boost::size(distance_matrix)), [&](Vertex_handle i, Vertex_handle j) { return (i==j) ? 0 : (i<j) ? distance_matrix[j][i] : distance_matrix[i][j]; }, epsilon, mini, maxi) {} @@ -99,7 +155,7 @@ class Sparse_rips_complex { * */ template <typename SimplicialComplexForRips> - void create_complex(SimplicialComplexForRips& complex, int dim_max) { + void create_complex(SimplicialComplexForRips& complex, int const dim_max) { GUDHI_CHECK(complex.num_vertices() == 0, std::invalid_argument("Sparse_rips_complex::create_complex - simplicial complex is not empty")); @@ -108,17 +164,17 @@ class Sparse_rips_complex { complex.expansion(dim_max); return; } - const int n = boost::size(params); - std::vector<Filtration_value> lambda(n); + const std::size_t n = num_vertices(graph_); + std::vector<Filtration_value> lambda(max_v + 1); // lambda[original_order]=params[sorted_order] - for(int i=0;i<n;++i) + for(std::size_t i=0;i<n;++i) lambda[sorted_points[i]] = params[i]; double cst = epsilon_ * (1 - epsilon_) / 2; auto block = [cst,&complex,&lambda](typename SimplicialComplexForRips::Simplex_handle sh){ auto filt = complex.filtration(sh); - auto mini = filt * cst; + auto min_f = filt * cst; for(auto v : complex.simplex_vertex_range(sh)){ - if(lambda[v] < mini) + if(lambda[v] < min_f) return true; // v died before this simplex could be born } return false; @@ -129,32 +185,34 @@ class Sparse_rips_complex { private: // PointRange must be random access. template <typename Distance> - void compute_sparse_graph(Distance& dist, double epsilon, Filtration_value mini, Filtration_value maxi) { + void compute_sparse_graph(Distance& dist, double const epsilon, Filtration_value const mini, Filtration_value const maxi) { const auto& points = sorted_points; // convenience alias - const int n = boost::size(points); + std::size_t n = boost::size(points); double cst = epsilon * (1 - epsilon) / 2; - graph_.~Graph(); - new (&graph_) Graph(n); - // for(auto v : vertices(g)) // doesn't work :-( - typename boost::graph_traits<Graph>::vertex_iterator v_i, v_e; - for (std::tie(v_i, v_e) = vertices(graph_); v_i != v_e; ++v_i) { - auto v = *v_i; - // This whole loop might not be necessary, leave it until someone investigates if it is safe to remove. - put(vertex_filtration_t(), graph_, v, 0); + max_v = -1; // Useful for the size of the map lambda. + for (std::size_t i = 0; i < n; ++i) { + if ((params[i] < mini || params[i] <= 0) && i != 0) break; + // The parameter of the first point is not very meaningful, it is supposed to be infinite, + // but if the type does not support it... + // It would be better to do this reduction of the number of points earlier, around choose_n_farthest_points. + add_vertex(points[i], graph_); + max_v = std::max(max_v, points[i]); } + n = num_vertices(graph_); // TODO(MG): // - make it parallel // - only test near-enough neighbors - for (int i = 0; i < n; ++i) { + for (std::size_t i = 0; i < n; ++i) { auto&& pi = points[i]; auto li = params[i]; - if (li < mini) break; - for (int j = i + 1; j < n; ++j) { + // If we inserted all the points, points with multiplicity would get connected to their first representative, + // no need to handle the redundant ones in the outer loop. + // if (li <= 0 && i != 0) break; + for (std::size_t j = i + 1; j < n; ++j) { auto&& pj = points[j]; auto d = dist(pi, pj); auto lj = params[j]; - if (lj < mini) break; GUDHI_CHECK(lj <= li, "Bad furthest point sorting"); Filtration_value alpha; @@ -178,6 +236,7 @@ class Sparse_rips_complex { Graph graph_; double epsilon_; + Vertex_handle max_v; // Because of the arbitrary split between constructor and create_complex // sorted_points[sorted_order]=original_order std::vector<Vertex_handle> sorted_points; diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 85d6c3b0..85790baf 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -1060,8 +1060,8 @@ class Simplex_tree { * * Inserts all vertices and edges given by a OneSkeletonGraph. * OneSkeletonGraph must be a model of - * <a href="http://www.boost.org/doc/libs/1_65_1/libs/graph/doc/EdgeListGraph.html">boost::EdgeListGraph</a> - * and <a href="http://www.boost.org/doc/libs/1_65_1/libs/graph/doc/PropertyGraph.html">boost::PropertyGraph</a>. + * <a href="http://www.boost.org/doc/libs/1_76_0/libs/graph/doc/VertexAndEdgeListGraph.html">boost::VertexAndEdgeListGraph</a> + * and <a href="http://www.boost.org/doc/libs/1_76_0/libs/graph/doc/PropertyGraph.html">boost::PropertyGraph</a>. * * The vertex filtration value is accessible through the property tag * vertex_filtration_t. @@ -1081,7 +1081,10 @@ class Simplex_tree { // the simplex tree must be empty assert(num_simplices() == 0); - if (boost::num_vertices(skel_graph) == 0) { + // is there a better way to let the compiler know that we don't mean Simplex_tree::num_vertices? + using boost::num_vertices; + + if (num_vertices(skel_graph) == 0) { return; } if (num_edges(skel_graph) == 0) { @@ -1090,18 +1093,18 @@ class Simplex_tree { dimension_ = 1; } - root_.members_.reserve(boost::num_vertices(skel_graph)); + root_.members_.reserve(num_vertices(skel_graph)); typename boost::graph_traits<OneSkeletonGraph>::vertex_iterator v_it, v_it_end; - for (std::tie(v_it, v_it_end) = boost::vertices(skel_graph); v_it != v_it_end; + for (std::tie(v_it, v_it_end) = vertices(skel_graph); v_it != v_it_end; ++v_it) { root_.members_.emplace_hint( root_.members_.end(), *v_it, - Node(&root_, boost::get(vertex_filtration_t(), skel_graph, *v_it))); + Node(&root_, get(vertex_filtration_t(), skel_graph, *v_it))); } std::pair<typename boost::graph_traits<OneSkeletonGraph>::edge_iterator, - typename boost::graph_traits<OneSkeletonGraph>::edge_iterator> boost_edges = boost::edges(skel_graph); + typename boost::graph_traits<OneSkeletonGraph>::edge_iterator> boost_edges = edges(skel_graph); // boost_edges.first is the equivalent to boost_edges.begin() // boost_edges.second is the equivalent to boost_edges.end() for (; boost_edges.first != boost_edges.second; boost_edges.first++) { @@ -1123,7 +1126,7 @@ class Simplex_tree { } sh->second.children()->members().emplace(v, - Node(sh->second.children(), boost::get(edge_filtration_t(), skel_graph, edge))); + Node(sh->second.children(), get(edge_filtration_t(), skel_graph, edge))); } } diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h index e6347d96..44c02df1 100644 --- a/src/Subsampling/include/gudhi/choose_n_farthest_points.h +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -42,7 +42,7 @@ enum : std::size_t { * The iteration starts with the landmark `starting point` or, if `starting point==random_starting_point`, * with a random landmark. * It chooses `final_size` points from a random access range - * `input_pts` (or the number of distinct points if `final_size` is larger) + * `input_pts` (or the number of input points if `final_size` is larger) * and outputs them in the output iterator `output_it`. It also * outputs the distance from each of those points to the set of previous * points in `dist_it`. @@ -88,34 +88,57 @@ void choose_n_farthest_points(Distance dist, starting_point = dis(gen); } - std::size_t current_number_of_landmarks = 0; // counter for landmarks - static_assert(std::numeric_limits<double>::has_infinity, "the number type needs to support infinity()"); // FIXME: don't hard-code the type as double. For Epeck_d, we also want to handle types that do not have an infinity. - const double infty = std::numeric_limits<double>::infinity(); // infinity (see next entry) - std::vector< double > dist_to_L(nb_points, infty); // vector of current distances to L from input_pts + static_assert(std::numeric_limits<double>::has_infinity, "the number type needs to support infinity()"); + + *output_it++ = input_pts[starting_point]; + *dist_it++ = std::numeric_limits<double>::infinity(); + if (final_size == 1) return; + + std::vector<std::size_t> points(nb_points); // map from remaining points to indexes in input_pts + std::vector< double > dist_to_L(nb_points); // vector of current distances to L from points + for(std::size_t i = 0; i < nb_points; ++i) { + points[i] = i; + dist_to_L[i] = dist(input_pts[i], input_pts[starting_point]); + } + // The indirection through points makes the program a bit slower. Some alternatives: + // - the original code never removed points and counted on them not + // reappearing because of a self-distance of 0. This causes unnecessary + // computations when final_size is large. It also causes trouble if there are + // input points at distance 0 from each other. + // - copy input_pts and update the local copy when removing points. std::size_t curr_max_w = starting_point; - for (current_number_of_landmarks = 0; current_number_of_landmarks != final_size; current_number_of_landmarks++) { - // curr_max_w at this point is the next landmark - *output_it++ = input_pts[curr_max_w]; - *dist_it++ = dist_to_L[curr_max_w]; + for (std::size_t current_number_of_landmarks = 1; current_number_of_landmarks != final_size; current_number_of_landmarks++) { + std::size_t latest_landmark = points[curr_max_w]; + // To remove the latest landmark at index curr_max_w, replace it + // with the last point and reduce the length of the vector. + std::size_t last = points.size() - 1; + if (curr_max_w != last) { + points[curr_max_w] = points[last]; + dist_to_L[curr_max_w] = dist_to_L[last]; + } + points.pop_back(); + + // Update distances to L. std::size_t i = 0; - for (auto&& p : input_pts) { - double curr_dist = dist(p, input_pts[curr_max_w]); + for (auto p : points) { + double curr_dist = dist(input_pts[p], input_pts[latest_landmark]); if (curr_dist < dist_to_L[i]) dist_to_L[i] = curr_dist; ++i; } - // choose the next curr_max_w - double curr_max_dist = 0; // used for defining the furhest point from L - for (i = 0; i < dist_to_L.size(); i++) + // choose the next landmark + curr_max_w = 0; + double curr_max_dist = dist_to_L[curr_max_w]; // used for defining the furthest point from L + for (i = 1; i < points.size(); i++) if (dist_to_L[i] > curr_max_dist) { curr_max_dist = dist_to_L[i]; curr_max_w = i; } - // If all that remains are duplicates of points already taken, stop. - if (curr_max_dist == 0) break; + *output_it++ = input_pts[points[curr_max_w]]; + *dist_it++ = dist_to_L[curr_max_w]; } } diff --git a/src/Subsampling/test/test_choose_n_farthest_points.cpp b/src/Subsampling/test/test_choose_n_farthest_points.cpp index 94793295..c384c61b 100644 --- a/src/Subsampling/test/test_choose_n_farthest_points.cpp +++ b/src/Subsampling/test/test_choose_n_farthest_points.cpp @@ -102,11 +102,12 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(test_choose_farthest_point_limits, Kernel, list_of BOOST_CHECK(distances[1] == 1); landmarks.clear(); distances.clear(); - // Ignore duplicated points + // Accept duplicated points points.emplace_back(point.begin(), point.end()); Gudhi::subsampling::choose_n_farthest_points(d, points, -1, -1, std::back_inserter(landmarks), std::back_inserter(distances)); - BOOST_CHECK(landmarks.size() == 2 && distances.size() == 2); + BOOST_CHECK(landmarks.size() == 3 && distances.size() == 3); BOOST_CHECK(distances[0] == std::numeric_limits<FT>::infinity()); BOOST_CHECK(distances[1] == 1); + BOOST_CHECK(distances[2] == 0); landmarks.clear(); distances.clear(); } diff --git a/src/python/test/test_rips_complex.py b/src/python/test/test_rips_complex.py index b86e7498..a2f43a1b 100755 --- a/src/python/test/test_rips_complex.py +++ b/src/python/test/test_rips_complex.py @@ -133,3 +133,24 @@ def test_filtered_rips_from_distance_matrix(): assert simplex_tree.num_simplices() == 8 assert simplex_tree.num_vertices() == 4 + + +def test_sparse_with_multiplicity(): + points = [ + [3, 4], + [0.1, 2], + [0.1, 2], + [0.1, 2], + [0.1, 2], + [0.1, 2], + [0.1, 2], + [0.1, 2], + [0.1, 2], + [0.1, 2], + [0.1, 2], + [3, 4.1], + ] + rips = RipsComplex(points=points, sparse=0.01) + simplex_tree = rips.create_simplex_tree(max_dimension=2) + assert simplex_tree.num_simplices() == 7 + diag = simplex_tree.persistence() |