diff options
Diffstat (limited to 'src')
25 files changed, 1558 insertions, 97 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c7744c49..3a831814 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -108,6 +108,8 @@ else() add_subdirectory(example/Bitmap_cubical_complex) add_subdirectory(example/Witness_complex) add_subdirectory(example/Alpha_complex) + add_subdirectory(example/Spatial_searching) + add_subdirectory(example/Subsampling) # data points generator add_subdirectory(data/points/generator) diff --git a/src/Doxyfile b/src/Doxyfile index dd9a33fb..51950b3d 100644 --- a/src/Doxyfile +++ b/src/Doxyfile @@ -845,7 +845,9 @@ IMAGE_PATH = doc/Skeleton_blocker/ \ doc/Simplex_tree/ \ doc/Persistent_cohomology/ \ doc/Witness_complex/ \ - doc/Bitmap_cubical_complex/ + doc/Bitmap_cubical_complex/ \ + doc/Subsampling/ \ + doc/Spatial_searching/ # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program diff --git a/src/Spatial_searching/doc/Intro_spatial_searching.h b/src/Spatial_searching/doc/Intro_spatial_searching.h new file mode 100644 index 00000000..2406c931 --- /dev/null +++ b/src/Spatial_searching/doc/Intro_spatial_searching.h @@ -0,0 +1,58 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Clement Jamin + * + * Copyright (C) 2016 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef DOC_SPATIAL_SEARCHING_INTRO_SPATIAL_SEARCHING_H_ +#define DOC_SPATIAL_SEARCHING_INTRO_SPATIAL_SEARCHING_H_ + +// needs namespaces for Doxygen to link on classes +namespace Gudhi { +namespace spatial_searching { + +/** \defgroup spatial_searching Spatial_searching + * + * \author Clément Jamin + * + * @{ + * + * \section introduction Introduction + * + * This Gudhi component is a wrapper around + * <a target="_blank" href="http://doc.cgal.org/latest/Spatial_searching/index.html">CGAL dD spatial searching algorithms</a>. + * It provides a simplified API to perform (approximate) neighbor searches. Contrary to CGAL default behavior, the tree + * does not store the points themselves, but stores indices. + * + * \section spatial_searching_examples Example + * + * This example generates 500 random points, then performs queries for nearest and farthest points using different methods. + * + * \include Spatial_searching/example_spatial_searching.cpp + * + * \copyright GNU General Public License v3. + * \verbatim Contact: gudhi-users@lists.gforge.inria.fr \endverbatim + */ +/** @} */ // end defgroup spatial_searching + +} // namespace spatial_searching + +} // namespace Gudhi + +#endif // DOC_SPATIAL_SEARCHING_INTRO_SPATIAL_SEARCHING_H_ diff --git a/src/Spatial_searching/example/CMakeLists.txt b/src/Spatial_searching/example/CMakeLists.txt new file mode 100644 index 00000000..3c3970d8 --- /dev/null +++ b/src/Spatial_searching/example/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 2.6) +project(Spatial_searching_examples) + +if(CGAL_FOUND) + if (NOT CGAL_VERSION VERSION_LESS 4.8.1) + message(STATUS "CGAL version: ${CGAL_VERSION}.") + + if (EIGEN3_FOUND) + add_executable( Spatial_searching_example_spatial_searching example_spatial_searching.cpp ) + target_link_libraries(Spatial_searching_example_spatial_searching ${CGAL_LIBRARY}) + else() + message(WARNING "Eigen3 not found. Version 3.1.0 is required for the Tangential_complex examples.") + endif() + else() + message(WARNING "CGAL version: ${CGAL_VERSION} is too old to compile Spatial_searching examples. Version 4.8.1 is required.") + endif () +else() + message(WARNING "CGAL not found. It is required for the Spatial_searching examples.") +endif() diff --git a/src/Spatial_searching/example/example_spatial_searching.cpp b/src/Spatial_searching/example/example_spatial_searching.cpp new file mode 100644 index 00000000..7b48d055 --- /dev/null +++ b/src/Spatial_searching/example/example_spatial_searching.cpp @@ -0,0 +1,54 @@ +#include <gudhi/Kd_tree_search.h> + +#include <CGAL/Epick_d.h> +#include <CGAL/Random.h> + +#include <vector> + +namespace gss = Gudhi::spatial_searching; + +int main (void) +{ + typedef CGAL::Epick_d<CGAL::Dimension_tag<4> > K; + typedef typename K::FT FT; + typedef typename K::Point_d Point; + typedef std::vector<Point> Points; + + typedef gss::Kd_tree_search<K, Points> Points_ds; + + CGAL::Random rd; + + Points points; + for (int i = 0; i < 500; ++i) + points.push_back(Point(rd.get_double(-1.,1), rd.get_double(-1.,1), rd.get_double(-1.,1), rd.get_double(-1.,1))); + + Points_ds points_ds(points); + + // 10-nearest neighbor query + std::cout << "10 nearest neighbors from points[20]:\n"; + auto kns_range = points_ds.query_k_nearest_neighbors(points[20], 10, true); + for (auto const& nghb : kns_range) + std::cout << nghb.first << " (sq. dist. = " << nghb.second << ")\n"; + + // Incremental nearest neighbor query + std::cout << "Incremental nearest neighbors:\n"; + auto ins_range = points_ds.query_incremental_nearest_neighbors(points[45]); + // Get all the neighbors that are closer than 0.5 + for (auto ins_iterator = ins_range.begin(); ins_iterator->second < 0.5*0.5 ; ++ins_iterator) + std::cout << ins_iterator->first << " (sq. dist. = " << ins_iterator->second << ")\n"; + + // 10-farthest neighbor query + std::cout << "10 farthest neighbors from points[20]:\n"; + auto kfs_range = points_ds.query_k_farthest_neighbors(points[20], 10, true); + for (auto const& nghb : kfs_range) + std::cout << nghb.first << " (sq. dist. = " << nghb.second << ")\n"; + + // Incremental farthest neighbor query + std::cout << "Incremental farthest neighbors:\n"; + auto ifs_range = points_ds.query_incremental_farthest_neighbors(points[45]); + // Get all the neighbors that are farthest than 2.3 + for (auto ifs_iterator = ifs_range.begin(); ifs_iterator->second > 2.3*2.3 ; ++ifs_iterator) + std::cout << ifs_iterator->first << " (sq. dist. = " << ifs_iterator->second << ")\n"; + + return 0; +} diff --git a/src/Spatial_searching/include/gudhi/Kd_tree_search.h b/src/Spatial_searching/include/gudhi/Kd_tree_search.h new file mode 100644 index 00000000..ff630e9d --- /dev/null +++ b/src/Spatial_searching/include/gudhi/Kd_tree_search.h @@ -0,0 +1,276 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Clement Jamin + * + * Copyright (C) 2016 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef GUDHI_SPATIAL_TREE_DS_H_ +#define GUDHI_SPATIAL_TREE_DS_H_ + +#include <CGAL/Orthogonal_k_neighbor_search.h> +#include <CGAL/Orthogonal_incremental_neighbor_search.h> +#include <CGAL/Search_traits.h> +#include <CGAL/Search_traits_adapter.h> +#include <CGAL/property_map.h> + +#include <boost/property_map/property_map.hpp> +#include <boost/iterator/counting_iterator.hpp> + +#include <cstddef> +#include <vector> + +namespace Gudhi { +namespace spatial_searching { + + + /** + * \class Kd_tree_search Kd_tree_search.h gudhi/Kd_tree_search.h + * \brief Spatial tree data structure to perform (approximate) nearest neighbor search. + * + * \ingroup spatial_searching + * + * \details + * The class Kd_tree_search is a tree-based data structure, based on + * <a target="_blank" href="http://doc.cgal.org/latest/Spatial_searching/index.html">CGAL dD spatial searching data structures</a>. + * It provides a simplified API to perform (approximate) nearest neighbor searches. Contrary to CGAL default behavior, the tree + * does not store the points themselves, but stores indices. + * + * There are two types of queries: the <i>k-nearest neighbor query</i>, where <i>k</i> is fixed and the <i>k</i> nearest points are + * computed right away, + * and the <i>incremental nearest neighbor query</i>, where no number of neighbors is provided during the call, as the + * neighbors will be computed incrementally when the iterator on the range is incremented. + * + * \tparam K must be a model of the <a target="_blank" + * href="http://doc.cgal.org/latest/Spatial_searching/classSearchTraits.html">SearchTraits</a> + * concept, such as the <a target="_blank" + * href="http://doc.cgal.org/latest/Kernel_d/classCGAL_1_1Epick__d.html">CGAL::Epick_d</a> class, which + * can be static if you know the ambiant dimension at compile-time, or dynamic if you don't. + * \tparam Point_range is the type of the range that provides the points. + * It must be a range whose iterator type is a `RandomAccessIterator`. + */ +template <typename K, typename Point_range> +class Kd_tree_search +{ + typedef boost::iterator_property_map< + typename Point_range::const_iterator, + CGAL::Identity_property_map<std::ptrdiff_t> > Point_property_map; + +public: + /// The kernel. + typedef K Kernel; + /// Number type used for distances. + typedef typename Kernel::FT FT; + /// The point type. + typedef typename Point_range::value_type Point; + + typedef CGAL::Search_traits< + FT, Point, + typename Kernel::Cartesian_const_iterator_d, + typename Kernel::Construct_cartesian_const_iterator_d> Traits_base; + + typedef CGAL::Search_traits_adapter< + std::ptrdiff_t, + Point_property_map, + Traits_base> STraits; + + typedef CGAL::Orthogonal_k_neighbor_search<STraits> K_neighbor_search; + typedef typename K_neighbor_search::Tree Tree; + typedef typename K_neighbor_search::Distance Distance; + /// \brief The range returned by a k-nearest neighbor search. + /// Its value type is `std::pair<std::size_t, FT>` where `first` is the index + /// of a point P and `second` is the squared distance between P and the query point. + typedef K_neighbor_search KNS_range; + + typedef CGAL::Orthogonal_incremental_neighbor_search< + STraits, Distance, CGAL::Sliding_midpoint<STraits>, Tree> + Incremental_neighbor_search; + /// \brief The range returned by an incremental nearest neighbor search. + /// Its value type is `std::pair<std::size_t, FT>` where `first` is the index + /// of a point P and `second` is the squared distance between P and the query point. + typedef Incremental_neighbor_search INS_range; + + /// \brief Constructor + /// @param[in] points Const reference to the point range. This range + /// is not copied, so it should not be destroyed or modified afterwards. + Kd_tree_search(Point_range const& points) + : m_points(points), + m_tree(boost::counting_iterator<std::ptrdiff_t>(0), + boost::counting_iterator<std::ptrdiff_t>(points.size()), + typename Tree::Splitter(), + STraits(std::begin(points)) ) + { + // Build the tree now (we don't want to wait for the first query) + m_tree.build(); + } + + /// \brief Constructor + /// @param[in] points Const reference to the point range. This range + /// is not copied, so it should not be destroyed or modified afterwards. + /// @param[in] only_these_points Specifies the indices of the points that + /// should be actually inserted into the tree. The other points are ignored. + template <typename Point_indices_range> + Kd_tree_search( + Point_range const& points, + Point_indices_range const& only_these_points) + : m_points(points), + m_tree( + only_these_points.begin(), only_these_points.end(), + typename Tree::Splitter(), + STraits(std::begin(points))) + { + // Build the tree now (we don't want to wait for the first query) + m_tree.build(); + } + + /// \brief Constructor + /// @param[in] points Const reference to the point range. This range + /// is not copied, so it should not be destroyed or modified afterwards. + /// @param[in] begin_idx, past_the_end_idx Define the subset of the points that + /// should be actually inserted into the tree. The other points are ignored. + Kd_tree_search( + Point_range const& points, + std::size_t begin_idx, std::size_t past_the_end_idx) + : m_points(points), + m_tree( + boost::counting_iterator<std::ptrdiff_t>(begin_idx), + boost::counting_iterator<std::ptrdiff_t>(past_the_end_idx), + typename Tree::Splitter(), + STraits(std::begin(points)) ) + { + // Build the tree now (we don't want to wait for the first query) + m_tree.build(); + } + + // Be careful, this function invalidates the tree, + // which will be recomputed at the next query + void insert(std::ptrdiff_t point_idx) + { + m_tree.insert(point_idx); + } + + /// \brief Search for the k-nearest neighbors from a query point. + /// @param[in] p The query point. + /// @param[in] k Number of nearest points to search. + /// @param[in] sorted Indicates if the computed sequence of k-nearest neighbors needs to be sorted. + /// @param[in] eps Approximation factor. + /// @return A range containing the k-nearest neighbors. + KNS_range query_k_nearest_neighbors(const + Point &p, + unsigned int k, + bool sorted = true, + FT eps = FT(0)) const + { + // Initialize the search structure, and search all N points + // Note that we need to pass the Distance explicitly since it needs to + // know the property map + K_neighbor_search search( + m_tree, + p, + k, + eps, + true, + CGAL::Distance_adapter<std::ptrdiff_t,Point_property_map,CGAL::Euclidean_distance<Traits_base> >( + std::begin(m_points)), + sorted); + + return search; + } + + /// \brief Search incrementally for the nearest neighbors from a query point. + /// @param[in] p The query point. + /// @param[in] eps Approximation factor. + /// @return A range containing the neighbors sorted by their distance to p. + /// All the neighbors are not computed by this function, but they will be + /// computed incrementally when the iterator on the range is incremented. + INS_range query_incremental_nearest_neighbors(const Point &p, FT eps = FT(0)) const + { + // Initialize the search structure, and search all N points + // Note that we need to pass the Distance explicitly since it needs to + // know the property map + Incremental_neighbor_search search( + m_tree, + p, + eps, + true, + CGAL::Distance_adapter<std::ptrdiff_t, Point_property_map, CGAL::Euclidean_distance<Traits_base> >( + std::begin(m_points)) ); + + return search; + } + + /// \brief Search for the k-farthest points from a query point. + /// @param[in] p The query point. + /// @param[in] k Number of farthest points to search. + /// @param[in] sorted Indicates if the computed sequence of k-farthest neighbors needs to be sorted. + /// @param[in] eps Approximation factor. + /// @return A range containing the k-farthest neighbors. + KNS_range query_k_farthest_neighbors(const + Point &p, + unsigned int k, + bool sorted = true, + FT eps = FT(0)) const + { + // Initialize the search structure, and search all N points + // Note that we need to pass the Distance explicitly since it needs to + // know the property map + K_neighbor_search search( + m_tree, + p, + k, + eps, + false, + CGAL::Distance_adapter<std::ptrdiff_t,Point_property_map,CGAL::Euclidean_distance<Traits_base> >( + std::begin(m_points)), + sorted); + + return search; + } + + /// \brief Search incrementally for the farthest neighbors from a query point. + /// @param[in] p The query point. + /// @param[in] eps Approximation factor. + /// @return A range containing the neighbors sorted by their distance to p. + /// All the neighbors are not computed by this function, but they will be + /// computed incrementally when the iterator on the range is incremented. + INS_range query_incremental_farthest_neighbors(const Point &p, FT eps = FT(0)) const + { + // Initialize the search structure, and search all N points + // Note that we need to pass the Distance explicitly since it needs to + // know the property map + Incremental_neighbor_search search( + m_tree, + p, + eps, + false, + CGAL::Distance_adapter<std::ptrdiff_t, Point_property_map, CGAL::Euclidean_distance<Traits_base> >( + std::begin(m_points)) ); + + return search; + } + + +private: + Point_range const& m_points; + Tree m_tree; +}; + +} // namespace spatial_searching +} // namespace Gudhi + +#endif // GUDHI_SPATIAL_TREE_DS_H_ diff --git a/src/Spatial_searching/test/CMakeLists.txt b/src/Spatial_searching/test/CMakeLists.txt new file mode 100644 index 00000000..0f2c5ae4 --- /dev/null +++ b/src/Spatial_searching/test/CMakeLists.txt @@ -0,0 +1,30 @@ +cmake_minimum_required(VERSION 2.6) +project(Spatial_searching_tests) + +if (GCOVR_PATH) + # for gcovr to make coverage reports - Corbera Jenkins plugin + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage") +endif() +if (GPROF_PATH) + # for gprof to make coverage reports - Jenkins + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg") +endif() + +if(CGAL_FOUND) + if (NOT CGAL_VERSION VERSION_LESS 4.8.1) + message(STATUS "CGAL version: ${CGAL_VERSION}.") + + if (EIGEN3_FOUND) + + add_executable( Spatial_searching_test_Kd_tree_search test_Kd_tree_search.cpp ) + target_link_libraries(Spatial_searching_test_Kd_tree_search + ${CGAL_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) + else() + message(WARNING "Eigen3 not found. Version 3.1.0 is required for the Spatial_searching tests.") + endif() + else() + message(WARNING "CGAL version: ${CGAL_VERSION} is too old to compile Subsampling tests. Version 4.8.1 is required.") + endif () +else() + message(WARNING "CGAL not found. It is required for the Spatial_searching tests.") +endif() diff --git a/src/Spatial_searching/test/test_Kd_tree_search.cpp b/src/Spatial_searching/test/test_Kd_tree_search.cpp new file mode 100644 index 00000000..6f99b288 --- /dev/null +++ b/src/Spatial_searching/test/test_Kd_tree_search.cpp @@ -0,0 +1,118 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Clement Jamin + * + * Copyright (C) 2016 INRIA Sophia-Antipolis (France) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MODULE Spatial_searching - test Kd_tree_search +#include <boost/test/unit_test.hpp> + +#include <gudhi/Kd_tree_search.h> + +#include <CGAL/Epick_d.h> +#include <CGAL/Random.h> + +#include <array> +#include <vector> + +BOOST_AUTO_TEST_CASE(test_Kd_tree_search) +{ + typedef CGAL::Epick_d<CGAL::Dimension_tag<4> > K; + typedef K::FT FT; + typedef K::Point_d Point; + typedef std::vector<Point> Points; + + typedef Gudhi::spatial_searching::Kd_tree_search< + K, Points> Points_ds; + + CGAL::Random rd; + + Points points; + for (int i = 0 ; i < 500 ; ++i) + points.push_back(Point(rd.get_double(-1.,1), rd.get_double(-1.,1), rd.get_double(-1.,1), rd.get_double(-1.,1))); + + Points_ds points_ds(points); + + // Test query_k_nearest_neighbors + std::size_t closest_pt_index = + points_ds.query_k_nearest_neighbors(points[10], 1, false).begin()->first; + BOOST_CHECK(closest_pt_index == 10); + + auto kns_range = points_ds.query_k_nearest_neighbors(points[20], 10, true); + + std::vector<std::size_t> knn_result; + FT last_dist = -1.; + for (auto const& nghb : kns_range) + { + BOOST_CHECK(nghb.second > last_dist); + knn_result.push_back(nghb.second); + last_dist = nghb.second; + } + + // Test query_incremental_nearest_neighbors + closest_pt_index = + points_ds.query_incremental_nearest_neighbors(points[10]).begin()->first; + BOOST_CHECK(closest_pt_index == 10); + + auto ins_range = points_ds.query_incremental_nearest_neighbors(points[20]); + + std::vector<std::size_t> inn_result; + last_dist = -1.; + auto ins_it = ins_range.begin(); + for (int i = 0 ; i < 10 ; ++ins_it, ++i) + { + auto const& nghb = *ins_it; + BOOST_CHECK(nghb.second > last_dist); + inn_result.push_back(nghb.second); + last_dist = nghb.second; + } + + // Same result for KNN and INN? + BOOST_CHECK(knn_result == inn_result); + + // Test query_k_farthest_neighbors + auto kfs_range = points_ds.query_k_farthest_neighbors(points[20], 10, true); + + std::vector<std::size_t> kfn_result; + last_dist = kfs_range.begin()->second; + for (auto const& nghb : kfs_range) + { + BOOST_CHECK(nghb.second <= last_dist); + kfn_result.push_back(nghb.second); + last_dist = nghb.second; + } + + // Test query_k_farthest_neighbors + auto ifs_range = points_ds.query_incremental_farthest_neighbors(points[20]); + + std::vector<std::size_t> ifn_result; + last_dist = ifs_range.begin()->second; + auto ifs_it = ifs_range.begin(); + for (int i = 0; i < 10; ++ifs_it, ++i) + { + auto const& nghb = *ifs_it; + BOOST_CHECK(nghb.second <= last_dist); + ifn_result.push_back(nghb.second); + last_dist = nghb.second; + } + + // Same result for KFN and IFN? + BOOST_CHECK(kfn_result == ifn_result); +} diff --git a/src/Subsampling/doc/Intro_subsampling.h b/src/Subsampling/doc/Intro_subsampling.h new file mode 100644 index 00000000..f8eb2fdd --- /dev/null +++ b/src/Subsampling/doc/Intro_subsampling.h @@ -0,0 +1,70 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Clement Jamin + * + * Copyright (C) 2016 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef DOC_SUBSAMPLING_INTRO_SUBSAMPLING_H_ +#define DOC_SUBSAMPLING_INTRO_SUBSAMPLING_H_ + +// needs namespace for Doxygen to link on classes +namespace Gudhi { +// needs namespace for Doxygen to link on classes +namespace subsampling { + +/** \defgroup subsampling Subsampling + * + * \author Clément Jamin, Siargey Kachanovich + * + * @{ + * + * \section introduction Introduction + * + * This Gudhi component offers methods to subsample a set of points. + * + * \section sparsifyexamples Example: sparsify_point_set + * + * This example outputs a subset of the input points so that the + * squared distance between any two points + * is greater than or equal to 0.4. + * + * \include Subsampling/example_sparsify_point_set.cpp + * + * \section farthestpointexamples Example: choose_n_farthest_points + * + * This example outputs a subset of 100 points obtained by González algorithm, + * starting with a random point. + * + * \include Subsampling/example_choose_n_farthest_points.cpp + * + * \section randompointexamples Example: pick_n_random_points + * + * This example outputs a subset of 100 points picked randomly. + * + * \include Subsampling/example_pick_n_random_points.cpp + * \copyright GNU General Public License v3. + * \verbatim Contact: gudhi-users@lists.gforge.inria.fr \endverbatim + */ +/** @} */ // end defgroup subsampling + +} // namespace subsampling + +} // namespace Gudhi + +#endif // DOC_SUBSAMPLING_INTRO_SUBSAMPLING_H_ diff --git a/src/Subsampling/example/CMakeLists.txt b/src/Subsampling/example/CMakeLists.txt new file mode 100644 index 00000000..81b39d6e --- /dev/null +++ b/src/Subsampling/example/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 2.6) +project(Subsampling_examples) + +if(CGAL_FOUND) + if (NOT CGAL_VERSION VERSION_LESS 4.8.1) + message(STATUS "CGAL version: ${CGAL_VERSION}.") + + if (EIGEN3_FOUND) + + add_executable(Subsampling_example_pick_n_random_points example_pick_n_random_points.cpp) + add_executable(Subsampling_example_choose_n_farthest_points example_choose_n_farthest_points.cpp) + add_executable(Subsampling_example_sparsify_point_set example_sparsify_point_set.cpp) + target_link_libraries(Subsampling_example_sparsify_point_set ${CGAL_LIBRARY}) + else() + message(WARNING "Eigen3 not found. Version 3.1.0 is required for Subsampling feature.") + endif() + else() + message(WARNING "CGAL version: ${CGAL_VERSION} is too old to compile Subsampling examples. Version 4.8.1 is required.") + endif () +else() + message(WARNING "CGAL not found. It is required for the Subsampling examples.") +endif() diff --git a/src/Subsampling/example/example_choose_n_farthest_points.cpp b/src/Subsampling/example/example_choose_n_farthest_points.cpp new file mode 100644 index 00000000..9955d0ec --- /dev/null +++ b/src/Subsampling/example/example_choose_n_farthest_points.cpp @@ -0,0 +1,29 @@ +#include <gudhi/choose_n_farthest_points.h> + +#include <CGAL/Epick_d.h> +#include <CGAL/Random.h> + +#include <array> +#include <vector> +#include <iterator> + +int main (void) +{ + typedef CGAL::Epick_d<CGAL::Dimension_tag<4> > K; + typedef typename K::FT FT; + typedef typename K::Point_d Point_d; + + CGAL::Random rd; + + std::vector<Point_d> points; + for (int i = 0 ; i < 500 ; ++i) + points.push_back(Point_d(std::array<FT,4>({rd.get_double(-1.,1),rd.get_double(-1.,1),rd.get_double(-1.,1),rd.get_double(-1.,1)}))); + + K k; + std::vector<Point_d> results; + Gudhi::subsampling::choose_n_farthest_points(k, points, 100, std::back_inserter(results)); + std::cout << "Before sparsification: " << points.size() << " points.\n"; + std::cout << "After sparsification: " << results.size() << " points.\n"; + + return 0; +} diff --git a/src/Subsampling/example/example_pick_n_random_points.cpp b/src/Subsampling/example/example_pick_n_random_points.cpp new file mode 100644 index 00000000..d2f063ba --- /dev/null +++ b/src/Subsampling/example/example_pick_n_random_points.cpp @@ -0,0 +1,29 @@ +#include <gudhi/pick_n_random_points.h> + +#include <CGAL/Epick_d.h> +#include <CGAL/Random.h> + +#include <array> +#include <vector> +#include <iterator> + +int main (void) +{ + typedef CGAL::Epick_d<CGAL::Dimension_tag<4> > K; + typedef typename K::FT FT; + typedef typename K::Point_d Point_d; + + CGAL::Random rd; + + std::vector<Point_d> points; + for (int i = 0 ; i < 500 ; ++i) + points.push_back(Point_d(std::array<FT,4>({rd.get_double(-1.,1),rd.get_double(-1.,1),rd.get_double(-1.,1),rd.get_double(-1.,1)}))); + + K k; + std::vector<Point_d> results; + Gudhi::subsampling::pick_n_random_points(points, 100, std::back_inserter(results)); + std::cout << "Before sparsification: " << points.size() << " points.\n"; + std::cout << "After sparsification: " << results.size() << " points.\n"; + + return 0; +} diff --git a/src/Subsampling/example/example_sparsify_point_set.cpp b/src/Subsampling/example/example_sparsify_point_set.cpp new file mode 100644 index 00000000..cb5ccf0b --- /dev/null +++ b/src/Subsampling/example/example_sparsify_point_set.cpp @@ -0,0 +1,29 @@ +#include <gudhi/sparsify_point_set.h> + +#include <CGAL/Epick_d.h> +#include <CGAL/Random.h> + +#include <array> +#include <vector> +#include <iterator> + +int main (void) +{ + typedef CGAL::Epick_d<CGAL::Dimension_tag<4> > K; + typedef typename K::FT FT; + typedef typename K::Point_d Point_d; + + CGAL::Random rd; + + std::vector<Point_d> points; + for (int i = 0 ; i < 500 ; ++i) + points.push_back(Point_d(std::array<FT,4>({rd.get_double(-1.,1),rd.get_double(-1.,1),rd.get_double(-1.,1),rd.get_double(-1.,1)}))); + + K k; + std::vector<Point_d> results; + Gudhi::subsampling::sparsify_point_set(k, points, 0.4, std::back_inserter(results)); + std::cout << "Before sparsification: " << points.size() << " points.\n"; + std::cout << "After sparsification: " << results.size() << " points.\n"; + + return 0; +} diff --git a/src/Subsampling/include/gudhi/choose_n_farthest_points.h b/src/Subsampling/include/gudhi/choose_n_farthest_points.h new file mode 100644 index 00000000..8bfb38a4 --- /dev/null +++ b/src/Subsampling/include/gudhi/choose_n_farthest_points.h @@ -0,0 +1,125 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Siargey Kachanovich + * + * Copyright (C) 2016 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef CHOOSE_N_FARTHEST_POINTS_H_ +#define CHOOSE_N_FARTHEST_POINTS_H_ + +#include <boost/range.hpp> + +#include <gudhi/Kd_tree_search.h> + +#include <gudhi/Clock.h> + +#include <CGAL/Search_traits.h> +#include <CGAL/Search_traits_adapter.h> +#include <CGAL/Fuzzy_sphere.h> + +#include <iterator> +#include <algorithm> // for sort +#include <vector> +#include <random> + +namespace Gudhi { + +namespace subsampling { + /** + * \ingroup subsampling + * \brief Subsample by a greedy strategy of iteratively adding the farthest point from the + * current chosen point set to the subsampling. + * The iteration starts with the landmark `starting point`. + * \details It chooses `final_size` points from a random access range `input_pts` and + * outputs it in the output iterator `output_it`. + * + */ + + template < typename Kernel, + typename Point_container, + typename OutputIterator> + void choose_n_farthest_points( Kernel const &k, + Point_container const &input_pts, + unsigned final_size, + unsigned starting_point, + OutputIterator output_it) + { + typename Kernel::Squared_distance_d sqdist = k.squared_distance_d_object(); + + int nb_points = boost::size(input_pts); + assert(nb_points >= final_size); + + unsigned current_number_of_landmarks = 0; // counter for landmarks + double curr_max_dist = 0; // used for defining the furhest point from L + const double infty = std::numeric_limits<double>::infinity(); // infinity (see next entry) + std::vector< double > dist_to_L(nb_points, infty); // vector of current distances to L from input_pts + + int curr_max_w = starting_point; + + for (current_number_of_landmarks = 0; current_number_of_landmarks != final_size; current_number_of_landmarks++) { + // curr_max_w at this point is the next landmark + *output_it++ = input_pts[curr_max_w]; + // std::cout << curr_max_w << "\n"; + unsigned i = 0; + for (auto& p : input_pts) { + double curr_dist = sqdist(p, *(std::begin(input_pts) + curr_max_w)); + if (curr_dist < dist_to_L[i]) + dist_to_L[i] = curr_dist; + ++i; + } + // choose the next curr_max_w + curr_max_dist = 0; + for (i = 0; i < dist_to_L.size(); i++) + if (dist_to_L[i] > curr_max_dist) { + curr_max_dist = dist_to_L[i]; + curr_max_w = i; + } + } + } + + /** + * \ingroup subsampling + * \brief Subsample by a greedy strategy of iteratively adding the farthest point from the + * current chosen point set to the subsampling. + * The iteration starts with a random landmark. + * \details It chooses `final_size` points from a random access range `input_pts` and + * outputs it in the output iterator `output_it`. + * + */ + template < typename Kernel, + typename Point_container, + typename OutputIterator> + void choose_n_farthest_points( Kernel& k, + Point_container const &input_pts, + int final_size, + OutputIterator output_it) + { + // Choose randomly the first landmark + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(1, 6); + int starting_point = dis(gen); + choose_n_farthest_points(k, input_pts, final_size, starting_point, output_it); + } + +} // namespace subsampling + +} // namespace Gudhi + +#endif // CHOOSE_N_FARTHEST_POINTS_H_ diff --git a/src/Subsampling/include/gudhi/pick_n_random_points.h b/src/Subsampling/include/gudhi/pick_n_random_points.h new file mode 100644 index 00000000..4ca1fafc --- /dev/null +++ b/src/Subsampling/include/gudhi/pick_n_random_points.h @@ -0,0 +1,82 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Siargey Kachanovich + * + * Copyright (C) 2016 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef PICK_RANDOM_POINTS_H_ +#define PICK_RANDOM_POINTS_H_ + +#include <boost/range/size.hpp> + +#include <random> // random_device, mt19937 +#include <algorithm> // shuffle +#include <numeric> // iota +#include <iterator> +#include <gudhi/Clock.h> + + +namespace Gudhi { + +namespace subsampling { + + /** + * \ingroup subsampling + * \brief Subsample a point set by picking random vertices. + * + * \details It chooses `final_size` distinct points from a random access range `points` + * and outputs them to the output iterator `output_it`. + * Point_container::iterator should be ValueSwappable and RandomAccessIterator. + */ + + template <typename Point_container, + typename OutputIterator> + void pick_n_random_points(Point_container const &points, + unsigned final_size, + OutputIterator output_it) { +#ifdef GUDHI_SUBS_PROFILING + Gudhi::Clock t; +#endif + + unsigned nbP = boost::size(points); + assert(nbP >= final_size); + std::vector<int> landmarks(nbP); + std::iota(landmarks.begin(), landmarks.end(), 0); + + std::random_device rd; + std::mt19937 g(rd()); + + std::shuffle(landmarks.begin(), landmarks.end(), g); + landmarks.resize(final_size); + + for (int l: landmarks) + *output_it++ = points[l]; + +#ifdef GUDHI_SUBS_PROFILING + t.end(); + std::cerr << "Random landmark choice took " << t.num_seconds() + << " seconds." << std::endl; +#endif + } + +} // namesapce subsampling + +} // namespace Gudhi + +#endif // PICK_RANDOM_POINTS_H_ diff --git a/src/Subsampling/include/gudhi/sparsify_point_set.h b/src/Subsampling/include/gudhi/sparsify_point_set.h new file mode 100644 index 00000000..607555b4 --- /dev/null +++ b/src/Subsampling/include/gudhi/sparsify_point_set.h @@ -0,0 +1,119 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Clement Jamin + * + * Copyright (C) 2016 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef GUDHI_SPARSIFY_POINT_SET_H +#define GUDHI_SPARSIFY_POINT_SET_H + +#include <gudhi/Kd_tree_search.h> +#ifdef GUDHI_SUBSAMPLING_PROFILING +#include <gudhi/Clock.h> +#endif + +#include <cstddef> +#include <vector> + +namespace Gudhi { +namespace subsampling { + +/** + * \ingroup subsampling + * \brief Outputs a subset of the input points so that the + * squared distance between any two points + * is greater than or equal to `min_squared_dist`. + * + * \tparam Kernel must be a model of the <a target="_blank" + * href="http://doc.cgal.org/latest/Spatial_searching/classSearchTraits.html">SearchTraits</a> + * concept, such as the <a target="_blank" + * href="http://doc.cgal.org/latest/Kernel_d/classCGAL_1_1Epick__d.html">CGAL::Epick_d</a> class, which + * can be static if you know the ambiant dimension at compile-time, or dynamic if you don't. + * \tparam Point_range Range whose value type is Kernel::Point_d. It must provide random-access + * via `operator[]` and the points should be stored contiguously in memory. + * \tparam OutputIterator Output iterator whose value type is Kernel::Point_d. + * + * @param[in] k A kernel object. + * @param[in] input_pts Const reference to the input points. + * @param[in] min_squared_dist Minimum squared distance separating the output points. + * @param[out] output_it The output iterator. + */ + +template <typename Kernel, typename Point_range, typename OutputIterator> +void +sparsify_point_set( + const Kernel &k, Point_range const& input_pts, + typename Kernel::FT min_squared_dist, + OutputIterator output_it) +{ + typedef typename Gudhi::spatial_searching::Kd_tree_search< + Kernel, Point_range> Points_ds; + + typename Kernel::Squared_distance_d sqdist = k.squared_distance_d_object(); + +#ifdef GUDHI_SUBSAMPLING_PROFILING + Gudhi::Clock t; +#endif + + Points_ds points_ds(input_pts); + + std::vector<bool> dropped_points(input_pts.size(), false); + + // Parse the input points, and add them if they are not too close to + // the other points + std::size_t pt_idx = 0; + for (typename Point_range::const_iterator it_pt = input_pts.begin() ; + it_pt != input_pts.end(); + ++it_pt, ++pt_idx) + { + if (dropped_points[pt_idx]) + continue; + + *output_it++ = *it_pt; + + auto ins_range = points_ds.query_incremental_nearest_neighbors(*it_pt); + + // If another point Q is closer that min_squared_dist, mark Q to be dropped + for (auto const& neighbor : ins_range) + { + std::size_t neighbor_point_idx = neighbor.first; + // If the neighbor is too close, we drop the neighbor + if (neighbor.second < min_squared_dist) + { + // N.B.: If neighbor_point_idx < pt_idx, + // dropped_points[neighbor_point_idx] is already true but adding a + // test doesn't make things faster, so why bother? + dropped_points[neighbor_point_idx] = true; + } + else + break; + } + } + +#ifdef GUDHI_SUBSAMPLING_PROFILING + t.end(); + std::cerr << "Point set sparsified in " << t.num_seconds() + << " seconds." << std::endl; +#endif +} + +} // namespace subsampling +} // namespace Gudhi + +#endif // GUDHI_POINT_CLOUD_H diff --git a/src/Subsampling/test/CMakeLists.txt b/src/Subsampling/test/CMakeLists.txt new file mode 100644 index 00000000..91d4ed8f --- /dev/null +++ b/src/Subsampling/test/CMakeLists.txt @@ -0,0 +1,34 @@ +cmake_minimum_required(VERSION 2.6) +project(Subsampling_tests) + +if (GCOVR_PATH) + # for gcovr to make coverage reports - Corbera Jenkins plugin + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage") +endif() +if (GPROF_PATH) + # for gprof to make coverage reports - Jenkins + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg") +endif() + +if(CGAL_FOUND) + if (NOT CGAL_VERSION VERSION_LESS 4.8.1) + message(STATUS "CGAL version: ${CGAL_VERSION}.") + + if (EIGEN3_FOUND) + add_executable( Subsampling_test_pick_n_random_points test_pick_n_random_points.cpp ) + target_link_libraries(Subsampling_test_pick_n_random_points ${CGAL_LIBRARY} ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) + + add_executable( Subsampling_test_choose_n_farthest_points test_choose_n_farthest_points.cpp ) + target_link_libraries(Subsampling_test_choose_n_farthest_points ${CGAL_LIBRARY} ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) + + add_executable(Subsampling_test_sparsify_point_set test_sparsify_point_set.cpp) + target_link_libraries(Subsampling_test_sparsify_point_set ${CGAL_LIBRARY} ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) + else() + message(WARNING "Eigen3 not found. Version 3.1.0 is required for Subsampling feature.") + endif() + else() + message(WARNING "CGAL version: ${CGAL_VERSION} is too old to compile Subsampling tests. Version 4.8.1 is required.") + endif () +else() + message(WARNING "CGAL not found. It is required for the Subsampling tests.") +endif() diff --git a/src/Subsampling/test/test_choose_n_farthest_points.cpp b/src/Subsampling/test/test_choose_n_farthest_points.cpp new file mode 100644 index 00000000..f79a4dfb --- /dev/null +++ b/src/Subsampling/test/test_choose_n_farthest_points.cpp @@ -0,0 +1,57 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Siargey Kachanovich + * + * Copyright (C) 2016 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +// #ifdef _DEBUG +// # define TBB_USE_THREADING_TOOL +// #endif + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MODULE "witness_complex_points" +#include <boost/test/unit_test.hpp> +#include <boost/mpl/list.hpp> + +#include <gudhi/choose_n_farthest_points.h> +#include <vector> +#include <iterator> + +#include <CGAL/Epick_d.h> + +typedef CGAL::Epick_d<CGAL::Dynamic_dimension_tag> K; +typedef typename K::FT FT; +typedef typename K::Point_d Point_d; + + +BOOST_AUTO_TEST_CASE(test_choose_farthest_point) { + std::vector< Point_d > points, landmarks; + // Add grid points (625 points) + for (FT i = 0; i < 5; i += 1.0) + for (FT j = 0; j < 5; j += 1.0) + for (FT k = 0; k < 5; k += 1.0) + for (FT l = 0; l < 5; l += 1.0) + points.push_back(Point_d(std::vector<FT>({i, j, k, l}))); + + landmarks.clear(); + K k; + Gudhi::subsampling::choose_n_farthest_points(k, points, 100, std::back_inserter(landmarks)); + + BOOST_CHECK(landmarks.size() == 100); +} diff --git a/src/Subsampling/test/test_pick_n_random_points.cpp b/src/Subsampling/test/test_pick_n_random_points.cpp new file mode 100644 index 00000000..6c8dbea2 --- /dev/null +++ b/src/Subsampling/test/test_pick_n_random_points.cpp @@ -0,0 +1,69 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Siargey Kachanovich + * + * Copyright (C) 2016 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +// #ifdef _DEBUG +// # define TBB_USE_THREADING_TOOL +// #endif + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MODULE Subsampling - test pick_n_random_points +#include <boost/test/unit_test.hpp> + +#include <gudhi/pick_n_random_points.h> +#include <vector> +#include <iterator> + +#include <CGAL/Epick_d.h> + + +BOOST_AUTO_TEST_CASE(test_pick_n_random_points) +{ + typedef CGAL::Epick_d<CGAL::Dynamic_dimension_tag> K; + typedef typename K::FT FT; + typedef typename K::Point_d Point_d; + + std::vector<Point_d> vect; + vect.push_back(Point_d(std::vector<FT>({0,0,0,0}))); + vect.push_back(Point_d(std::vector<FT>({0,0,0,1}))); + vect.push_back(Point_d(std::vector<FT>({0,0,1,0}))); + vect.push_back(Point_d(std::vector<FT>({0,0,1,1}))); + vect.push_back(Point_d(std::vector<FT>({0,1,0,0}))); + vect.push_back(Point_d(std::vector<FT>({0,1,0,1}))); + vect.push_back(Point_d(std::vector<FT>({0,1,1,0}))); + vect.push_back(Point_d(std::vector<FT>({0,1,1,1}))); + vect.push_back(Point_d(std::vector<FT>({1,0,0,0}))); + vect.push_back(Point_d(std::vector<FT>({1,0,0,1}))); + vect.push_back(Point_d(std::vector<FT>({1,0,1,0}))); + vect.push_back(Point_d(std::vector<FT>({1,0,1,1}))); + vect.push_back(Point_d(std::vector<FT>({1,1,0,0}))); + vect.push_back(Point_d(std::vector<FT>({1,1,0,1}))); + vect.push_back(Point_d(std::vector<FT>({1,1,1,0}))); + vect.push_back(Point_d(std::vector<FT>({1,1,1,1}))); + + std::vector<Point_d> results; + Gudhi::subsampling::pick_n_random_points(vect, 5, std::back_inserter(results)); + std::cout << "landmark vector contains: "; + for (auto l: results) + std::cout << l << "\n"; + + BOOST_CHECK(results.size() == 5); +} diff --git a/src/Subsampling/test/test_sparsify_point_set.cpp b/src/Subsampling/test/test_sparsify_point_set.cpp new file mode 100644 index 00000000..61f6fa18 --- /dev/null +++ b/src/Subsampling/test/test_sparsify_point_set.cpp @@ -0,0 +1,57 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Clement Jamin + * + * Copyright (C) 2016 INRIA + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MODULE Subsampling - test sparsify_point_set +#include <boost/test/unit_test.hpp> + +#include <gudhi/sparsify_point_set.h> + +#include <CGAL/Epick_d.h> +#include <CGAL/Random.h> + +#include <array> +#include <vector> +#include <iterator> + +BOOST_AUTO_TEST_CASE(test_sparsify_point_set) +{ + typedef CGAL::Epick_d<CGAL::Dimension_tag<4> > K; + typedef typename K::FT FT; + typedef typename K::Point_d Point_d; + + CGAL::Random rd; + + std::vector<Point_d> points; + for (int i = 0 ; i < 500 ; ++i) + points.push_back(Point_d(std::array<FT,4>({rd.get_double(-1.,1),rd.get_double(-1.,1),rd.get_double(-1.,1),rd.get_double(-1.,1)}))); + + K k; + std::vector<Point_d> results; + Gudhi::subsampling::sparsify_point_set(k, points, 0.5, std::back_inserter(results)); + std::cout << "Before sparsification: " << points.size() << " points.\n"; + std::cout << "After sparsification: " << results.size() << " points.\n"; + //for (auto p : results) + // std::cout << p << "\n"; + + BOOST_CHECK(points.size() > results.size()); +} diff --git a/src/Witness_complex/example/witness_complex_from_file.cpp b/src/Witness_complex/example/witness_complex_from_file.cpp index 53207ad2..5e9f0e81 100644 --- a/src/Witness_complex/example/witness_complex_from_file.cpp +++ b/src/Witness_complex/example/witness_complex_from_file.cpp @@ -25,7 +25,8 @@ #include <gudhi/Simplex_tree.h> #include <gudhi/Witness_complex.h> -#include <gudhi/Landmark_choice_by_random_point.h> +#include <gudhi/Construct_closest_landmark_table.h> +#include <gudhi/pick_n_random_points.h> #include <gudhi/reader_utils.h> #include <iostream> @@ -78,7 +79,7 @@ int main(int argc, char * const argv[]) { Gudhi::Simplex_tree<> simplex_tree; // Read the point file - Point_Vector point_vector; + Point_Vector point_vector, landmarks; read_points_cust(file_name, point_vector); std::cout << "Successfully read " << point_vector.size() << " points.\n"; std::cout << "Ambient dimension is " << point_vector[0].size() << ".\n"; @@ -86,7 +87,8 @@ int main(int argc, char * const argv[]) { // Choose landmarks start = clock(); std::vector<std::vector< int > > knn; - Gudhi::witness_complex::landmark_choice_by_random_point(point_vector, nbL, knn); + Gudhi::subsampling::pick_n_random_points(point_vector, 100, std::back_inserter(landmarks)); + Gudhi::witness_complex::construct_closest_landmark_table(point_vector, landmarks, knn); end = clock(); std::cout << "Landmark choice for " << nbL << " landmarks took " << static_cast<double>(end - start) / CLOCKS_PER_SEC << " s. \n"; diff --git a/src/Witness_complex/example/witness_complex_sphere.cpp b/src/Witness_complex/example/witness_complex_sphere.cpp index b26c9f36..3d4a54aa 100644 --- a/src/Witness_complex/example/witness_complex_sphere.cpp +++ b/src/Witness_complex/example/witness_complex_sphere.cpp @@ -27,9 +27,12 @@ #include <gudhi/Simplex_tree.h> #include <gudhi/Witness_complex.h> -#include <gudhi/Landmark_choice_by_random_point.h> +#include <gudhi/Construct_closest_landmark_table.h> +#include <gudhi/pick_n_random_points.h> #include <gudhi/reader_utils.h> +#include <CGAL/Epick_d.h> + #include <iostream> #include <fstream> #include <ctime> @@ -67,7 +70,7 @@ int main(int argc, char * const argv[]) { // Read the point file for (int nbP = 500; nbP < 10000; nbP += 500) { - Point_Vector point_vector; + Point_Vector point_vector, landmarks; generate_points_sphere(point_vector, nbP, 4); std::cout << "Successfully generated " << point_vector.size() << " points.\n"; std::cout << "Ambient dimension is " << point_vector[0].size() << ".\n"; @@ -75,10 +78,16 @@ int main(int argc, char * const argv[]) { // Choose landmarks start = clock(); std::vector<std::vector< int > > knn; - Gudhi::witness_complex::landmark_choice_by_random_point(point_vector, number_of_landmarks, knn); + Gudhi::subsampling::pick_n_random_points(point_vector, 100, std::back_inserter(landmarks)); + Gudhi::witness_complex::construct_closest_landmark_table(point_vector, landmarks, knn); // Compute witness complex - Gudhi::witness_complex::witness_complex(knn, number_of_landmarks, point_vector[0].size(), simplex_tree); + // Gudhi::witness_complex::witness_complex(knn, number_of_landmarks, point_vector[0].size(), simplex_tree); + Gudhi::witness_complex::Witness_complex<Gudhi::Simplex_tree<>, CGAL::Epick_d<CGAL::Dynamic_dimension_tag>>(landmarks.begin(), + landmarks.end(), + point_vector.begin(), + point_vector.end(), + simplex_tree); end = clock(); double time = static_cast<double>(end - start) / CLOCKS_PER_SEC; std::cout << "Witness complex for " << number_of_landmarks << " landmarks took " diff --git a/src/Witness_complex/include/gudhi/Construct_closest_landmark_table.h b/src/Witness_complex/include/gudhi/Construct_closest_landmark_table.h new file mode 100644 index 00000000..ef711c34 --- /dev/null +++ b/src/Witness_complex/include/gudhi/Construct_closest_landmark_table.h @@ -0,0 +1,90 @@ +/* This file is part of the Gudhi Library. The Gudhi library + * (Geometric Understanding in Higher Dimensions) is a generic C++ + * library for computational topology. + * + * Author(s): Siargey Kachanovich + * + * Copyright (C) 2015 INRIA Sophia Antipolis-Méditerranée (France) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef CONSTRUCT_CLOSEST_LANDMARK_TABLE_H_ +#define CONSTRUCT_CLOSEST_LANDMARK_TABLE_H_ + +#include <boost/range/size.hpp> + +#include <queue> // for priority_queue<> +#include <utility> // for pair<> +#include <iterator> +#include <vector> +#include <set> + +namespace Gudhi { + +namespace witness_complex { + + /** + * \ingroup witness_complex + * \brief Construct the closest landmark tables for all witnesses. + * \details Output a table 'knn', each line of which represents a witness and + * consists of landmarks sorted by + * euclidean distance from the corresponding witness. + * + * The type WitnessContainer is a random access range and + * the type LandmarkContainer is a range. + * The type KNearestNeighbors can be seen as + * Witness_range<Closest_landmark_range<Vertex_handle>>, where + * Witness_range and Closest_landmark_range are random access ranges and + * Vertex_handle is the label type of a vertex in a simplicial complex. + * Closest_landmark_range needs to have push_back operation. + */ + + template <typename WitnessContainer, + typename LandmarkContainer, + typename KNearestNeighbours> + void construct_closest_landmark_table(WitnessContainer const &points, + LandmarkContainer const &landmarks, + KNearestNeighbours &knn) { + int nbP = boost::size(points); + assert(nbP >= boost::size(landmarks)); + + int dim = boost::size(*std::begin(points)); + typedef std::pair<double, int> dist_i; + typedef bool (*comp)(dist_i, dist_i); + knn = KNearestNeighbours(nbP); + for (int points_i = 0; points_i < nbP; points_i++) { + std::priority_queue<dist_i, std::vector<dist_i>, comp> l_heap([](dist_i j1, dist_i j2) { + return j1.first > j2.first; + }); + typename LandmarkContainer::const_iterator landmarks_it; + int landmarks_i = 0; + for (landmarks_it = landmarks.begin(), landmarks_i = 0; landmarks_it != landmarks.end(); + ++landmarks_it, landmarks_i++) { + dist_i dist = std::make_pair(euclidean_distance(points[points_i], *landmarks_it), landmarks_i); + l_heap.push(dist); + } + for (int i = 0; i < dim + 1; i++) { + dist_i dist = l_heap.top(); + knn[points_i].push_back(dist.second); + l_heap.pop(); + } + } + } + +} // namespace witness_complex + +} // namespace Gudhi + +#endif // CONSTRUCT_CLOSEST_LANDMARK_TABLE_H_ diff --git a/src/Witness_complex/include/gudhi/Witness_complex.h b/src/Witness_complex/include/gudhi/Witness_complex.h index 5c6d087f..4c489e7a 100644 --- a/src/Witness_complex/include/gudhi/Witness_complex.h +++ b/src/Witness_complex/include/gudhi/Witness_complex.h @@ -31,6 +31,7 @@ #include <boost/range/size.hpp> #include <gudhi/distance_functions.h> +#include <gudhi/Kd_tree_search.h> #include <algorithm> #include <utility> @@ -42,8 +43,10 @@ #include <ctime> #include <iostream> -namespace Gudhi { +namespace gss = Gudhi::spatial_searching; +namespace Gudhi { + namespace witness_complex { // /* @@ -55,9 +58,15 @@ namespace witness_complex { template< class SimplicialComplex, class Kernel_ > class Witness_complex { - typedef Kernel_ K; - typedef K::Point_d Point_d; - + typedef Kernel_ K; + typedef typename K::Point_d Point_d; + typedef typename K::FT FT; + typedef std::vector<Point_d> Point_range; + typedef gss::Kd_tree_search<Kernel_, Point_range> Kd_tree; + typedef typename Kd_tree::INS_range Nearest_landmark_range; + typedef typename std::vector<Nearest_landmark_range> Nearest_landmark_table; + typedef typename Nearest_landmark_table::const_iterator Nearest_landmark_table_iterator; + //typedef std::vector<std::pair<unsigned,FT>> Nearest_landmarks; private: struct Active_witness { @@ -86,9 +95,10 @@ class Witness_complex { private: int nbL_; // Number of landmarks - SimplicialComplex& sc_; // Simplicial complex - std::vector<Point_d> witnesses_, landmarks_; - + SimplicialComplex& sc_; // Simplicial complex + Point_range witnesses_, landmarks_; + Kd_tree landmark_tree_; + std::vector<Nearest_landmark_range> nearest_landmarks_; public: ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -118,52 +128,53 @@ class Witness_complex { Witness_complex(InputIteratorLandmarks landmarks_first, InputIteratorLandmarks landmarks_last, InputIteratorWitnesses witnesses_first, - InputIteratorWitnesses witnesses_last) - : witnesses_(witnesses_first, witnesses_last), landmarks_(landmarks_first, landmarks_last) + InputIteratorWitnesses witnesses_last, + SimplicialComplex& sc) + : witnesses_(witnesses_first, witnesses_last), landmarks_(landmarks_first, landmarks_last), landmark_tree_(landmarks_), sc_(sc) { + for (Point_d w : witnesses_) + nearest_landmarks_.push_back(landmark_tree_.query_incremental_nearest_neighbors(w)); } + Point_d get_point( std::size_t vertex ) const + { + return landmarks_[vertex]; + } - - template< typename KNearestNeighbors > - Witness_complex(KNearestNeighbors const & knn, - int nbL, - int dim, - SimplicialComplex & sc) : nbL_(nbL), sc_(sc) { - // Construction of the active witness list - int nbW = boost::size(knn); + template < typename SimplicialComplexForWitness > + bool create_complex(SimplicialComplexForWitness& complex, + FT max_alpha_square) const + { + unsigned nbW = witnesses_.size(), + nbL = landmarks_.size(); typeVectorVertex vv; - int counter = 0; - /* The list of still useful witnesses - * it will diminuish in the course of iterations - */ - ActiveWitnessList active_w; // = new ActiveWitnessList(); - for (Vertex_handle i = 0; i != nbL_; ++i) { + ActiveWitnessList active_w;// = new ActiveWitnessList(); + for (int i = 0; i != nbL; ++i) { // initial fill of 0-dimensional simplices // by doing it we don't assume that landmarks are necessarily witnesses themselves anymore - counter++; + //counter++; vv = {i}; - sc_.insert_simplex(vv); - // TODO(SK) Error if not inserted : normally no need here though + complex.insert_simplex(vv, Filtration_value(0.0)); + /* TODO Error if not inserted : normally no need here though*/ } - int k = 1; /* current dimension in iterative construction */ - for (int i = 0; i != nbW; ++i) + unsigned k = 1; /* current dimension in iterative construction */ + for (int i=0; i != nbW; ++i) active_w.push_back(i); - while (!active_w.empty() && k < dim) { - typename ActiveWitnessList::iterator it = active_w.begin(); - while (it != active_w.end()) { - typeVectorVertex simplex_vector; - /* THE INSERTION: Checking if all the subfaces are in the simplex tree*/ - bool ok = all_faces_in(knn, *it, k); - if (ok) { - for (int i = 0; i != k + 1; ++i) - simplex_vector.push_back(knn[*it][i]); - sc_.insert_simplex(simplex_vector); - // TODO(SK) Error if not inserted : normally no need here though - ++it; - } else { - active_w.erase(it++); // First increase the iterator and then erase the previous element - } + while (!active_w.empty() && k < nbL ) { + typename ActiveWitnessList::iterator aw_it = active_w.begin(); + while (aw_it != active_w.end()) { + std::vector<int> simplex; + bool ok = add_all_faces_of_dimension(k, + max_alpha_square, + std::numeric_limits<double>::infinity(), + nearest_landmarks_[*aw_it].begin(), + simplex, + complex, + nearest_landmarks_[*aw_it].end()); + if (!ok) + active_w.erase(aw_it++); //First increase the iterator and then erase the previous element + else + aw_it++; } k++; } @@ -172,40 +183,114 @@ class Witness_complex { //@} private: - /* \brief Check if the facets of the k-dimensional simplex witnessed - * by witness witness_id are already in the complex. - * inserted_vertex is the handle of the (k+1)-th vertex witnessed by witness_id + + /* \brief Adds recursively all the faces of a certain dimension dim witnessed by the same witness + * Iterator is needed to know until how far we can take landmarks to form simplexes + * simplex is the prefix of the simplexes to insert + * The output value indicates if the witness rests active or not */ - template <typename KNearestNeighbors> - bool all_faces_in(KNearestNeighbors const &knn, int witness_id, int k) { - std::vector< Vertex_handle > facet; - // CHECK ALL THE FACETS - for (int i = 0; i != k + 1; ++i) { - facet = {}; - for (int j = 0; j != k + 1; ++j) { - if (j != i) { - facet.push_back(knn[witness_id][j]); + template < typename SimplicialComplexForWitness > + bool add_all_faces_of_dimension(int dim, + double alpha2, + double norelax_dist2, + Nearest_landmark_table_iterator curr_l, + std::vector<int>& simplex, + SimplicialComplexForWitness& sc, + Nearest_landmark_table_iterator end) + { + if (curr_l == end) + return false; + bool will_be_active = false; + Nearest_landmark_table_iterator l_it = curr_l; + if (dim > 0) + for (; l_it->second - alpha2 <= norelax_dist2 && l_it != end; ++l_it) { + simplex.push_back(l_it->first); + if (sc.find(simplex) != sc.null_simplex()) + will_be_active = add_all_faces_of_dimension(dim-1, + alpha2, + norelax_dist2, + l_it+1, + simplex, + sc, + end) || will_be_active; + simplex.pop_back(); + // If norelax_dist is infinity, change to first omitted distance + if (l_it->second <= norelax_dist2) + norelax_dist2 = l_it->second; + will_be_active = add_all_faces_of_dimension(dim-1, + alpha2, + norelax_dist2, + l_it+1, + simplex, + sc, + end) || will_be_active; + } + else if (dim == 0) + for (; l_it->second - alpha2 <= norelax_dist2 && l_it != end; ++l_it) { + simplex.push_back(l_it->second); + double filtration_value = 0; + // if norelax_dist is infinite, relaxation is 0. + if (l_it->second > norelax_dist2) + filtration_value = l_it->second - norelax_dist2; + if (all_faces_in(simplex, &filtration_value, sc)) { + will_be_active = true; + sc.insert_simplex(simplex, filtration_value); } - } // endfor - if (sc_.find(facet) == sc_.null_simplex()) - return false; - } // endfor - return true; + simplex.pop_back(); + // If norelax_dist is infinity, change to first omitted distance + if (l_it->second < norelax_dist2) + norelax_dist2 = l_it->second; + } + return will_be_active; } - - template <typename T> - static void print_vector(const std::vector<T>& v) { - std::cout << "["; - if (!v.empty()) { - std::cout << *(v.begin()); - for (auto it = v.begin() + 1; it != v.end(); ++it) { - std::cout << ","; - std::cout << *it; + + /** \brief Check if the facets of the k-dimensional simplex witnessed + * by witness witness_id are already in the complex. + * inserted_vertex is the handle of the (k+1)-th vertex witnessed by witness_id + */ + template < typename SimplicialComplexForWitness > + bool all_faces_in(std::vector<int>& simplex, + double* filtration_value, + SimplicialComplexForWitness& sc) + { + std::vector< int > facet; + for (std::vector<int>::iterator not_it = simplex.begin(); not_it != simplex.end(); ++not_it) + { + facet.clear(); + for (std::vector<int>::iterator it = simplex.begin(); it != simplex.end(); ++it) + if (it != not_it) + facet.push_back(*it); + Simplex_handle facet_sh = sc.find(facet); + if (facet_sh == sc.null_simplex()) + return false; + else if (sc.filtration(facet_sh) > *filtration_value) + *filtration_value = sc.filtration(facet_sh); } - } - std::cout << "]"; + return true; } + // bool is_face(Simplex_handle face, Simplex_handle coface) + // { + // // vertex range is sorted in decreasing order + // auto fvr = sc.simplex_vertex_range(face); + // auto cfvr = sc.simplex_vertex_range(coface); + // auto fv_it = fvr.begin(); + // auto cfv_it = cfvr.begin(); + // while (fv_it != fvr.end() && cfv_it != cfvr.end()) { + // if (*fv_it < *cfv_it) + // ++cfv_it; + // else if (*fv_it == *cfv_it) { + // ++cfv_it; + // ++fv_it; + // } + // else + // return false; + + // } + // return (fv_it == fvr.end()); + // } + + public: // /* // * \brief Verification if every simplex in the complex is witnessed by witnesses in knn. @@ -269,13 +354,13 @@ class Witness_complex { * * Landmarks are supposed to be in [0,nbL_-1] */ - template <class KNearestNeighbors, class SimplicialComplexForWitness> - void witness_complex(KNearestNeighbors const & knn, - int nbL, - int dim, - SimplicialComplexForWitness & sc) { - Witness_complex<SimplicialComplexForWitness>(knn, nbL, dim, sc); - } + // template <class KNearestNeighbors, class SimplicialComplexForWitness> + // void witness_complex(KNearestNeighbors const & knn, + // int nbL, + // int dim, + // SimplicialComplexForWitness & sc) { + // Witness_complex<SimplicialComplexForWitness>(knn, nbL, dim, sc); + // } } // namespace witness_complex diff --git a/src/Witness_complex/test/witness_complex_points.cpp b/src/Witness_complex/test/witness_complex_points.cpp index bd3df604..d40bbf14 100644 --- a/src/Witness_complex/test/witness_complex_points.cpp +++ b/src/Witness_complex/test/witness_complex_points.cpp @@ -27,8 +27,8 @@ #include <gudhi/Simplex_tree.h> #include <gudhi/Witness_complex.h> -#include <gudhi/Landmark_choice_by_random_point.h> -#include <gudhi/Landmark_choice_by_furthest_point.h> +#include <gudhi/Construct_closest_landmark_table.h> +#include <gudhi/pick_n_random_points.h> #include <iostream> #include <vector> @@ -40,7 +40,7 @@ typedef Gudhi::witness_complex::Witness_complex<Simplex_tree> WitnessComplex; BOOST_AUTO_TEST_CASE(witness_complex_points) { std::vector< typeVectorVertex > knn; - std::vector< Point > points; + std::vector< Point > points, landmarks; // Add grid points as witnesses for (double i = 0; i < 10; i += 1.0) for (double j = 0; j < 10; j += 1.0) @@ -50,15 +50,9 @@ BOOST_AUTO_TEST_CASE(witness_complex_points) { bool b_print_output = false; // First test: random choice Simplex_tree complex1; - Gudhi::witness_complex::landmark_choice_by_random_point(points, 100, knn); + Gudhi::subsampling::pick_n_random_points(points, 100, std::back_inserter(landmarks)); + Gudhi::witness_complex::construct_closest_landmark_table(points, landmarks, knn); assert(!knn.empty()); WitnessComplex witnessComplex1(knn, 100, 3, complex1); BOOST_CHECK(witnessComplex1.is_witness_complex(knn, b_print_output)); - - // Second test: furthest choice - knn.clear(); - Simplex_tree complex2; - Gudhi::witness_complex::landmark_choice_by_furthest_point(points, 100, knn); - WitnessComplex witnessComplex2(knn, 100, 3, complex2); - BOOST_CHECK(witnessComplex2.is_witness_complex(knn, b_print_output)); } |