diff options
20 files changed, 429 insertions, 70 deletions
diff --git a/.circleci/config.yml b/.circleci/config.yml index f6a875dd..90737006 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -61,7 +61,7 @@ jobs: cmake -DUSER_VERSION_DIR=version .. make user_version cd version - cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 . + cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 -DWITH_GUDHI_REMOTE_TEST=ON . cd python python3 setup.py build_ext --inplace make sphinx diff --git a/.github/for_maintainers/tests_strategy.md b/.github/for_maintainers/tests_strategy.md index c25acf9b..338d4282 100644 --- a/.github/for_maintainers/tests_strategy.md +++ b/.github/for_maintainers/tests_strategy.md @@ -8,13 +8,13 @@ The aim is to help maintainers to anticipate third parties modifications, update ### Linux -As all the third parties are already installed (thanks to docker), the compilations has been seperated by categories to be parallelized: +As all the third parties are already installed (thanks to docker), the compilations have been separated in categories to be parallelized: * examples (C++) * tests (C++) * utils (C++) * doxygen (C++ documentation that is available in the artefacts) -* python (including documentation and code coverage that are available in the artefacts) +* python (including documentation and code coverage that are available in the artefacts; here the WITH_GUDHI_REMOTE_TEST option is enabled which adds datasets fetching test) (cf. `.circleci/config.yml`) @@ -25,9 +25,9 @@ Without CGAL, and, with or without Eigen builds are performed inside the docker #### Update docker images -C++ third parties installation are done thanks to apt on Ubuntu latest LTS. +C++ third parties installation is done thanks to apt on Ubuntu latest LTS. 
-Docker images need to be rebuild and push each time `.github/build-requirements`, `.github/test-requirements`, when a new third party is added, when a new CGAL version improves gudhi performances, ... +Docker images need to be rebuilt and pushed each time `.github/build-requirements`, `.github/test-requirements`, when a new third party is added, when a new CGAL version improves gudhi performances, ... ```bash docker build -f Dockerfile_for_circleci_image -t gudhi/ci_for_gudhi:latest . @@ -39,35 +39,35 @@ docker push gudhi/ci_for_gudhi_wo_cgal:latest ### Windows -The compilations are not parallelized, as installation time (about 30 minutes) is too much compare to +The compilations are not parallelized, as installation time (about 30 minutes) is too much compared to build and tests timings (about 30 minutes). Builds and tests include: * examples (C++) * tests (C++) * utils (C++) -* python +* python (here the WITH_GUDHI_REMOTE_TEST option is enabled which adds datasets fetching test) Doxygen (C++) is not generated. (cf. `azure-pipelines.yml`) -C++ third parties installation are done thanks to [vcpkg](https://github.com/microsoft/vcpkg/). -In case of installation issue, check in [vcpkg issues](https://github.com/microsoft/vcpkg/issues). +C++ third parties installation is done thanks to [vcpkg](https://github.com/microsoft/vcpkg/). +In case of an installation issue, check in [vcpkg issues](https://github.com/microsoft/vcpkg/issues). ### OSx The compilations are not parallelized, but they should, as installation time (about 4 minutes) is -negligeable compare to build and tests timings (about 30 minutes). Builds and tests include: +negligible compared to build and tests timings (about 30 minutes). Builds and tests include: * examples (C++) * tests (C++) * utils (C++) -* python +* python (here the WITH_GUDHI_REMOTE_TEST option is enabled which adds datasets fetching test) * Doxygen (C++) (cf. 
`azure-pipelines.yml`) -C++ third parties installation are done thanks to [brew](https://formulae.brew.sh/formula/). -In case of installation issue, check in formula issues. +C++ third parties installation is done thanks to [brew](https://formulae.brew.sh/formula/). +In case of an installation issue, check in formula issues. ## Pip packaging @@ -80,9 +80,9 @@ Only the Linux pip package is based on a docker image (`gudhi/pip_for_gudhi` bas ### Update docker image -C++ third parties installation are done thanks to yum on an image based on `quay.io/pypa/manylinux2014_x86_64`. +C++ third parties installation is done thanks to yum on an image based on `quay.io/pypa/manylinux2014_x86_64`. -Docker image need to be rebuild and push each time `.github/build-requirements`, when a new third party is added, when a new CGAL version improves gudhi performances, ... +Docker image needs to be rebuilt and pushed each time `.github/build-requirements`, when a new third party is added, when a new CGAL version improves gudhi performances, ... As `.github/test-requirements` is not installed, no need to rebuild image when this file is modified. ```bash diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 31264c37..e005b3be 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -30,7 +30,7 @@ jobs: - bash: | mkdir build cd build - cmake -DCMAKE_BUILD_TYPE:STRING=$(cmakeBuildType) -DWITH_GUDHI_EXAMPLE=ON -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_UTILITIES=ON -DWITH_GUDHI_PYTHON=ON .. + cmake -DCMAKE_BUILD_TYPE:STRING=$(cmakeBuildType) -DWITH_GUDHI_EXAMPLE=ON -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_UTILITIES=ON -DWITH_GUDHI_PYTHON=ON -DWITH_GUDHI_REMOTE_TEST=ON .. make make doxygen ctest --output-on-failure @@ -74,7 +74,7 @@ jobs: IF %errorlevel% NEQ 0 exit /b %errorlevel% ctest --output-on-failure -C Release -E diff_files IF %errorlevel% NEQ 0 exit /b %errorlevel% - cmake -DWITH_GUDHI_PYTHON=ON . + cmake -DWITH_GUDHI_PYTHON=ON -DWITH_GUDHI_REMOTE_TEST=ON . 
IF %errorlevel% NEQ 0 exit /b %errorlevel% cd src\python copy "C:\vcpkg\installed\x64-windows\bin\mpfr*.dll" ".\gudhi\" diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h index b1a9407b..aec8c1b1 100644 --- a/src/Alpha_complex/include/gudhi/Alpha_complex.h +++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h @@ -461,10 +461,10 @@ class Alpha_complex { void propagate_alpha_filtration(SimplicialComplexForAlpha& complex, Simplex_handle f_simplex) { // From SimplicialComplexForAlpha type required to assign filtration values. using Filtration_value = typename SimplicialComplexForAlpha::Filtration_value; - using Vertex_handle = typename SimplicialComplexForAlpha::Vertex_handle; // ### Foreach Tau face of Sigma - for (auto f_boundary : complex.boundary_simplex_range(f_simplex)) { + for (auto face_opposite_vertex : complex.boundary_opposite_vertex_simplex_range(f_simplex)) { + auto f_boundary = face_opposite_vertex.first; #ifdef DEBUG_TRACES std::clog << " | --------------------------------------------------\n"; std::clog << " | Tau "; @@ -485,18 +485,10 @@ class Alpha_complex { #endif // DEBUG_TRACES // ### Else } else { - // Find which vertex of f_simplex is missing in f_boundary. We could actually write a variant of boundary_simplex_range that gives pairs (f_boundary, vertex). We rely on the fact that simplex_vertex_range is sorted. 
- auto longlist = complex.simplex_vertex_range(f_simplex); - auto shortlist = complex.simplex_vertex_range(f_boundary); - auto longiter = std::begin(longlist); - auto shortiter = std::begin(shortlist); - auto enditer = std::end(shortlist); - while(shortiter != enditer && *longiter == *shortiter) { ++longiter; ++shortiter; } - Vertex_handle extra = *longiter; auto const& cache=get_cache(complex, f_boundary); - bool is_gab = kernel_.is_gabriel(cache, get_point_(extra)); + bool is_gab = kernel_.is_gabriel(cache, get_point_(face_opposite_vertex.second)); #ifdef DEBUG_TRACES - std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << extra << std::endl; + std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << face_opposite_vertex.second << std::endl; #endif // DEBUG_TRACES // ### If Tau is not Gabriel of Sigma if (false == is_gab) { diff --git a/src/Cech_complex/benchmark/cech_complex_benchmark.cpp b/src/Cech_complex/benchmark/cech_complex_benchmark.cpp index d2a71879..a9dc5d0d 100644 --- a/src/Cech_complex/benchmark/cech_complex_benchmark.cpp +++ b/src/Cech_complex/benchmark/cech_complex_benchmark.cpp @@ -31,7 +31,7 @@ using Points_off_reader = Gudhi::Points_off_reader<Point>; using Rips_complex = Gudhi::rips_complex::Rips_complex<Filtration_value>; template<typename Kernel> -Simplex_tree benchmark_cech(const std::string& off_file_points, const Filtration_value& radius, const int& dim_max) { +Simplex_tree benchmark_cech(const std::string& off_file_points, const Filtration_value& radius, const int& dim_max, const bool exact) { using Point_cgal = typename Kernel::Point_d; using Points_off_reader_cgal = Gudhi::Points_off_reader<Point_cgal>; using Cech_complex = Gudhi::cech_complex::Cech_complex<Kernel, Simplex_tree>; @@ -42,7 +42,7 @@ Simplex_tree benchmark_cech(const std::string& off_file_points, const Filtration Gudhi::Clock cech_clock("Cech computation"); Cech_complex 
cech_complex_from_points(off_reader_cgal.get_point_cloud(), radius); Simplex_tree cech_stree; - cech_complex_from_points.create_complex(cech_stree, dim_max); + cech_complex_from_points.create_complex(cech_stree, dim_max, exact); // ------------------------------------------ // Display information about the Cech complex @@ -56,24 +56,27 @@ int main(int argc, char* argv[]) { boost::filesystem::path full_path(boost::filesystem::current_path()); std::clog << "Current path is : " << full_path << std::endl; - std::clog << "File name ; Radius ; Rips time ; Dim-3 Epick Cech time ; Dynamic_dim Epick Cech time ; " - "Dim-3 Epeck Cech time ; Dynamic_dim Epeck Cech time ; Cech nb simplices ; Rips nb simplices;" + std::clog << "File name ; Radius ; Rips time ; Dim-3 Fast Cech time ; Dynamic_dim Fast Cech time ; " + "Dim-3 Safe Cech time ; Dynamic_dim Safe Cech time ; Dim-3 Exact Cech time ; Dynamic_dim Exact Cech time ; " + "Cech nb simplices ; Rips nb simplices;" << std::endl; boost::filesystem::directory_iterator end_itr; // default construction yields past-the-end + // For every ".off" file in the current directory, and for 3 predefined thresholds, compare Rips and various Cech constructions for (boost::filesystem::directory_iterator itr(boost::filesystem::current_path()); itr != end_itr; ++itr) { if (!boost::filesystem::is_directory(itr->status())) { if (itr->path().extension() == ".off") { Points_off_reader off_reader(itr->path().string()); Point p0 = off_reader.get_point_cloud()[0]; - - for (Filtration_value radius = 0.1; radius < 0.4; radius += 0.1) { + // Loop over the different thresholds + for (Filtration_value radius = 0.1; radius < 0.35; radius += 0.1) { std::clog << itr->path().stem() << " ; "; std::clog << radius << " ; "; Gudhi::Clock rips_clock("Rips computation"); Rips_complex rips_complex_from_points(off_reader.get_point_cloud(), radius, Gudhi::Euclidean_distance()); Simplex_tree rips_stree; - rips_complex_from_points.create_complex(rips_stree, p0.size() - 1); 
+ int dim_max = p0.size() - 1; + rips_complex_from_points.create_complex(rips_stree, dim_max); // ------------------------------------------ // Display information about the Rips complex // ------------------------------------------ @@ -83,10 +86,15 @@ int main(int argc, char* argv[]) { // -------------- // Cech complex // -------------- - benchmark_cech<CGAL::Epick_d<CGAL::Dimension_tag<3>>>(itr->path().string(), radius, p0.size() - 1); - benchmark_cech<CGAL::Epick_d<CGAL::Dynamic_dimension_tag>>(itr->path().string(), radius, p0.size() - 1); - benchmark_cech<CGAL::Epeck_d<CGAL::Dimension_tag<3>>>(itr->path().string(), radius, p0.size() - 1); - auto cech_stree = benchmark_cech<CGAL::Epeck_d<CGAL::Dynamic_dimension_tag>>(itr->path().string(), radius, p0.size() - 1); + // Fast + benchmark_cech<CGAL::Epick_d<CGAL::Dimension_tag<3>>>(itr->path().string(), radius, dim_max, false); + benchmark_cech<CGAL::Epick_d<CGAL::Dynamic_dimension_tag>>(itr->path().string(), radius, dim_max, false); + // Safe + benchmark_cech<CGAL::Epeck_d<CGAL::Dimension_tag<3>>>(itr->path().string(), radius, dim_max, false); + benchmark_cech<CGAL::Epeck_d<CGAL::Dynamic_dimension_tag>>(itr->path().string(), radius, dim_max, false); + // Exact + benchmark_cech<CGAL::Epeck_d<CGAL::Dimension_tag<3>>>(itr->path().string(), radius, dim_max, true); + auto cech_stree = benchmark_cech<CGAL::Epeck_d<CGAL::Dynamic_dimension_tag>>(itr->path().string(), radius, dim_max, true); std::clog << cech_stree.num_simplices() << " ; "; std::clog << rips_stree.num_simplices() << ";" << std::endl; diff --git a/src/Cech_complex/include/gudhi/Cech_complex.h b/src/Cech_complex/include/gudhi/Cech_complex.h index fc39f75b..08b7a72f 100644 --- a/src/Cech_complex/include/gudhi/Cech_complex.h +++ b/src/Cech_complex/include/gudhi/Cech_complex.h @@ -30,7 +30,7 @@ namespace cech_complex { * \ingroup cech_complex * * \details - * Cech complex is a simplicial complex constructed from a proximity graph, where the set of all simplices 
is filtered + * Cech complex is a simplicial complex where the set of all simplices is filtered * by the radius of their minimal enclosing ball and bounded by the given max_radius. * * \tparam Kernel CGAL kernel: either Epick_d or Epeck_d. @@ -70,7 +70,7 @@ class Cech_complex { point_cloud_.assign(std::begin(points), std::end(points)); cech_skeleton_graph_ = Gudhi::compute_proximity_graph<SimplicialComplexForCechComplex>( - point_cloud_, max_radius_, Sphere_circumradius<Kernel>()); + point_cloud_, max_radius_, Sphere_circumradius<Kernel, Filtration_value>()); } /** \brief Initializes the simplicial complex from the proximity graph and expands it until a given maximal @@ -78,17 +78,19 @@ class Cech_complex { * * @param[in] complex SimplicialComplexForCech to be created. * @param[in] dim_max graph expansion until this given maximal dimension. + * @param[in] exact Exact filtration values computation. Not exact if `Kernel` is not <a target="_blank" + * href="https://doc.cgal.org/latest/Kernel_d/structCGAL_1_1Epeck__d.html">CGAL::Epeck_d</a>. * @exception std::invalid_argument In debug mode, if `complex.num_vertices()` does not return 0. * */ - void create_complex(SimplicialComplexForCechComplex& complex, int dim_max) { + void create_complex(SimplicialComplexForCechComplex& complex, int dim_max, const bool exact = false) { GUDHI_CHECK(complex.num_vertices() == 0, std::invalid_argument("Cech_complex::create_complex - simplicial complex is not empty")); // insert the proximity graph in the simplicial complex complex.insert_graph(cech_skeleton_graph_); // expand the graph until dimension dim_max - complex.expansion_with_blockers(dim_max, cech_blocker(&complex, this)); + complex.expansion_with_blockers(dim_max, cech_blocker(&complex, this, exact)); } /** @return max_radius value given at construction. 
*/ diff --git a/src/Cech_complex/include/gudhi/Cech_complex_blocker.h b/src/Cech_complex/include/gudhi/Cech_complex_blocker.h index 9917999f..1bb205b3 100644 --- a/src/Cech_complex/include/gudhi/Cech_complex_blocker.h +++ b/src/Cech_complex/include/gudhi/Cech_complex_blocker.h @@ -12,6 +12,7 @@ #define CECH_COMPLEX_BLOCKER_H_ #include <CGAL/NT_converter.h> // for casting from FT to Filtration_value +#include <CGAL/Lazy_exact_nt.h> // for CGAL::exact #include <iostream> #include <vector> @@ -84,9 +85,9 @@ class Cech_blocker { Point_cloud face_points; for (auto vertex : sc_ptr_->simplex_vertex_range(face_opposite_vertex.first)) { face_points.push_back(cc_ptr_->get_point(vertex)); - #ifdef DEBUG_TRACES - std::clog << "#(" << vertex << ")#"; - #endif // DEBUG_TRACES +#ifdef DEBUG_TRACES + std::clog << "#(" << vertex << ")#"; +#endif // DEBUG_TRACES } sph = get_sphere(face_points.cbegin(), face_points.cend()); // Put edge sphere in cache @@ -97,10 +98,13 @@ class Cech_blocker { } // Check if the minimal enclosing ball of current face contains the extra point/opposite vertex if (kernel_.squared_distance_d_object()(sph.first, cc_ptr_->get_point(face_opposite_vertex.second)) <= sph.second) { - #ifdef DEBUG_TRACES - std::clog << "center: " << sph.first << ", radius: " << radius << std::endl; - #endif // DEBUG_TRACES +#ifdef DEBUG_TRACES + std::clog << "center: " << sph.first << ", radius: " << radius << std::endl; +#endif // DEBUG_TRACES is_min_enclos_ball = true; +#if CGAL_VERSION_NR >= 1050000000 + if(exact_) CGAL::exact(sph.second); +#endif radius = std::sqrt(cast_to_fv(sph.second)); sc_ptr_->assign_key(sh, cc_ptr_->get_cache().size()); cc_ptr_->get_cache().push_back(sph); @@ -114,10 +118,13 @@ class Cech_blocker { points.push_back(cc_ptr_->get_point(vertex)); } Sphere sph = get_sphere(points.cbegin(), points.cend()); +#if CGAL_VERSION_NR >= 1050000000 + if(exact_) CGAL::exact(sph.second); +#endif radius = std::sqrt(cast_to_fv(sph.second)); sc_ptr_->assign_key(sh, 
cc_ptr_->get_cache().size()); - cc_ptr_->get_cache().push_back(sph); + cc_ptr_->get_cache().push_back(std::move(sph)); } #ifdef DEBUG_TRACES @@ -128,12 +135,13 @@ class Cech_blocker { } /** \internal \brief Čech complex blocker constructor. */ - Cech_blocker(SimplicialComplexForCech* sc_ptr, Cech_complex* cc_ptr) : sc_ptr_(sc_ptr), cc_ptr_(cc_ptr) {} + Cech_blocker(SimplicialComplexForCech* sc_ptr, Cech_complex* cc_ptr, const bool exact) : sc_ptr_(sc_ptr), cc_ptr_(cc_ptr), exact_(exact) {} private: SimplicialComplexForCech* sc_ptr_; Cech_complex* cc_ptr_; Kernel kernel_; + const bool exact_; }; } // namespace cech_complex diff --git a/src/Cech_complex/include/gudhi/Sphere_circumradius.h b/src/Cech_complex/include/gudhi/Sphere_circumradius.h index b0d9f7cc..790f6950 100644 --- a/src/Cech_complex/include/gudhi/Sphere_circumradius.h +++ b/src/Cech_complex/include/gudhi/Sphere_circumradius.h @@ -11,7 +11,7 @@ #ifndef SPHERE_CIRCUMRADIUS_H_ #define SPHERE_CIRCUMRADIUS_H_ -#include <CGAL/Epeck_d.h> // for #include <CGAL/NewKernel_d/KernelD_converter.h> +#include <CGAL/Epick_d.h> // for #include <CGAL/NT_converter.h> which is not working/compiling alone #include <cmath> // for std::sqrt #include <vector> @@ -22,14 +22,17 @@ namespace cech_complex { /** \private @brief Compute the circumradius of the sphere passing through points given by a range of coordinates. * The points are assumed to have the same dimension. */ -template<typename Kernel> +template<typename Kernel, typename Filtration_value> class Sphere_circumradius { private: Kernel kernel_; public: + using FT = typename Kernel::FT; using Point = typename Kernel::Point_d; using Point_cloud = typename std::vector<Point>; + CGAL::NT_converter<FT, Filtration_value> cast_to_fv; + /** \brief Circumradius of sphere passing through two points using CGAL. * * @param[in] point_1 @@ -38,8 +41,8 @@ class Sphere_circumradius { * \tparam Point must be a Kernel::Point_d from CGAL. 
* */ - double operator()(const Point& point_1, const Point& point_2) const { - return std::sqrt(CGAL::to_double(kernel_.squared_distance_d_object()(point_1, point_2))) / 2.; + Filtration_value operator()(const Point& point_1, const Point& point_2) const { + return std::sqrt(cast_to_fv(kernel_.squared_distance_d_object()(point_1, point_2))) / 2.; } /** \brief Circumradius of sphere passing through point cloud using CGAL. @@ -49,8 +52,8 @@ class Sphere_circumradius { * \tparam Point_cloud must be a range of Kernel::Point_d points from CGAL. * */ - double operator()(const Point_cloud& point_cloud) const { - return std::sqrt(CGAL::to_double(kernel_.compute_squared_radius_d_object()(point_cloud.begin(), point_cloud.end()))); + Filtration_value operator()(const Point_cloud& point_cloud) const { + return std::sqrt(cast_to_fv(kernel_.compute_squared_radius_d_object()(point_cloud.begin(), point_cloud.end()))); } }; diff --git a/src/Cech_complex/test/test_cech_complex.cpp b/src/Cech_complex/test/test_cech_complex.cpp index ea32f596..f5980e6d 100644 --- a/src/Cech_complex/test/test_cech_complex.cpp +++ b/src/Cech_complex/test/test_cech_complex.cpp @@ -107,11 +107,11 @@ BOOST_AUTO_TEST_CASE(Cech_complex_for_documentation) { std::clog << vertex << ","; vp.push_back(points.at(vertex)); } - std::clog << ") - distance =" << Gudhi::cech_complex::Sphere_circumradius<Kernel>()(vp.at(0), vp.at(1)) + std::clog << ") - distance =" << Gudhi::cech_complex::Sphere_circumradius<Kernel, Filtration_value>()(vp.at(0), vp.at(1)) << " - filtration =" << st.filtration(f_simplex) << std::endl; BOOST_CHECK(vp.size() == 2); GUDHI_TEST_FLOAT_EQUALITY_CHECK(st.filtration(f_simplex), - Gudhi::cech_complex::Sphere_circumradius<Kernel>()(vp.at(0), vp.at(1))); + Gudhi::cech_complex::Sphere_circumradius<Kernel, Filtration_value>()(vp.at(0), vp.at(1))); } } diff --git a/src/cmake/modules/GUDHI_options.cmake b/src/cmake/modules/GUDHI_options.cmake index bffb3ffc..c75b72f5 100644 --- 
a/src/cmake/modules/GUDHI_options.cmake +++ b/src/cmake/modules/GUDHI_options.cmake @@ -1,5 +1,6 @@ option(WITH_GUDHI_BENCHMARK "Activate/deactivate benchmark compilation" OFF) option(WITH_GUDHI_EXAMPLE "Activate/deactivate examples compilation and installation" OFF) +option(WITH_GUDHI_REMOTE_TEST "Activate/deactivate datasets fetching test which uses the Internet" OFF) option(WITH_GUDHI_PYTHON "Activate/deactivate python module compilation and installation" ON) option(WITH_GUDHI_TEST "Activate/deactivate examples compilation and installation" ON) option(WITH_GUDHI_UTILITIES "Activate/deactivate utilities compilation and installation" ON) diff --git a/src/common/doc/installation.h b/src/common/doc/installation.h index 24a7fc7a..229c9f59 100644 --- a/src/common/doc/installation.h +++ b/src/common/doc/installation.h @@ -40,6 +40,8 @@ make \endverbatim * `make test` is using <a href="https://cmake.org/cmake/help/latest/manual/ctest.1.html">Ctest</a> (CMake test driver * program). If some of the tests are failing, please send us the result of the following command: * \verbatim ctest --output-on-failure \endverbatim + * Testing fetching datasets feature requires the use of the internet and is disabled by default. If you want to include this test, set WITH_GUDHI_REMOTE_TEST to ON when building in the previous step (note that this test is included in the python module): + * \verbatim cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_REMOTE_TEST=ON -DWITH_GUDHI_PYTHON=ON .. \endverbatim * * \subsection documentationgeneration Documentation * To generate the documentation, <a target="_blank" href="http://www.doxygen.org/">Doxygen</a> is required. 
diff --git a/src/common/doc/main_page.md b/src/common/doc/main_page.md index 2cb02e3f..ce903405 100644 --- a/src/common/doc/main_page.md +++ b/src/common/doc/main_page.md @@ -180,7 +180,7 @@ <td width="15%"> <b>Author:</b> Vincent Rouvreau<br> <b>Introduced in:</b> GUDHI 2.2.0<br> - <b>Copyright:</b> MIT [(GPL v3)](../../licensing/)<br> + <b>Copyright:</b> MIT [(LGPL v3)](../../licensing/)<br> <b>Requires:</b> \ref cgal </td> </tr> diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index af0b6115..c3768475 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -591,6 +591,11 @@ if(PYTHONINTERP_FOUND) add_gudhi_py_test(test_dtm_rips_complex) endif() + # Fetch remote datasets + if(WITH_GUDHI_REMOTE_TEST) + add_gudhi_py_test(test_remote_datasets) + endif() + # persistence graphical tools if(MATPLOTLIB_FOUND) add_gudhi_py_test(test_persistence_graphical_tools) diff --git a/src/python/doc/datasets_generators.inc b/src/python/doc/datasets.inc index 8d169275..95a87678 100644 --- a/src/python/doc/datasets_generators.inc +++ b/src/python/doc/datasets.inc @@ -2,7 +2,7 @@ :widths: 30 40 30 +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ - | .. figure:: | Datasets generators (points). | :Authors: Hind Montassif | + | .. figure:: | Datasets either generated or fetched. 
| :Authors: Hind Montassif | | img/sphere_3d.png | | | | | | :Since: GUDHI 3.5.0 | | | | | @@ -10,5 +10,5 @@ | | | | | | | :Requires: `CGAL <installation.html#cgal>`_ | +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ - | * :doc:`datasets_generators` | + | * :doc:`datasets` | +-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/datasets_generators.rst b/src/python/doc/datasets.rst index 260c3882..2d11a19d 100644 --- a/src/python/doc/datasets_generators.rst +++ b/src/python/doc/datasets.rst @@ -3,12 +3,14 @@ .. To get rid of WARNING: document isn't included in any toctree -=========================== -Datasets generators manual -=========================== +================ +Datasets manual +================ -We provide the generation of different customizable datasets to use as inputs for Gudhi complexes and data structures. +Datasets generators +=================== +We provide the generation of different customizable datasets to use as inputs for Gudhi complexes and data structures. Points generators ------------------ @@ -103,3 +105,29 @@ Example .. autofunction:: gudhi.datasets.generators.points.torus + + +Fetching datasets +================= + +We provide some ready-to-use datasets that are not available by default when getting GUDHI, and need to be fetched explicitly. + +By **default**, the fetched datasets directory is set to a folder named **'gudhi_data'** in the **user home folder**. +Alternatively, it can be set using the **'GUDHI_DATA'** environment variable. + +.. autofunction:: gudhi.datasets.remote.fetch_bunny + +.. figure:: ./img/bunny.png + :figclass: align-center + + 3D Stanford bunny with 35947 vertices. + + +.. autofunction:: gudhi.datasets.remote.fetch_spiral_2d + +.. 
figure:: ./img/spiral_2d.png + :figclass: align-center + + 2D spiral with 114562 vertices. + +.. autofunction:: gudhi.datasets.remote.clear_data_home diff --git a/src/python/doc/img/bunny.png b/src/python/doc/img/bunny.png Binary files differnew file mode 100644 index 00000000..769aa530 --- /dev/null +++ b/src/python/doc/img/bunny.png diff --git a/src/python/doc/img/spiral_2d.png b/src/python/doc/img/spiral_2d.png Binary files differnew file mode 100644 index 00000000..abd247cd --- /dev/null +++ b/src/python/doc/img/spiral_2d.png diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index 2d7921ae..35f4ba46 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -92,7 +92,7 @@ Clustering .. include:: clustering.inc -Datasets generators -******************* +Datasets +******** -.. include:: datasets_generators.inc +.. include:: datasets.inc diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py new file mode 100644 index 00000000..f6d3fe56 --- /dev/null +++ b/src/python/gudhi/datasets/remote.py @@ -0,0 +1,223 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Hind Montassif +# +# Copyright (C) 2021 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +from os.path import join, split, exists, expanduser +from os import makedirs, remove, environ + +from urllib.request import urlretrieve +import hashlib +import shutil + +import numpy as np + +def _get_data_home(data_home = None): + """ + Return the path of the remote datasets directory. + This folder is used to store remotely fetched datasets. + By default the datasets directory is set to a folder named 'gudhi_data' in the user home folder. + Alternatively, it can be set by the 'GUDHI_DATA' environment variable. + The '~' symbol is expanded to the user home folder. 
+ If the folder does not already exist, it is automatically created. + + Parameters + ---------- + data_home : string + The path to remote datasets directory. + Default is `None`, meaning that the data home directory will be set to "~/gudhi_data", + if the 'GUDHI_DATA' environment variable does not exist. + + Returns + ------- + data_home: string + The path to remote datasets directory. + """ + if data_home is None: + data_home = environ.get("GUDHI_DATA", join("~", "gudhi_data")) + data_home = expanduser(data_home) + makedirs(data_home, exist_ok=True) + return data_home + + +def clear_data_home(data_home = None): + """ + Delete the data home cache directory and all its content. + + Parameters + ---------- + data_home : string, default is None. + The path to remote datasets directory. + If `None` and the 'GUDHI_DATA' environment variable does not exist, + the default directory to be removed is set to "~/gudhi_data". + """ + data_home = _get_data_home(data_home) + shutil.rmtree(data_home) + +def _checksum_sha256(file_path): + """ + Compute the file checksum using sha256. + + Parameters + ---------- + file_path: string + Full path of the created file including filename. + + Returns + ------- + The hex digest of file_path. + """ + sha256_hash = hashlib.sha256() + chunk_size = 4096 + with open(file_path,"rb") as f: + # Read and update hash string value in blocks of 4K + while True: + buffer = f.read(chunk_size) + if not buffer: + break + sha256_hash.update(buffer) + return sha256_hash.hexdigest() + +def _fetch_remote(url, file_path, file_checksum = None): + """ + Fetch the wanted dataset from the given url and save it in file_path. + + Parameters + ---------- + url : string + The url to fetch the dataset from. + file_path : string + Full path of the downloaded file including filename. + file_checksum : string + The file checksum using sha256 to check against the one computed on the downloaded file. + Default is 'None', which means the checksum is not checked. 
def _get_archive_path(file_path, label):
    """
    Get archive path based on file_path given by user and label.

    The parent directory of the returned path is created when it does not exist yet.

    Parameters
    ----------
    file_path: string
        Full path of the file to get including filename, or None.
    label: string
        Label used along with 'data_home' to get archive path, in case 'file_path' is None.

    Returns
    -------
    Full path of archive including filename.
    """
    if file_path is None:
        archive_path = join(_get_data_home(), label)
    else:
        archive_path = file_path

    dirname = split(archive_path)[0]
    # A bare filename (e.g. "bunny.npy") has an empty directory part and
    # makedirs("") raises FileNotFoundError. Such a file lands in the current
    # working directory, which does not need to be created.
    if dirname:
        makedirs(dirname, exist_ok=True)

    return archive_path


def fetch_spiral_2d(file_path=None):
    """
    Load the spiral_2d dataset.

    Note that if the dataset already exists in the target location, it is not downloaded again,
    and the corresponding array is returned from cache.

    Parameters
    ----------
    file_path : string
        Full path of the downloaded file including filename.

        Default is None, meaning that it's set to "data_home/points/spiral_2d/spiral_2d.npy".

        The "data_home" directory is set by default to "~/gudhi_data",
        unless the 'GUDHI_DATA' environment variable is set.

    Returns
    -------
    points: numpy array
        Array of shape (114562, 2), memory-mapped from the file in read-only mode.
    """
    file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy"
    file_checksum = '2226024da76c073dd2f24b884baefbfd14928b52296df41ad2d9b9dc170f2401'

    archive_path = _get_archive_path(file_path, "points/spiral_2d/spiral_2d.npy")

    # Only hit the network when the file is not already in the target location.
    if not exists(archive_path):
        _fetch_remote(file_url, archive_path, file_checksum)

    return np.load(archive_path, mmap_mode='r')


def fetch_bunny(file_path=None, accept_license=False):
    """
    Load the Stanford bunny dataset.

    This dataset contains 35947 vertices.

    Note that if the dataset already exists in the target location, it is not downloaded again,
    and the corresponding array is returned from cache.

    Parameters
    ----------
    file_path : string
        Full path of the downloaded file including filename.

        Default is None, meaning that it's set to "data_home/points/bunny/bunny.npy".
        In this case, the LICENSE file would be downloaded as "data_home/points/bunny/bunny.LICENSE".
        More generally, the LICENSE file is stored as "bunny.LICENSE" in the same directory as the dataset.

        The "data_home" directory is set by default to "~/gudhi_data",
        unless the 'GUDHI_DATA' environment variable is set.

    accept_license : boolean
        Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms.
        The license terms are only printed when the dataset is actually downloaded.

        Default is False.

    Returns
    -------
    points: numpy array
        Array of shape (35947, 3), memory-mapped from the file in read-only mode.
    """
    file_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.npy"
    file_checksum = 'f382482fd89df8d6444152dc8fd454444fe597581b193fd139725a85af4a6c6e'
    license_url = "https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/bunny/bunny.LICENSE"
    license_checksum = 'b763dbe1b2fc6015d05cbf7bcc686412a2eb100a1f2220296e3b4a644c69633a'

    archive_path = _get_archive_path(file_path, "points/bunny/bunny.npy")

    if not exists(archive_path):
        _fetch_remote(file_url, archive_path, file_checksum)
        # The license is stored next to the dataset, whatever the dataset filename is.
        license_path = join(split(archive_path)[0], "bunny.LICENSE")
        _fetch_remote(license_url, license_path, license_checksum)
        # Print license terms unless accept_license is set to True
        if not accept_license and exists(license_path):
            with open(license_path, 'r') as f:
                print(f.read())

    return np.load(archive_path, mmap_mode='r')
# Author(s):       Hind Montassif
#
# Copyright (C) 2021 Inria
#
# Modification(s):
#   - YYYY/MM Author: Description of the modification

from gudhi.datasets import remote

import contextlib
import shutil
import io
import sys
import pytest

from os.path import isdir, expanduser, exists
from os import remove, environ


def test_data_home():
    # Test _get_data_home and clear_data_home on new empty folder
    empty_data_home = remote._get_data_home(data_home="empty_folder_for_test")
    assert isdir(empty_data_home)

    remote.clear_data_home(data_home=empty_data_home)
    assert not isdir(empty_data_home)


def test_fetch_remote():
    # Test fetch with a wrong checksum
    with pytest.raises(OSError):
        remote._fetch_remote("https://raw.githubusercontent.com/GUDHI/gudhi-data/main/points/spiral_2d/spiral_2d.npy", "tmp_spiral_2d.npy", file_checksum = 'XXXXXXXXXX')
    # Must stay outside of the 'with' block: statements placed after the raising call
    # inside the block would never be executed.
    assert not exists("tmp_spiral_2d.npy")


def _get_bunny_license_print(accept_license = False):
    """
    Fetch the bunny dataset in "./tmp_for_test" and return the io.StringIO that captured
    what was printed on stdout meanwhile. The downloaded "bunny.npy" is removed so that the
    next call downloads it again; "bunny.LICENSE" is left in place.
    """
    captured_output = io.StringIO()
    # redirect_stdout restores the previous stdout on exit, even if the fetch or the
    # assertion fails, and does not replace a stream installed by the test runner.
    with contextlib.redirect_stdout(captured_output):
        bunny_arr = remote.fetch_bunny("./tmp_for_test/bunny.npy", accept_license)
        assert bunny_arr.shape == (35947, 3)
        # Release the memory-mapped array before removing the underlying file
        del bunny_arr
        remove("./tmp_for_test/bunny.npy")

    return captured_output


def test_print_bunny_license():
    # Test not printing bunny.npy LICENSE when accept_license = True
    assert "" == _get_bunny_license_print(accept_license = True).getvalue()
    # Test printing bunny.LICENSE file when fetching bunny.npy with accept_license = False (default)
    with open("./tmp_for_test/bunny.LICENSE") as f:
        assert f.read().rstrip("\n") == _get_bunny_license_print().getvalue().rstrip("\n")
    shutil.rmtree("./tmp_for_test")


def test_fetch_remote_datasets_wrapped():
    # Test fetch_spiral_2d and fetch_bunny wrapping functions with data directory different from default (twice, to test case of already fetched files)
    # Default case is not tested because it would fail in case the user sets the 'GUDHI_DATA' environment variable locally
    for i in range(2):
        spiral_2d_arr = remote.fetch_spiral_2d("./another_fetch_folder_for_test/spiral_2d.npy")
        assert spiral_2d_arr.shape == (114562, 2)

        bunny_arr = remote.fetch_bunny("./another_fetch_folder_for_test/bunny.npy")
        assert bunny_arr.shape == (35947, 3)

        # Check that the directory was created
        assert isdir("./another_fetch_folder_for_test")
        # Check downloaded files
        assert exists("./another_fetch_folder_for_test/spiral_2d.npy")
        assert exists("./another_fetch_folder_for_test/bunny.npy")
        assert exists("./another_fetch_folder_for_test/bunny.LICENSE")

    # Remove test folders
    del spiral_2d_arr
    del bunny_arr
    shutil.rmtree("./another_fetch_folder_for_test")


def test_gudhi_data_env():
    # Set environment variable "GUDHI_DATA", remembering its previous value so that
    # the change does not leak into the tests that run afterwards in the same process
    previous_gudhi_data = environ.get("GUDHI_DATA")
    environ["GUDHI_DATA"] = "./test_folder_from_env_var"
    try:
        bunny_arr = remote.fetch_bunny()
        assert bunny_arr.shape == (35947, 3)
        assert exists("./test_folder_from_env_var/points/bunny/bunny.npy")
        assert exists("./test_folder_from_env_var/points/bunny/bunny.LICENSE")
        # Remove test folder
        del bunny_arr
        shutil.rmtree("./test_folder_from_env_var")
    finally:
        # Restore the environment exactly as it was before the test
        if previous_gudhi_data is None:
            environ.pop("GUDHI_DATA", None)
        else:
            environ["GUDHI_DATA"] = previous_gudhi_data