From 436a7fbe36a9de6a969afd5978c3d496773a8690 Mon Sep 17 00:00:00 2001 From: mathieu Date: Thu, 16 Jan 2020 15:46:41 -0500 Subject: added wrapper functions --- src/python/gudhi/cubical_complex.pyx | 29 ++++++++- src/python/gudhi/simplex_tree.pxd | 1 + src/python/gudhi/simplex_tree.pyx | 28 +++++++- .../include/Persistent_cohomology_interface.h | 76 ++++++++++++++++++++++ 4 files changed, 132 insertions(+), 2 deletions(-) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index cbeda014..5562e8a7 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -31,6 +31,7 @@ cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface>": Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max) vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[pair[double, int], pair[double, int]]]] get_persistence_cubical_generators(int homology_coeff_field, double min_persistence) vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -85,7 +86,7 @@ cdef class CubicalComplex: elif ((dimensions is None) and (top_dimensional_cells is None) and (perseus_file != '')): if os.path.isfile(perseus_file): - self.thisptr = new Bitmap_cubical_complex_base_interface(perseus_file.encode('utf-8')) + self.thisptr = new Bitmap_cubical_complex_base_interface(str.encode(perseus_file)) else: print("file " + perseus_file + " not found.") else: @@ -145,6 +146,32 @@ cdef class CubicalComplex: persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) return persistence_result + def persistence_generators(self, homology_coeff_field=11, min_persistence=0, 
persistence_dim_max = False): + """This function returns the persistence of the simplicial complex. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number. Default value is 11. + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :param persistence_dim_max: If true, the persistent homology for the + maximal dimension in the complex is computed. If false, it is + ignored. Default is false. + :type persistence_dim_max: bool + :returns: The persistence of the simplicial complex, together with the corresponding generators, i.e., the positive and negative top-dimensional cells. + :rtype: list of pairs(dimension, pair(index of positive top-dimensional cell, index of negative top-dimensional cell)) + """ + if self.pcohptr != NULL: + del self.pcohptr + self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) + cdef vector[pair[int, pair[pair[double, int], pair[double, int]]]] persistence_result + if self.pcohptr != NULL: + persistence_result = self.pcohptr.get_persistence_cubical_generators(homology_coeff_field, min_persistence) + return persistence_result + def betti_numbers(self): """This function returns the Betti numbers of the complex. 
diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 1066d44b..9e52a8aa 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -48,6 +48,7 @@ cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface>": Simplex_tree_persistence_interface(Simplex_tree_interface_full_featured * st, bool persistence_dim_max) vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[pair[double, vector[int]], pair[double, vector[int]]]]] get_persistence_generators(int homology_coeff_field, double min_persistence) vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index b18627c4..8cc58f8f 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -412,6 +412,32 @@ cdef class SimplexTree: persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) return persistence_result + def persistence_generators(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): + """This function returns the persistence of the simplicial complex. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number. Default value is 11. + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :param persistence_dim_max: If true, the persistent homology for the + maximal dimension in the complex is computed. If false, it is + ignored. Default is false. 
+ :type persistence_dim_max: bool + :returns: The persistence of the simplicial complex, together with the corresponding generators, i.e., the positive and negative simplices. + :rtype: list of pairs(dimension, pair(positive_simplex, negative_simplex)) + """ + if self.pcohptr != NULL: + del self.pcohptr + self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) + cdef vector[pair[int, pair[pair[double, vector[int]], pair[double, vector[int]]]]] persistence_result + if self.pcohptr != NULL: + persistence_result = self.pcohptr.get_persistence_generators(homology_coeff_field, min_persistence) + return persistence_result + def betti_numbers(self): """This function returns the Betti numbers of the simplicial complex. @@ -508,7 +534,7 @@ cdef class SimplexTree: """ if self.pcohptr != NULL: if persistence_file != '': - self.pcohptr.write_output_diagram(persistence_file.encode('utf-8')) + self.pcohptr.write_output_diagram(str.encode(persistence_file)) else: print("persistence_file must be specified") else: diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 8c79e6f3..774eb56a 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -73,6 +73,82 @@ persistent_cohomology::Persistent_cohomology>, std::pair>>>> get_persistence_generators(int homology_coeff_field, + double min_persistence) { + persistent_cohomology::Persistent_cohomology::init_coefficients(homology_coeff_field); + persistent_cohomology::Persistent_cohomology::compute_persistent_cohomology(min_persistence); + + // Custom sort and output persistence + cmp_intervals_by_dim_then_length cmp(stptr_); + auto persistent_pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); + std::sort(std::begin(persistent_pairs), std::end(persistent_pairs), cmp); + + std::vector>, std::pair>>>> persistence; + for (auto pair : 
persistent_pairs) { + std::vector splx0, splx1; + for (auto vertex : stptr_->simplex_vertex_range(get<0>(pair))){splx0.push_back(vertex);} + if (isfinite(stptr_->filtration(get<1>(pair)))){ for (auto vertex : stptr_->simplex_vertex_range(get<1>(pair))){splx1.push_back(vertex);}} + persistence.push_back(std::make_pair(stptr_->dimension(get<0>(pair)), std::make_pair(std::make_pair(stptr_->filtration(get<0>(pair)), splx0), std::make_pair(stptr_->filtration(get<1>(pair)), splx1)))); + } + return persistence; + } + + void top_dimensional_cofaces(std::vector & cofaces, int splx){ + if (stptr_->dimension(stptr_->simplex(splx)) == stptr_->dimension()){cofaces.push_back(stptr_->simplex(splx));} + else{ for (auto v : stptr_->coboundary_simplex_range(stptr_->simplex(splx))){top_dimensional_cofaces(cofaces, stptr_->key(v));} } + } + + std::vector, std::pair>>> get_persistence_cubical_generators(int homology_coeff_field, + double min_persistence) { + + // Gather all top-dimensional cells and store their simplex handles + std::vector max_splx; for (auto splx : stptr_->filtration_simplex_range()){ if (stptr_->dimension(splx) == stptr_->dimension()) max_splx.push_back(splx); } + // Sort these simplex handles and compute the ordering function + // This function allows to go directly from the simplex handle to the position of the corresponding top-dimensional cell in the input data + std::map order; std::sort(max_splx.begin(), max_splx.end()); for (int i = 0; i < max_splx.size(); i++) order.insert(std::make_pair(max_splx[i], i)); + + persistent_cohomology::Persistent_cohomology::init_coefficients(homology_coeff_field); + persistent_cohomology::Persistent_cohomology::compute_persistent_cohomology(min_persistence); + + // Custom sort and output persistence + cmp_intervals_by_dim_then_length cmp(stptr_); + auto persistent_pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); + std::sort(std::begin(persistent_pairs), std::end(persistent_pairs), cmp); + + 
std::vector, std::pair>>> persistence; + for (auto pair : persistent_pairs) { + + int splx0, splx1; + + double f0 = stptr_->filtration(get<0>(pair)); + // Recursively get the top-dimensional cells / cofaces associated to the persistence generator + std::vector faces0; top_dimensional_cofaces(faces0, stptr_->key(get<0>(pair))); + // Find the top-dimensional cell / coface with the same filtration value + int cf; for (int i = 0; i < faces0.size(); i++){ if (stptr_->filtration(faces0[i]) == f0){cf = i; break;}} + // Retrieve the index of the corresponding top-dimensional cell in the input data + splx0 = order[faces0[cf]]; + + if (isfinite(stptr_->filtration(get<1>(pair)))){ + double f1 = stptr_->filtration(get<1>(pair)); + // Recursively get the top-dimensional cells / cofaces associated to the persistence generator + std::vector faces1; top_dimensional_cofaces(faces1, stptr_->key(get<1>(pair))); + // Find the top-dimensional cell / coface with the same filtration value + int cf; for (int i = 0; i < faces0.size(); i++){ if (stptr_->filtration(faces0[i]) == f0){cf = i; break;}} + // Retrieve the index of the corresponding top-dimensional cell in the input data + splx1 = order[faces1[cf]]; + } + + persistence.push_back(std::make_pair(stptr_->dimension(get<0>(pair)), std::make_pair(std::make_pair(stptr_->filtration(get<0>(pair)), splx0), std::make_pair(stptr_->filtration(get<1>(pair)), splx1)))); + } + return persistence; + } + std::vector, std::vector>> persistence_pairs() { auto pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); -- cgit v1.2.3 From 5694670b3e20f0cb935a751614ef12b6009a60c0 Mon Sep 17 00:00:00 2001 From: mathieu Date: Thu, 16 Jan 2020 15:58:15 -0500 Subject: fix to detect infinite persistence --- src/python/include/Persistent_cohomology_interface.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h 
index 774eb56a..acc32b21 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -124,16 +124,15 @@ persistent_cohomology::Persistent_cohomology, std::pair>>> persistence; for (auto pair : persistent_pairs) { - int splx0, splx1; - double f0 = stptr_->filtration(get<0>(pair)); // Recursively get the top-dimensional cells / cofaces associated to the persistence generator std::vector faces0; top_dimensional_cofaces(faces0, stptr_->key(get<0>(pair))); // Find the top-dimensional cell / coface with the same filtration value int cf; for (int i = 0; i < faces0.size(); i++){ if (stptr_->filtration(faces0[i]) == f0){cf = i; break;}} // Retrieve the index of the corresponding top-dimensional cell in the input data - splx0 = order[faces0[cf]]; + int splx0 = order[faces0[cf]]; + int splx1 = -1; if (isfinite(stptr_->filtration(get<1>(pair)))){ double f1 = stptr_->filtration(get<1>(pair)); // Recursively get the top-dimensional cells / cofaces associated to the persistence generator -- cgit v1.2.3 From c89df405c77bb7270db1a7d8f0e49bc22c1b010d Mon Sep 17 00:00:00 2001 From: mathieu Date: Thu, 16 Jan 2020 16:17:38 -0500 Subject: fix typo + coboundary error --- src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex.h | 1 + src/python/include/Persistent_cohomology_interface.h | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex.h b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex.h index 37514dee..bf09532e 100644 --- a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex.h +++ b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex.h @@ -340,6 +340,7 @@ class Bitmap_cubical_complex : public T { * that provides ranges for the Boundary_simplex_iterator. 
**/ Boundary_simplex_range boundary_simplex_range(Simplex_handle sh) { return this->get_boundary_of_a_cell(sh); } + Boundary_simplex_range coboundary_simplex_range(Simplex_handle sh) { return this->get_coboundary_of_a_cell(sh); } /** * filtration_simplex_range creates an object of a Filtration_simplex_range class diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index acc32b21..0ad14477 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -108,7 +108,7 @@ persistent_cohomology::Persistent_cohomology max_splx; for (auto splx : stptr_->filtration_simplex_range()){ if (stptr_->dimension(splx) == stptr_->dimension()) max_splx.push_back(splx); } // Sort these simplex handles and compute the ordering function // This function allows to go directly from the simplex handle to the position of the corresponding top-dimensional cell in the input data - std::map order; std::sort(max_splx.begin(), max_splx.end()); for (int i = 0; i < max_splx.size(); i++) order.insert(std::make_pair(max_splx[i], i)); + std::map order; std::sort(max_splx.begin(), max_splx.end()); for (unsigned int i = 0; i < max_splx.size(); i++) order.insert(std::make_pair(max_splx[i], i)); persistent_cohomology::Persistent_cohomology::init_coefficients(homology_coeff_field); @@ -128,7 +128,7 @@ persistent_cohomology::Persistent_cohomology faces0; top_dimensional_cofaces(faces0, stptr_->key(get<0>(pair))); // Find the top-dimensional cell / coface with the same filtration value - int cf; for (int i = 0; i < faces0.size(); i++){ if (stptr_->filtration(faces0[i]) == f0){cf = i; break;}} + int cf; for (unsigned int i = 0; i < faces0.size(); i++){if (stptr_->filtration(faces0[i]) == f0){cf = i; break;}} // Retrieve the index of the corresponding top-dimensional cell in the input data int splx0 = order[faces0[cf]]; @@ -138,7 +138,7 @@ 
persistent_cohomology::Persistent_cohomology faces1; top_dimensional_cofaces(faces1, stptr_->key(get<1>(pair))); // Find the top-dimensional cell / coface with the same filtration value - int cf; for (int i = 0; i < faces0.size(); i++){ if (stptr_->filtration(faces0[i]) == f0){cf = i; break;}} + int cf; for (unsigned int i = 0; i < faces1.size(); i++){if (stptr_->filtration(faces1[i]) == f1){cf = i; break;}} // Retrieve the index of the corresponding top-dimensional cell in the input data splx1 = order[faces1[cf]]; } -- cgit v1.2.3 From 19562b27182dcfa6ed262002c2bc8934382f5a53 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Thu, 16 Jan 2020 21:26:02 -0500 Subject: get rid of persistence_generators and modified name for cubical complex --- src/python/gudhi/cubical_complex.pyx | 8 +++--- src/python/gudhi/simplex_tree.pxd | 1 - src/python/gudhi/simplex_tree.pyx | 26 ------------------- .../include/Persistent_cohomology_interface.h | 29 +++------------------- 4 files changed, 8 insertions(+), 56 deletions(-) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index 5562e8a7..8ea31486 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -31,7 +31,7 @@ cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface>": Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max) vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) - vector[pair[int, pair[pair[double, int], pair[double, int]]]] get_persistence_cubical_generators(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[pair[double, int], pair[double, int]]]] get_cofaces_of_cubical_persistence_pairs(int homology_coeff_field, double min_persistence) vector[int] betti_numbers() vector[int] 
persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -146,7 +146,7 @@ cdef class CubicalComplex: persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) return persistence_result - def persistence_generators(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): + def cofaces_of_cubical_persistence_pairs(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): """This function returns the persistence of the simplicial complex. :param homology_coeff_field: The homology coefficient field. Must be a @@ -161,7 +161,7 @@ cdef class CubicalComplex: maximal dimension in the complex is computed. If false, it is ignored. Default is false. :type persistence_dim_max: bool - :returns: The persistence of the simplicial complex, together with the corresponding generators, i.e., the positive and negative top-dimensional cells. + :returns: The persistence of the simplicial complex, together with the cofaces of the corresponding generators, i.e., the top-dimensional cells/cofaces of the positive and negative simplices. 
:rtype: list of pairs(dimension, pair(index of positive top-dimensional cell, index of negative top-dimensional cell)) """ if self.pcohptr != NULL: @@ -169,7 +169,7 @@ cdef class CubicalComplex: self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) cdef vector[pair[int, pair[pair[double, int], pair[double, int]]]] persistence_result if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence_cubical_generators(homology_coeff_field, min_persistence) + persistence_result = self.pcohptr.get_cofaces_of_cubical_persistence_pairs(homology_coeff_field, min_persistence) return persistence_result def betti_numbers(self): diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 9e52a8aa..1066d44b 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -48,7 +48,6 @@ cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface>": Simplex_tree_persistence_interface(Simplex_tree_interface_full_featured * st, bool persistence_dim_max) vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) - vector[pair[int, pair[pair[double, vector[int]], pair[double, vector[int]]]]] get_persistence_generators(int homology_coeff_field, double min_persistence) vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 8cc58f8f..85d25492 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -412,32 +412,6 @@ cdef class SimplexTree: persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) return persistence_result - def persistence_generators(self, homology_coeff_field=11, 
min_persistence=0, persistence_dim_max = False): - """This function returns the persistence of the simplicial complex. - - :param homology_coeff_field: The homology coefficient field. Must be a - prime number. Default value is 11. - :type homology_coeff_field: int. - :param min_persistence: The minimum persistence value to take into - account (strictly greater than min_persistence). Default value is - 0.0. - Sets min_persistence to -1.0 to see all values. - :type min_persistence: float. - :param persistence_dim_max: If true, the persistent homology for the - maximal dimension in the complex is computed. If false, it is - ignored. Default is false. - :type persistence_dim_max: bool - :returns: The persistence of the simplicial complex, together with the corresponding generators, i.e., the positive and negative simplices. - :rtype: list of pairs(dimension, pair(positive_simplex, negative_simplex)) - """ - if self.pcohptr != NULL: - del self.pcohptr - self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) - cdef vector[pair[int, pair[pair[double, vector[int]], pair[double, vector[int]]]]] persistence_result - if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence_generators(homology_coeff_field, min_persistence) - return persistence_result - def betti_numbers(self): """This function returns the Betti numbers of the simplicial complex. 
diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 0ad14477..1a1e716e 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -73,36 +73,15 @@ persistent_cohomology::Persistent_cohomology>, std::pair>>>> get_persistence_generators(int homology_coeff_field, - double min_persistence) { - persistent_cohomology::Persistent_cohomology::init_coefficients(homology_coeff_field); - persistent_cohomology::Persistent_cohomology::compute_persistent_cohomology(min_persistence); - - // Custom sort and output persistence - cmp_intervals_by_dim_then_length cmp(stptr_); - auto persistent_pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); - std::sort(std::begin(persistent_pairs), std::end(persistent_pairs), cmp); - - std::vector>, std::pair>>>> persistence; - for (auto pair : persistent_pairs) { - std::vector splx0, splx1; - for (auto vertex : stptr_->simplex_vertex_range(get<0>(pair))){splx0.push_back(vertex);} - if (isfinite(stptr_->filtration(get<1>(pair)))){ for (auto vertex : stptr_->simplex_vertex_range(get<1>(pair))){splx1.push_back(vertex);}} - persistence.push_back(std::make_pair(stptr_->dimension(get<0>(pair)), std::make_pair(std::make_pair(stptr_->filtration(get<0>(pair)), splx0), std::make_pair(stptr_->filtration(get<1>(pair)), splx1)))); - } - return persistence; - } - void top_dimensional_cofaces(std::vector & cofaces, int splx){ if (stptr_->dimension(stptr_->simplex(splx)) == stptr_->dimension()){cofaces.push_back(stptr_->simplex(splx));} else{ for (auto v : stptr_->coboundary_simplex_range(stptr_->simplex(splx))){top_dimensional_cofaces(cofaces, stptr_->key(v));} } } - std::vector, std::pair>>> get_persistence_cubical_generators(int homology_coeff_field, - double min_persistence) { + std::vector, std::pair>>> get_cofaces_of_cubical_persistence_pairs(int homology_coeff_field, + double 
min_persistence) { + + // Warning: this function is meant to be used with CubicalComplex only!! // Gather all top-dimensional cells and store their simplex handles std::vector max_splx; for (auto splx : stptr_->filtration_simplex_range()){ if (stptr_->dimension(splx) == stptr_->dimension()) max_splx.push_back(splx); } -- cgit v1.2.3 From 62e92e64bd97ec0bd26c31e071228f7d7c78b0e5 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Thu, 16 Jan 2020 21:29:55 -0500 Subject: fixed typo for CubicalComplex --- src/python/gudhi/cubical_complex.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index 8ea31486..bd432834 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -86,7 +86,7 @@ cdef class CubicalComplex: elif ((dimensions is None) and (top_dimensional_cells is None) and (perseus_file != '')): if os.path.isfile(perseus_file): - self.thisptr = new Bitmap_cubical_complex_base_interface(str.encode(perseus_file)) + self.thisptr = new Bitmap_cubical_complex_base_interface(perseus_file.encode('utf-8')) else: print("file " + perseus_file + " not found.") else: -- cgit v1.2.3 From a145c7168fdb3f4205cb68870f06fc5cb8e08dea Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Fri, 31 Jan 2020 14:49:59 -0500 Subject: factorization of distance and kernel computations --- src/python/gudhi/representations/kernel_methods.py | 131 +++++++---- src/python/gudhi/representations/metrics.py | 247 +++++++++------------ 2 files changed, 193 insertions(+), 185 deletions(-) diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py index bfc83aff..bbbb7c31 100644 --- a/src/python/gudhi/representations/kernel_methods.py +++ b/src/python/gudhi/representations/kernel_methods.py @@ -9,13 +9,83 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.metrics import 
pairwise_distances -from .metrics import SlicedWassersteinDistance, PersistenceFisherDistance +from sklearn.metrics import pairwise_distances, pairwise_kernels +from .metrics import SlicedWassersteinDistance, PersistenceFisherDistance, sklearn_wrapper, pairwise_persistence_diagram_distances, sliced_wasserstein_distance, persistence_fisher_distance +from .preprocessing import Padding ############################################# # Kernel methods ############################ ############################################# +def persistence_weighted_gaussian_kernel(D1, D2, weight=lambda x: 1, kernel_approx=None, bandwidth=1.): + """ + This is a function for computing the persistence weighted Gaussian kernel value from two persistence diagrams. The persistence weighted Gaussian kernel is computed by convolving the persistence diagram points with weighted Gaussian kernels. See http://proceedings.mlr.press/v48/kusano16.html for more details. + :param D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). + :param D2: (m x 2) numpy.array encoding the second diagram. + :param bandwidth: bandwidth of the Gaussian kernel with which persistence diagrams will be convolved + :param weight: weight function for the persistence diagram points. This function must be defined on 2D points, ie lists or numpy arrays of the form [p_x,p_y]. + :param kernel_approx: kernel approximation class used to speed up computation. Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + :returns: the persistence weighted Gaussian kernel value between persistence diagrams. 
+ :rtype: float + """ + ws1 = np.array([weight(D1[j,:]) for j in range(len(D1))]) + ws2 = np.array([weight(D2[j,:]) for j in range(len(D2))]) + if kernel_approx is not None: + approx1 = np.sum(np.multiply(ws1[:,np.newaxis], kernel_approx.transform(D1)), axis=0) + approx2 = np.sum(np.multiply(ws2[:,np.newaxis], kernel_approx.transform(D2)), axis=0) + return (1./(np.sqrt(2*np.pi)*bandwidth)) * np.matmul(approx1, approx2.T) + else: + W = np.matmul(ws1[:,np.newaxis], ws2[np.newaxis,:]) + E = (1./(np.sqrt(2*np.pi)*bandwidth)) * np.exp(-np.square(pairwise_distances(D1,D2))/(2*bandwidth*bandwidth)) + return np.sum(np.multiply(W, E)) + +def persistence_scale_space_kernel(D1, D2, kernel_approx=None, bandwidth=1.): + """ + This is a function for computing the persistence scale space kernel value from two persistence diagrams. The persistence scale space kernel is computed by adding the symmetric to the diagonal of each point in each persistence diagram, with negative weight, and then convolving the points with a Gaussian kernel. See https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Reininghaus_A_Stable_Multi-Scale_2015_CVPR_paper.pdf for more details. + :param D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). + :param D2: (m x 2) numpy.array encoding the second diagram. + :param bandwidth: bandwidth of the Gaussian kernel with which persistence diagrams will be convolved + :param kernel_approx: kernel approximation class used to speed up computation. Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + :returns: the persistence scale space kernel value between persistence diagrams. 
+ :rtype: float + """ + DD1 = np.concatenate([D1, D1[:,[1,0]]], axis=0) + DD2 = np.concatenate([D2, D2[:,[1,0]]], axis=0) + weight_pss = lambda x: 1 if x[1] >= x[0] else -1 + return 0.5 * persistence_weighted_gaussian_kernel(DD1, DD2, weight=weight_pss, kernel_approx=kernel_approx, bandwidth=bandwidth) + +def pairwise_persistence_diagram_kernels(X, Y=None, metric="sliced_wasserstein", **kwargs): + """ + This function computes the kernel matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2). + :param X: first list of persistence diagrams. + :param Y: second list of persistence diagrams (optional). If None, pairwise kernel values are computed from the first list only. + :param metric: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. + :returns: kernel matrix, i.e., numpy array of shape (num diagrams 1 x num diagrams 2) + :rtype: float + """ + if Y is None: + YY = None + pX = Padding(use=True).fit_transform(X) + diag_len = len(pX[0]) + XX = np.reshape(np.vstack(pX), [-1, diag_len*3]) + else: + nX, nY = len(X), len(Y) + pD = Padding(use=True).fit_transform(X + Y) + diag_len = len(pD[0]) + XX = np.reshape(np.vstack(pD[:nX]), [-1, diag_len*3]) + YY = np.reshape(np.vstack(pD[nX:]), [-1, diag_len*3]) + + if metric == "sliced_wasserstein": + return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="sliced_wasserstein", num_directions=kwargs["num_directions"]) / kwargs["bandwidth"]) + elif metric == "persistence_fisher": + return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="persistence_fisher", kernel_approx=kwargs["kernel_approx"], bandwidth=kwargs["bandwidth"]) / kwargs["bandwidth_fisher"]) + elif metric == "persistence_scale_space": + return pairwise_kernels(XX, YY, metric=sklearn_wrapper(persistence_scale_space_kernel, **kwargs)) + elif metric == 
"persistence_weighted_gaussian": + return pairwise_kernels(XX, YY, metric=sklearn_wrapper(persistence_weighted_gaussian_kernel, **kwargs)) + else: + return pairwise_kernels(XX, YY, metric=sklearn_wrapper(metric, **kwargs)) + class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): """ This is a class for computing the sliced Wasserstein kernel matrix from a list of persistence diagrams. The sliced Wasserstein kernel is computed by exponentiating the corresponding sliced Wasserstein distance with a Gaussian kernel. See http://proceedings.mlr.press/v70/carriere17a.html for more details. @@ -29,7 +99,7 @@ class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the kernel computation (default 10). """ self.bandwidth = bandwidth - self.sw_ = SlicedWassersteinDistance(num_directions=num_directions) + self.num_directions = num_directions def fit(self, X, y=None): """ @@ -39,7 +109,7 @@ class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - self.sw_.fit(X, y) + self.diagrams_ = X return self def transform(self, X): @@ -52,7 +122,7 @@ class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise sliced Wasserstein kernel values. """ - return np.exp(-self.sw_.transform(X)/self.bandwidth) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, metric="sliced_wasserstein", bandwidth=self.bandwidth, num_directions=self.num_directions) class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin): """ @@ -78,10 +148,7 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. 
y (n x 1 array): persistence diagram labels (unused). """ - self.diagrams_ = list(X) - self.ws_ = [ np.array([self.weight(self.diagrams_[i][j,:]) for j in range(self.diagrams_[i].shape[0])]) for i in range(len(self.diagrams_)) ] - if self.kernel_approx is not None: - self.approx_ = np.concatenate([np.sum(np.multiply(self.ws_[i][:,np.newaxis], self.kernel_approx.transform(self.diagrams_[i])), axis=0)[np.newaxis,:] for i in range(len(self.diagrams_))]) + self.diagrams_ = X return self def transform(self, X): @@ -94,31 +161,7 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence weighted Gaussian kernel values. """ - Xp = list(X) - Xfit = np.zeros((len(Xp), len(self.diagrams_))) - if len(self.diagrams_) == len(Xp) and np.all([np.array_equal(self.diagrams_[i], Xp[i]) for i in range(len(Xp))]): - if self.kernel_approx is not None: - Xfit = (1./(np.sqrt(2*np.pi)*self.bandwidth)) * np.matmul(self.approx_, self.approx_.T) - else: - for i in range(len(self.diagrams_)): - for j in range(i+1, len(self.diagrams_)): - W = np.matmul(self.ws_[i][:,np.newaxis], self.ws_[j][np.newaxis,:]) - E = (1./(np.sqrt(2*np.pi)*self.bandwidth)) * np.exp(-np.square(pairwise_distances(self.diagrams_[i], self.diagrams_[j]))/(2*np.square(self.bandwidth))) - Xfit[i,j] = np.sum(np.multiply(W, E)) - Xfit[j,i] = Xfit[i,j] - else: - ws = [ np.array([self.weight(Xp[i][j,:]) for j in range(Xp[i].shape[0])]) for i in range(len(Xp)) ] - if self.kernel_approx is not None: - approx = np.concatenate([np.sum(np.multiply(ws[i][:,np.newaxis], self.kernel_approx.transform(Xp[i])), axis=0)[np.newaxis,:] for i in range(len(Xp))]) - Xfit = (1./(np.sqrt(2*np.pi)*self.bandwidth)) * np.matmul(approx, self.approx_.T) - else: - for i in range(len(Xp)): - for j in range(len(self.diagrams_)): - W = np.matmul(ws[i][:,np.newaxis], self.ws_[j][np.newaxis,:]) - E = 
(1./(np.sqrt(2*np.pi)*self.bandwidth)) * np.exp(-np.square(pairwise_distances(Xp[i], self.diagrams_[j]))/(2*np.square(self.bandwidth))) - Xfit[i,j] = np.sum(np.multiply(W, E)) - - return Xfit + return pairwise_persistence_diagram_kernels(X, self.diagrams_, metric="persistence_weighted_gaussian", bandwidth=self.bandwidth, weight=self.weight, kernel_approx=self.kernel_approx) class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin): """ @@ -132,7 +175,7 @@ class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin): bandwidth (double): bandwidth of the Gaussian kernel with which persistence diagrams will be convolved (default 1.) kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). """ - self.pwg_ = PersistenceWeightedGaussianKernel(bandwidth=bandwidth, weight=lambda x: 1 if x[1] >= x[0] else -1, kernel_approx=kernel_approx) + self.bandwidth, self.kernel_approx = bandwidth, kernel_approx def fit(self, X, y=None): """ @@ -142,11 +185,7 @@ class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - self.diagrams_ = list(X) - for i in range(len(self.diagrams_)): - op_D = self.diagrams_[i][:,[1,0]] - self.diagrams_[i] = np.concatenate([self.diagrams_[i], op_D], axis=0) - self.pwg_.fit(X) + self.diagrams_ = X return self def transform(self, X): @@ -159,11 +198,7 @@ class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence scale space kernel values. 
""" - Xp = list(X) - for i in range(len(Xp)): - op_X = Xp[i][:,[1,0]] - Xp[i] = np.concatenate([Xp[i], op_X], axis=0) - return self.pwg_.transform(Xp) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, metric="persistence_scale_space", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) class PersistenceFisherKernel(BaseEstimator, TransformerMixin): """ @@ -179,7 +214,7 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin): kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). """ self.bandwidth = bandwidth - self.pf_ = PersistenceFisherDistance(bandwidth=bandwidth_fisher, kernel_approx=kernel_approx) + self.bandwidth_fisher, self.kernel_approx = bandwidth_fisher, kernel_approx def fit(self, X, y=None): """ @@ -189,7 +224,7 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin): X (list of n x 2 numpy arrays): input persistence diagrams. y (n x 1 array): persistence diagram labels (unused). """ - self.pf_.fit(X, y) + self.diagrams_ = X return self def transform(self, X): @@ -202,5 +237,5 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence Fisher kernel values. 
""" - return np.exp(-self.pf_.transform(X)/self.bandwidth) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, bandwidth_fisher=self.bandwidth_fisher, kernel_approx=self.kernel_approx) diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 290c1d07..cc788994 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -11,6 +11,8 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances from gudhi.wasserstein import wasserstein_distance +from .preprocessing import Padding + try: from .. import bottleneck_distance USE_GUDHI = True @@ -22,6 +24,108 @@ except ImportError: # Metrics ################################### ############################################# +def sliced_wasserstein_distance(D1, D2, num_directions): + """ + This is a function for computing the sliced Wasserstein distance from two persistence diagrams. The Sliced Wasserstein distance is computed by projecting the persistence diagrams onto lines, comparing the projections with the 1-norm, and finally integrating over all possible lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details. + :param D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). + :param D2: (m x 2) numpy.array encoding the second diagram. + :param num_directions: number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation. + :returns: the sliced Wasserstein distance between persistence diagrams. 
+ :rtype: float + """ + thetas = np.linspace(-np.pi/2, np.pi/2, num=num_directions+1)[np.newaxis,:-1] + lines = np.concatenate([np.cos(thetas), np.sin(thetas)], axis=0) + approx1 = np.matmul(D1, lines) + diag_proj1 = (1./2) * np.ones((2,2)) + approx_diag1 = np.matmul(np.matmul(D1, diag_proj1), lines) + approx2 = np.matmul(D2, lines) + diag_proj2 = (1./2) * np.ones((2,2)) + approx_diag2 = np.matmul(np.matmul(D2, diag_proj2), lines) + A = np.sort(np.concatenate([approx1, approx_diag2], axis=0), axis=0) + B = np.sort(np.concatenate([approx2, approx_diag1], axis=0), axis=0) + L1 = np.sum(np.abs(A-B), axis=0) + return np.mean(L1) + +def persistence_fisher_distance(D1, D2, kernel_approx=None, bandwidth=1.): + """ + This is a function for computing the persistence Fisher distance from two persistence diagrams. The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. + :param D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). + :param D2: (m x 2) numpy.array encoding the second diagram. + :param bandwidth: bandwidth of the Gaussian kernel used to turn persistence diagrams into probability distributions. + :param kernel_approx: kernel approximation class used to speed up computation. Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + :returns: the persistence Fisher distance between persistence diagrams. 
+ :rtype: float + """ + projection = (1./2) * np.ones((2,2)) + diagonal_projections1 = np.matmul(D1, projection) + diagonal_projections2 = np.matmul(D2, projection) + if kernel_approx is not None: + approx1 = kernel_approx.transform(D1) + approx_diagonal1 = kernel_approx.transform(diagonal_projections1) + approx2 = kernel_approx.transform(D2) + approx_diagonal2 = kernel_approx.transform(diagonal_projections2) + Z = np.concatenate([approx1, approx_diagonal1, approx2, approx_diagonal2], axis=0) + U, V = np.sum(np.concatenate([approx1, approx_diagonal2], axis=0), axis=0), np.sum(np.concatenate([approx2, approx_diagonal1], axis=0), axis=0) + vectori, vectorj = np.abs(np.matmul(Z, U.T)), np.abs(np.matmul(Z, V.T)) + vectori_sum, vectorj_sum = np.sum(vectori), np.sum(vectorj) + if vectori_sum != 0: + vectori = vectori/vectori_sum + if vectorj_sum != 0: + vectorj = vectorj/vectorj_sum + return np.arccos( min(np.dot(np.sqrt(vectori), np.sqrt(vectorj)), 1.) ) + else: + Z = np.concatenate([D1, diagonal_projections1, D2, diagonal_projections2], axis=0) + U, V = np.concatenate([D1, diagonal_projections2], axis=0), np.concatenate([D2, diagonal_projections1], axis=0) + vectori = np.sum(np.exp(-np.square(pairwise_distances(Z,U))/(2 * np.square(bandwidth)))/(bandwidth * np.sqrt(2*np.pi)), axis=1) + vectorj = np.sum(np.exp(-np.square(pairwise_distances(Z,V))/(2 * np.square(bandwidth)))/(bandwidth * np.sqrt(2*np.pi)), axis=1) + vectori_sum, vectorj_sum = np.sum(vectori), np.sum(vectorj) + if vectori_sum != 0: + vectori = vectori/vectori_sum + if vectorj_sum != 0: + vectorj = vectorj/vectorj_sum + return np.arccos( min(np.dot(np.sqrt(vectori), np.sqrt(vectorj)), 1.) ) + +def sklearn_wrapper(metric, **kwargs): + """ + This function is a wrapper for any metric between two persistence diagrams that takes two numpy arrays of shapes (nx2) and (mx2) as arguments. It turns the metric into another that takes flattened and padded diagrams as inputs. 
+ """ + def flat_metric(D1, D2): + DD1, DD2 = np.reshape(D1, [-1,3]), np.reshape(D2, [-1,3]) + return metric(DD1[DD1[:,2]==1,0:2], DD2[DD2[:,2]==1,0:2], **kwargs) + return flat_metric + +def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwargs): + """ + This function computes the distance matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2). + :param X: first list of persistence diagrams. + :param Y: second list of persistence diagrams (optional). If None, pairwise distances are computed from the first list only. + :param metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "bottleneck", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. + :returns: distance matrix, i.e., numpy array of shape (num diagrams 1 x num diagrams 2) + :rtype: float + """ + if Y is None: + YY = None + pX = Padding(use=True).fit_transform(X) + diag_len = len(pX[0]) + XX = np.reshape(np.vstack(pX), [-1, diag_len*3]) + else: + nX, nY = len(X), len(Y) + pD = Padding(use=True).fit_transform(X + Y) + diag_len = len(pD[0]) + XX = np.reshape(np.vstack(pD[:nX]), [-1, diag_len*3]) + YY = np.reshape(np.vstack(pD[nX:]), [-1, diag_len*3]) + + if metric == "bottleneck": + return pairwise_distances(XX, YY, metric=sklearn_wrapper(bottleneck_distance, **kwargs)) + elif metric == "wasserstein": + return pairwise_distances(XX, YY, metric=sklearn_wrapper(wasserstein_distance, **kwargs)) + elif metric == "sliced_wasserstein": + return pairwise_distances(XX, YY, metric=sklearn_wrapper(sliced_wasserstein_distance, **kwargs)) + elif metric == "persistence_fisher": + return pairwise_distances(XX, YY, metric=sklearn_wrapper(persistence_fisher_distance, **kwargs)) + else: + return pairwise_distances(XX, YY, metric=sklearn_wrapper(metric, **kwargs)) + class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the sliced Wasserstein 
distance matrix from a list of persistence diagrams. The Sliced Wasserstein distance is computed by projecting the persistence diagrams onto lines, comparing the projections with the 1-norm, and finally integrating over all possible lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details. @@ -34,8 +138,6 @@ class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation (default 10). """ self.num_directions = num_directions - thetas = np.linspace(-np.pi/2, np.pi/2, num=self.num_directions+1)[np.newaxis,:-1] - self.lines_ = np.concatenate([np.cos(thetas), np.sin(thetas)], axis=0) def fit(self, X, y=None): """ @@ -46,9 +148,6 @@ class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.diagrams_ = X - self.approx_ = [np.matmul(X[i], self.lines_) for i in range(len(X))] - diag_proj = (1./2) * np.ones((2,2)) - self.approx_diag_ = [np.matmul(np.matmul(X[i], diag_proj), self.lines_) for i in range(len(X))] return self def transform(self, X): @@ -61,27 +160,7 @@ class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise sliced Wasserstein distances. 
""" - Xfit = np.zeros((len(X), len(self.approx_))) - if len(self.diagrams_) == len(X) and np.all([np.array_equal(self.diagrams_[i], X[i]) for i in range(len(X))]): - for i in range(len(self.approx_)): - for j in range(i+1, len(self.approx_)): - A = np.sort(np.concatenate([self.approx_[i], self.approx_diag_[j]], axis=0), axis=0) - B = np.sort(np.concatenate([self.approx_[j], self.approx_diag_[i]], axis=0), axis=0) - L1 = np.sum(np.abs(A-B), axis=0) - Xfit[i,j] = np.mean(L1) - Xfit[j,i] = Xfit[i,j] - else: - diag_proj = (1./2) * np.ones((2,2)) - approx = [np.matmul(X[i], self.lines_) for i in range(len(X))] - approx_diag = [np.matmul(np.matmul(X[i], diag_proj), self.lines_) for i in range(len(X))] - for i in range(len(approx)): - for j in range(len(self.approx_)): - A = np.sort(np.concatenate([approx[i], self.approx_diag_[j]], axis=0), axis=0) - B = np.sort(np.concatenate([self.approx_[j], approx_diag[i]], axis=0), axis=0) - L1 = np.sum(np.abs(A-B), axis=0) - Xfit[i,j] = np.mean(L1) - - return Xfit + return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="sliced_wasserstein", num_directions=self.num_directions) class BottleneckDistance(BaseEstimator, TransformerMixin): """ @@ -117,33 +196,9 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise bottleneck distances. 
""" - num_diag1 = len(X) - - #if len(self.diagrams_) == len(X) and np.all([np.array_equal(self.diagrams_[i], X[i]) for i in range(len(X))]): - if X is self.diagrams_: - matrix = np.zeros((num_diag1, num_diag1)) - - if USE_GUDHI: - for i in range(num_diag1): - for j in range(i+1, num_diag1): - matrix[i,j] = bottleneck_distance(X[i], X[j], self.epsilon) - matrix[j,i] = matrix[i,j] - else: - print("Gudhi built without CGAL: returning a null matrix") - - else: - num_diag2 = len(self.diagrams_) - matrix = np.zeros((num_diag1, num_diag2)) - - if USE_GUDHI: - for i in range(num_diag1): - for j in range(num_diag2): - matrix[i,j] = bottleneck_distance(X[i], self.diagrams_[j], self.epsilon) - else: - print("Gudhi built without CGAL: returning a null matrix") - - Xfit = matrix - + if not USE_GUDHI: + print("Gudhi built without CGAL: returning a null matrix") + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon) if USE_GUDHI else np.zeros((len(X), len(self.diagrams_))) return Xfit class WassersteinDistance(BaseEstimator, TransformerMixin): @@ -181,28 +236,7 @@ class WassersteinDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise Wasserstein distances. 
""" - num_diag1 = len(X) - - #if len(self.diagrams_) == len(X) and np.all([np.array_equal(self.diagrams_[i], X[i]) for i in range(len(X))]): - if X is self.diagrams_: - matrix = np.zeros((num_diag1, num_diag1)) - - for i in range(num_diag1): - for j in range(i+1, num_diag1): - matrix[i,j] = wasserstein_distance(X[i], X[j], self.order, self.internal_p) - matrix[j,i] = matrix[i,j] - - else: - num_diag2 = len(self.diagrams_) - matrix = np.zeros((num_diag1, num_diag2)) - - for i in range(num_diag1): - for j in range(num_diag2): - matrix[i,j] = wasserstein_distance(X[i], self.diagrams_[j], self.order, self.internal_p) - - Xfit = matrix - - return Xfit + return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="wasserstein", order=self.order, internal_p=self.internal_p) class PersistenceFisherDistance(BaseEstimator, TransformerMixin): """ @@ -227,11 +261,6 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin): y (n x 1 array): persistence diagram labels (unused). """ self.diagrams_ = X - projection = (1./2) * np.ones((2,2)) - self.diagonal_projections_ = [np.matmul(X[i], projection) for i in range(len(X))] - if self.kernel_approx is not None: - self.approx_ = [self.kernel_approx.transform(X[i]) for i in range(len(X))] - self.approx_diagonal_ = [self.kernel_approx.transform(self.diagonal_projections_[i]) for i in range(len(X))] return self def transform(self, X): @@ -244,60 +273,4 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence Fisher distances. 
""" - Xfit = np.zeros((len(X), len(self.diagrams_))) - if len(self.diagrams_) == len(X) and np.all([np.array_equal(self.diagrams_[i], X[i]) for i in range(len(X))]): - for i in range(len(self.diagrams_)): - for j in range(i+1, len(self.diagrams_)): - if self.kernel_approx is not None: - Z = np.concatenate([self.approx_[i], self.approx_diagonal_[i], self.approx_[j], self.approx_diagonal_[j]], axis=0) - U, V = np.sum(np.concatenate([self.approx_[i], self.approx_diagonal_[j]], axis=0), axis=0), np.sum(np.concatenate([self.approx_[j], self.approx_diagonal_[i]], axis=0), axis=0) - vectori, vectorj = np.abs(np.matmul(Z, U.T)), np.abs(np.matmul(Z, V.T)) - vectori_sum, vectorj_sum = np.sum(vectori), np.sum(vectorj) - if vectori_sum != 0: - vectori = vectori/vectori_sum - if vectorj_sum != 0: - vectorj = vectorj/vectorj_sum - Xfit[i,j] = np.arccos( min(np.dot(np.sqrt(vectori), np.sqrt(vectorj)), 1.) ) - Xfit[j,i] = Xfit[i,j] - else: - Z = np.concatenate([self.diagrams_[i], self.diagonal_projections_[i], self.diagrams_[j], self.diagonal_projections_[j]], axis=0) - U, V = np.concatenate([self.diagrams_[i], self.diagonal_projections_[j]], axis=0), np.concatenate([self.diagrams_[j], self.diagonal_projections_[i]], axis=0) - vectori = np.sum(np.exp(-np.square(pairwise_distances(Z,U))/(2 * np.square(self.bandwidth)))/(self.bandwidth * np.sqrt(2*np.pi)), axis=1) - vectorj = np.sum(np.exp(-np.square(pairwise_distances(Z,V))/(2 * np.square(self.bandwidth)))/(self.bandwidth * np.sqrt(2*np.pi)), axis=1) - vectori_sum, vectorj_sum = np.sum(vectori), np.sum(vectorj) - if vectori_sum != 0: - vectori = vectori/vectori_sum - if vectorj_sum != 0: - vectorj = vectorj/vectorj_sum - Xfit[i,j] = np.arccos( min(np.dot(np.sqrt(vectori), np.sqrt(vectorj)), 1.) 
) - Xfit[j,i] = Xfit[i,j] - else: - projection = (1./2) * np.ones((2,2)) - diagonal_projections = [np.matmul(X[i], projection) for i in range(len(X))] - if self.kernel_approx is not None: - approx = [self.kernel_approx.transform(X[i]) for i in range(len(X))] - approx_diagonal = [self.kernel_approx.transform(diagonal_projections[i]) for i in range(len(X))] - for i in range(len(X)): - for j in range(len(self.diagrams_)): - if self.kernel_approx is not None: - Z = np.concatenate([approx[i], approx_diagonal[i], self.approx_[j], self.approx_diagonal_[j]], axis=0) - U, V = np.sum(np.concatenate([approx[i], self.approx_diagonal_[j]], axis=0), axis=0), np.sum(np.concatenate([self.approx_[j], approx_diagonal[i]], axis=0), axis=0) - vectori, vectorj = np.abs(np.matmul(Z, U.T)), np.abs(np.matmul(Z, V.T)) - vectori_sum, vectorj_sum = np.sum(vectori), np.sum(vectorj) - if vectori_sum != 0: - vectori = vectori/vectori_sum - if vectorj_sum != 0: - vectorj = vectorj/vectorj_sum - Xfit[i,j] = np.arccos( min(np.dot(np.sqrt(vectori), np.sqrt(vectorj)), 1.) ) - else: - Z = np.concatenate([X[i], diagonal_projections[i], self.diagrams_[j], self.diagonal_projections_[j]], axis=0) - U, V = np.concatenate([X[i], self.diagonal_projections_[j]], axis=0), np.concatenate([self.diagrams_[j], diagonal_projections[i]], axis=0) - vectori = np.sum(np.exp(-np.square(pairwise_distances(Z,U))/(2 * np.square(self.bandwidth)))/(self.bandwidth * np.sqrt(2*np.pi)), axis=1) - vectorj = np.sum(np.exp(-np.square(pairwise_distances(Z,V))/(2 * np.square(self.bandwidth)))/(self.bandwidth * np.sqrt(2*np.pi)), axis=1) - vectori_sum, vectorj_sum = np.sum(vectori), np.sum(vectorj) - if vectori_sum != 0: - vectori = vectori/vectori_sum - if vectorj_sum != 0: - vectorj = vectorj/vectorj_sum - Xfit[i,j] = np.arccos( min(np.dot(np.sqrt(vectori), np.sqrt(vectorj)), 1.) 
) - return Xfit + return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) -- cgit v1.2.3 From 29e81d5038116aef0ec505e4d21d29f1c5920e34 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Fri, 7 Feb 2020 21:00:17 -0500 Subject: added sklearn trick --- src/python/gudhi/representations/kernel_methods.py | 20 +++--------- src/python/gudhi/representations/metrics.py | 37 +++++++++------------- 2 files changed, 20 insertions(+), 37 deletions(-) diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py index bbbb7c31..d89f69ab 100644 --- a/src/python/gudhi/representations/kernel_methods.py +++ b/src/python/gudhi/representations/kernel_methods.py @@ -62,27 +62,17 @@ def pairwise_persistence_diagram_kernels(X, Y=None, metric="sliced_wasserstein", :param metric: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. 
:returns: kernel matrix, i.e., numpy array of shape (num diagrams 1 x num diagrams 2) :rtype: float - """ - if Y is None: - YY = None - pX = Padding(use=True).fit_transform(X) - diag_len = len(pX[0]) - XX = np.reshape(np.vstack(pX), [-1, diag_len*3]) - else: - nX, nY = len(X), len(Y) - pD = Padding(use=True).fit_transform(X + Y) - diag_len = len(pD[0]) - XX = np.reshape(np.vstack(pD[:nX]), [-1, diag_len*3]) - YY = np.reshape(np.vstack(pD[nX:]), [-1, diag_len*3]) - + """ + XX = np.reshape(np.arange(len(X)), [-1,1]) + YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) if metric == "sliced_wasserstein": return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="sliced_wasserstein", num_directions=kwargs["num_directions"]) / kwargs["bandwidth"]) elif metric == "persistence_fisher": return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="persistence_fisher", kernel_approx=kwargs["kernel_approx"], bandwidth=kwargs["bandwidth"]) / kwargs["bandwidth_fisher"]) elif metric == "persistence_scale_space": - return pairwise_kernels(XX, YY, metric=sklearn_wrapper(persistence_scale_space_kernel, **kwargs)) + return pairwise_kernels(XX, YY, metric=sklearn_wrapper(persistence_scale_space_kernel, X, Y, **kwargs)) elif metric == "persistence_weighted_gaussian": - return pairwise_kernels(XX, YY, metric=sklearn_wrapper(persistence_weighted_gaussian_kernel, **kwargs)) + return pairwise_kernels(XX, YY, metric=sklearn_wrapper(persistence_weighted_gaussian_kernel, X, Y, **kwargs)) else: return pairwise_kernels(XX, YY, metric=sklearn_wrapper(metric, **kwargs)) diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index cc788994..fead8aa0 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -85,13 +85,16 @@ def persistence_fisher_distance(D1, D2, kernel_approx=None, bandwidth=1.): vectorj = vectorj/vectorj_sum return np.arccos( 
min(np.dot(np.sqrt(vectori), np.sqrt(vectorj)), 1.) ) -def sklearn_wrapper(metric, **kwargs): +def sklearn_wrapper(metric, X, Y, **kwargs): """ - This function is a wrapper for any metric between two persistence diagrams that takes two numpy arrays of shapes (nx2) and (mx2) as arguments. It turns the metric into another that takes flattened and padded diagrams as inputs. + This function is a wrapper for any metric between two persistence diagrams that takes two numpy arrays of shapes (nx2) and (mx2) as arguments. """ - def flat_metric(D1, D2): - DD1, DD2 = np.reshape(D1, [-1,3]), np.reshape(D2, [-1,3]) - return metric(DD1[DD1[:,2]==1,0:2], DD2[DD2[:,2]==1,0:2], **kwargs) + if Y is None: + def flat_metric(a, b): + return metric(X[int(a[0])], X[int(b[0])], **kwargs) + else: + def flat_metric(a, b): + return metric(X[int(a[0])], Y[int(b[0])], **kwargs) return flat_metric def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwargs): @@ -103,28 +106,18 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwa :returns: distance matrix, i.e., numpy array of shape (num diagrams 1 x num diagrams 2) :rtype: float """ - if Y is None: - YY = None - pX = Padding(use=True).fit_transform(X) - diag_len = len(pX[0]) - XX = np.reshape(np.vstack(pX), [-1, diag_len*3]) - else: - nX, nY = len(X), len(Y) - pD = Padding(use=True).fit_transform(X + Y) - diag_len = len(pD[0]) - XX = np.reshape(np.vstack(pD[:nX]), [-1, diag_len*3]) - YY = np.reshape(np.vstack(pD[nX:]), [-1, diag_len*3]) - + XX = np.reshape(np.arange(len(X)), [-1,1]) + YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) if metric == "bottleneck": - return pairwise_distances(XX, YY, metric=sklearn_wrapper(bottleneck_distance, **kwargs)) + return pairwise_distances(XX, YY, metric=sklearn_wrapper(bottleneck_distance, X, Y, **kwargs)) elif metric == "wasserstein": - return pairwise_distances(XX, YY, metric=sklearn_wrapper(wasserstein_distance, **kwargs)) + return 
pairwise_distances(XX, YY, metric=sklearn_wrapper(wasserstein_distance, X, Y, **kwargs)) elif metric == "sliced_wasserstein": - return pairwise_distances(XX, YY, metric=sklearn_wrapper(sliced_wasserstein_distance, **kwargs)) + return pairwise_distances(XX, YY, metric=sklearn_wrapper(sliced_wasserstein_distance, X, Y, **kwargs)) elif metric == "persistence_fisher": - return pairwise_distances(XX, YY, metric=sklearn_wrapper(persistence_fisher_distance, **kwargs)) + return pairwise_distances(XX, YY, metric=sklearn_wrapper(persistence_fisher_distance, X, Y, **kwargs)) else: - return pairwise_distances(XX, YY, metric=sklearn_wrapper(metric, **kwargs)) + return pairwise_distances(XX, YY, metric=sklearn_wrapper(metric, X, Y, **kwargs)) class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): """ -- cgit v1.2.3 From ef0f82ef2155440827e17c552abb49b509866fc7 Mon Sep 17 00:00:00 2001 From: mathieu Date: Thu, 13 Feb 2020 16:01:29 -0500 Subject: integrated hera --- .../diagram_vectorizations_distances_kernels.py | 7 ++++++- src/python/gudhi/representations/metrics.py | 23 ++++++++++++++++------ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py index 66c32cc2..6352d2b5 100755 --- a/src/python/example/diagram_vectorizations_distances_kernels.py +++ b/src/python/example/diagram_vectorizations_distances_kernels.py @@ -117,7 +117,12 @@ X = SW.fit(diags) Y = SW.transform(diags2) print("SW kernel is " + str(Y[0][0])) -W = WassersteinDistance(order=2, internal_p=2) +W = WassersteinDistance(order=2, internal_p=2, mode="pot") +X = W.fit(diags) +Y = W.transform(diags2) +print("Wasserstein distance is " + str(Y[0][0])) + +W = WassersteinDistance(order=2, internal_p=2, mode="hera", delta=0.0001) X = W.fit(diags) Y = W.transform(diags2) print("Wasserstein distance is " + str(Y[0][0])) diff --git a/src/python/gudhi/representations/metrics.py 
b/src/python/gudhi/representations/metrics.py index cc788994..ed998603 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -10,7 +10,8 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances -from gudhi.wasserstein import wasserstein_distance +from gudhi.wasserstein import wasserstein_distance as pot_wasserstein_distance +from gudhi.hera import wasserstein_distance as hera_wasserstein_distance from .preprocessing import Padding try: @@ -117,8 +118,10 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwa if metric == "bottleneck": return pairwise_distances(XX, YY, metric=sklearn_wrapper(bottleneck_distance, **kwargs)) - elif metric == "wasserstein": - return pairwise_distances(XX, YY, metric=sklearn_wrapper(wasserstein_distance, **kwargs)) + elif metric == "wasserstein" or metric == "pot_wasserstein": + return pairwise_distances(XX, YY, metric=sklearn_wrapper(pot_wasserstein_distance, **kwargs)) + elif metric == "hera_wasserstein": + return pairwise_distances(XX, YY, metric=sklearn_wrapper(hera_wasserstein_distance, **kwargs)) elif metric == "sliced_wasserstein": return pairwise_distances(XX, YY, metric=sklearn_wrapper(sliced_wasserstein_distance, **kwargs)) elif metric == "persistence_fisher": @@ -205,15 +208,19 @@ class WassersteinDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the Wasserstein distance matrix from a list of persistence diagrams. """ - def __init__(self, order=2, internal_p=2): + def __init__(self, order=2, internal_p=2, mode="pot", delta=0.0001): """ Constructor for the WassersteinDistance class. Parameters: order (int): exponent for Wasserstein, default value is 2., see :func:`gudhi.wasserstein.wasserstein_distance`. internal_p (int): ground metric on the (upper-half) plane (i.e. 
norm l_p in R^2), default value is 2 (euclidean norm), see :func:`gudhi.wasserstein.wasserstein_distance`. + mode (str): method for computing Wasserstein distance. Either "pot" or "hera". + delta (float): relative error 1+delta. Used only if mode == "hera". """ - self.order, self.internal_p = order, internal_p + self.order, self.internal_p, self.mode = order, internal_p, mode + self.metric = "pot_wasserstein" if mode == "pot" else "hera_wasserstein" + self.delta = delta def fit(self, X, y=None): """ @@ -236,7 +243,11 @@ class WassersteinDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise Wasserstein distances. """ - return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="wasserstein", order=self.order, internal_p=self.internal_p) + if self.metric == "hera_wasserstein": + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, delta=self.delta) + else: + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p) + return Xfit class PersistenceFisherDistance(BaseEstimator, TransformerMixin): """ -- cgit v1.2.3 From d9290a78741fc14dc0f87d395da967a4d561b34a Mon Sep 17 00:00:00 2001 From: mathieu Date: Thu, 13 Feb 2020 16:11:34 -0500 Subject: small modif on example file --- src/python/example/diagram_vectorizations_distances_kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py index 6352d2b5..507ead7c 100755 --- a/src/python/example/diagram_vectorizations_distances_kernels.py +++ b/src/python/example/diagram_vectorizations_distances_kernels.py @@ -120,12 +120,12 @@ print("SW kernel is " + str(Y[0][0])) W = WassersteinDistance(order=2, internal_p=2, 
mode="pot") X = W.fit(diags) Y = W.transform(diags2) -print("Wasserstein distance is " + str(Y[0][0])) +print("Wasserstein distance (POT) is " + str(Y[0][0])) W = WassersteinDistance(order=2, internal_p=2, mode="hera", delta=0.0001) X = W.fit(diags) Y = W.transform(diags2) -print("Wasserstein distance is " + str(Y[0][0])) +print("Wasserstein distance (hera) is " + str(Y[0][0])) W = BottleneckDistance(epsilon=.001) X = W.fit(diags) -- cgit v1.2.3 From fe754ca20cf942e2af186f14e5a3d24e23b6c80e Mon Sep 17 00:00:00 2001 From: mathieu Date: Thu, 13 Feb 2020 19:27:40 -0500 Subject: fix Marc's comments --- src/python/gudhi/cubical_complex.pyx | 49 ++++++++-------- .../include/Persistent_cohomology_interface.h | 67 ++++++++++------------ 2 files changed, 55 insertions(+), 61 deletions(-) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index bd432834..8cf43539 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -31,7 +31,7 @@ cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface>": Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max) vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) - vector[pair[int, pair[pair[double, int], pair[double, int]]]] get_cofaces_of_cubical_persistence_pairs(int homology_coeff_field, double min_persistence) + vector[vector[int]] cofaces_of_cubical_persistence_pairs() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -146,31 +146,32 @@ cdef class CubicalComplex: persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) return persistence_result - def cofaces_of_cubical_persistence_pairs(self, 
homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): - """This function returns the persistence of the simplicial complex. - - :param homology_coeff_field: The homology coefficient field. Must be a - prime number. Default value is 11. - :type homology_coeff_field: int. - :param min_persistence: The minimum persistence value to take into - account (strictly greater than min_persistence). Default value is - 0.0. - Sets min_persistence to -1.0 to see all values. - :type min_persistence: float. - :param persistence_dim_max: If true, the persistent homology for the - maximal dimension in the complex is computed. If false, it is - ignored. Default is false. - :type persistence_dim_max: bool - :returns: The persistence of the simplicial complex, together with the cofaces of the corresponding generators, i.e., the top-dimensional cells/cofaces of the positive and negative simplices. - :rtype: list of pairs(dimension, pair(index of positive top-dimensional cell, index of negative top-dimensional cell)) + def cofaces_of_persistence_pairs(self): + """A persistence interval is described by a pair of cells, one that creates the + feature and one that kills it. The filtration values of those 2 cells give coordinates + for a point in a persistence diagram, or a bar in a barcode. Structurally, in the + cubical complexes provided here, the filtration value of any cell is the minimum of the + filtration values of the maximal cells that contain it. Connecting persistence diagram + coordinates to the corresponding value in the input (i.e. the filtration values of + the top-dimensional cells) is useful for differentiation purposes. + + This function returns a list of pairs of top-dimensional cells corresponding to + the persistence birth and death cells of the filtration. The cells are represented by + their indices in the input list of top-dimensional cells (and not their indices in the + internal datastructure that includes non-maximal cells). 
Note that when two adjacent + top-dimensional cells have the same filtration value, we arbitrarily return one of the two + when calling the function on one of their common faces. + + :returns: The top-dimensional cells/cofaces of the positive and negative cells. + :rtype: list of pairs(index of positive top-dimensional cell, index of negative top-dimensional cell) """ + cdef vector[vector[int]] persistence_result if self.pcohptr != NULL: - del self.pcohptr - self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) - cdef vector[pair[int, pair[pair[double, int], pair[double, int]]]] persistence_result - if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_cofaces_of_cubical_persistence_pairs(homology_coeff_field, min_persistence) - return persistence_result + persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() + else: + print("cofaces_of_persistence_pairs function requires persistence function" + " to be launched first.") + return np.array(persistence_result) def betti_numbers(self): """This function returns the Betti numbers of the complex. 
diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 1a1e716e..e5accf50 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -63,7 +63,6 @@ persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); std::sort(std::begin(persistent_pairs), std::end(persistent_pairs), cmp); - std::vector>> persistence; for (auto pair : persistent_pairs) { persistence.push_back(std::make_pair(stptr_->dimension(get<0>(pair)), @@ -73,58 +72,52 @@ persistent_cohomology::Persistent_cohomology & cofaces, int splx){ - if (stptr_->dimension(stptr_->simplex(splx)) == stptr_->dimension()){cofaces.push_back(stptr_->simplex(splx));} - else{ for (auto v : stptr_->coboundary_simplex_range(stptr_->simplex(splx))){top_dimensional_cofaces(cofaces, stptr_->key(v));} } + int top_dimensional_coface(int splx){ + if (stptr_->dimension(splx) == stptr_->dimension()){return splx;} + else{ + for (auto v : stptr_->coboundary_simplex_range(splx)){ + if(stptr_->filtration(v) == stptr_->filtration(splx)){ + return top_dimensional_coface(v); + } + } + } } - std::vector, std::pair>>> get_cofaces_of_cubical_persistence_pairs(int homology_coeff_field, - double min_persistence) { + std::vector> cofaces_of_cubical_persistence_pairs() { // Warning: this function is meant to be used with CubicalComplex only!! 
+ auto pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); + // Gather all top-dimensional cells and store their simplex handles - std::vector max_splx; for (auto splx : stptr_->filtration_simplex_range()){ if (stptr_->dimension(splx) == stptr_->dimension()) max_splx.push_back(splx); } + std::vector max_splx; for (auto splx : stptr_->top_dimensional_cells_range()){ + max_splx.push_back(splx); + } // Sort these simplex handles and compute the ordering function // This function allows to go directly from the simplex handle to the position of the corresponding top-dimensional cell in the input data - std::map order; std::sort(max_splx.begin(), max_splx.end()); for (unsigned int i = 0; i < max_splx.size(); i++) order.insert(std::make_pair(max_splx[i], i)); - - persistent_cohomology::Persistent_cohomology::init_coefficients(homology_coeff_field); - persistent_cohomology::Persistent_cohomology::compute_persistent_cohomology(min_persistence); + std::map order; std::sort(max_splx.begin(), max_splx.end()); + for (unsigned int i = 0; i < max_splx.size(); i++) order.insert(std::make_pair(max_splx[i], i)); - // Custom sort and output persistence - cmp_intervals_by_dim_then_length cmp(stptr_); - auto persistent_pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); - std::sort(std::begin(persistent_pairs), std::end(persistent_pairs), cmp); - - std::vector, std::pair>>> persistence; - for (auto pair : persistent_pairs) { - - double f0 = stptr_->filtration(get<0>(pair)); - // Recursively get the top-dimensional cells / cofaces associated to the persistence generator - std::vector faces0; top_dimensional_cofaces(faces0, stptr_->key(get<0>(pair))); - // Find the top-dimensional cell / coface with the same filtration value - int cf; for (unsigned int i = 0; i < faces0.size(); i++){if (stptr_->filtration(faces0[i]) == f0){cf = i; break;}} + std::vector> persistence_pairs; + for (auto pair : pairs) { + int h = 
stptr_->dimension(get<0>(pair)); + // Recursively get the top-dimensional cell / coface associated to the persistence generator + int face0 = top_dimensional_coface(get<0>(pair)); // Retrieve the index of the corresponding top-dimensional cell in the input data - int splx0 = order[faces0[cf]]; + int splx0 = order[face0]; int splx1 = -1; if (isfinite(stptr_->filtration(get<1>(pair)))){ - double f1 = stptr_->filtration(get<1>(pair)); - // Recursively get the top-dimensional cells / cofaces associated to the persistence generator - std::vector faces1; top_dimensional_cofaces(faces1, stptr_->key(get<1>(pair))); - // Find the top-dimensional cell / coface with the same filtration value - int cf; for (unsigned int i = 0; i < faces1.size(); i++){if (stptr_->filtration(faces1[i]) == f1){cf = i; break;}} + // Recursively get the top-dimensional cell / coface associated to the persistence generator + int face1 = top_dimensional_coface(get<1>(pair)); // Retrieve the index of the corresponding top-dimensional cell in the input data - splx1 = order[faces1[cf]]; + splx1 = order[face1]; } - - persistence.push_back(std::make_pair(stptr_->dimension(get<0>(pair)), std::make_pair(std::make_pair(stptr_->filtration(get<0>(pair)), splx0), std::make_pair(stptr_->filtration(get<1>(pair)), splx1)))); + std::vector vect{ h, splx0, splx1}; + persistence_pairs.push_back(vect); } - return persistence; + return persistence_pairs; } std::vector, std::vector>> persistence_pairs() { -- cgit v1.2.3 From 64199fd8037556f135f90102ba8270cccf9d3e60 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 7 Mar 2020 01:08:10 +0100 Subject: persistence generators for lower-star and flag filtrations --- src/python/gudhi/simplex_tree.pxd | 2 + src/python/gudhi/simplex_tree.pyx | 55 ++++++++ .../include/Persistent_cohomology_interface.h | 138 ++++++++++++++++----- 3 files changed, 167 insertions(+), 28 deletions(-) diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 
96d14079..4e435c67 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -53,3 +53,5 @@ cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": vector[pair[double,double]] intervals_in_dimension(int dimension) void write_output_diagram(string diagram_file_name) vector[pair[vector[int], vector[int]]] persistence_pairs() + pair[vector[vector[int]], vector[vector[int]]] lower_star_generators() + pair[vector[vector[int]], vector[vector[int]]] flag_generators() diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index b18627c4..1c9b9cf1 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -514,3 +514,58 @@ cdef class SimplexTree: else: print("intervals_in_dim function requires persistence function" " to be launched first.") + + def lower_star_persistence_generators(self): + """Assuming this is a lower-star filtration, this function returns the persistence pairs, + where each simplex is replaced with the vertex that gave it its filtration value. + + :returns: first the regular persistence pairs, grouped by dimension, with one vertex per extremity, + and second the essential features, grouped by dimension, with one vertex each + :rtype: Tuple[List[numpy.array[int] of shape (n,2)], List[numpy.array[int] of shape (m,)]] + + :note: intervals_in_dim function requires + :func:`persistence()` + function to be launched first. + """ + if self.pcohptr != NULL: + gen = self.pcohptr.lower_star_generators() + normal = [np_array(d).reshape(-1,2) for d in gen.first] + infinite = [np_array(d) for d in gen.second] + return (normal, infinite) + else: + print("lower_star_persistence_generators() requires that persistence() be called first.") + + def flag_persistence_generators(self): + """Assuming this is a flag complex, this function returns the persistence pairs, + where each simplex is replaced with the vertices of the edges that gave it its filtration value. 
+ + :returns: first the regular persistence pairs of dimension 0, with one vertex for birth and two for death; + then the other regular persistence pairs, grouped by dimension, with 2 vertices per extremity; + then the connected components, with one vertex each; + finally the other essential features, grouped by dimension, with 2 vertices for birth. + :rtype: Tuple[List[numpy.array[int] of shape (n,3)], List[numpy.array[int] of shape (m,4)], List[numpy.array[int] of shape (l,)], List[numpy.array[int] of shape (k,2)]] + + :note: intervals_in_dim function requires + :func:`persistence()` + function to be launched first. + """ + if self.pcohptr != NULL: + gen = self.pcohptr.flag_generators() + if len(gen.first) == 0: + normal0 = np_array([]) + normals = np_array([]) + else: + l = iter(gen.first) + normal0 = np_array(next(l)).reshape(-1,3) + normals = [np_array(d).reshape(-1,4) for d in l] + if len(gen.second) == 0: + infinite0 = np_array([]) + infinites = np_array([]) + else: + l = iter(gen.second) + infinite0 = np_array(next(l)) + infinites = [np_array(d).reshape(-1,3) for d in l] + + return (normal0, normals, infinite0, infinites) + else: + print("lower_star_persistence_generators() requires that persistence() be called first.") diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 8c79e6f3..6e9aac52 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -23,61 +23,55 @@ template class Persistent_cohomology_interface : public persistent_cohomology::Persistent_cohomology { private: + typedef persistent_cohomology::Persistent_cohomology Base; /* * Compare two intervals by dimension, then by length. 
*/ struct cmp_intervals_by_dim_then_length { - explicit cmp_intervals_by_dim_then_length(FilteredComplex * sc) - : sc_(sc) { } - template bool operator()(const Persistent_interval & p1, const Persistent_interval & p2) { - if (sc_->dimension(get < 0 > (p1)) == sc_->dimension(get < 0 > (p2))) - return (sc_->filtration(get < 1 > (p1)) - sc_->filtration(get < 0 > (p1)) - > sc_->filtration(get < 1 > (p2)) - sc_->filtration(get < 0 > (p2))); + if (std::get<0>(p1) == std::get<0>(p2)) { + auto& i1 = std::get<1>(p1); + auto& i2 = std::get<1>(p2); + return std::get<1>(i1) - std::get<0>(i1) > std::get<1>(i2) - std::get<0>(i2); + } else - return (sc_->dimension(get < 0 > (p1)) > sc_->dimension(get < 0 > (p2))); + return (std::get<0>(p1) > std::get<0>(p2)); + // Why does this sort by decreasing dimension? } - FilteredComplex* sc_; }; public: Persistent_cohomology_interface(FilteredComplex* stptr) - : persistent_cohomology::Persistent_cohomology(*stptr), + : Base(*stptr), stptr_(stptr) { } Persistent_cohomology_interface(FilteredComplex* stptr, bool persistence_dim_max) - : persistent_cohomology::Persistent_cohomology(*stptr, persistence_dim_max), + : Base(*stptr, persistence_dim_max), stptr_(stptr) { } std::vector>> get_persistence(int homology_coeff_field, double min_persistence) { - persistent_cohomology::Persistent_cohomology::init_coefficients(homology_coeff_field); - persistent_cohomology::Persistent_cohomology::compute_persistent_cohomology(min_persistence); - - // Custom sort and output persistence - cmp_intervals_by_dim_then_length cmp(stptr_); - auto persistent_pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); - std::sort(std::begin(persistent_pairs), std::end(persistent_pairs), cmp); + Base::init_coefficients(homology_coeff_field); + Base::compute_persistent_cohomology(min_persistence); + auto const& persistent_pairs = Base::get_persistent_pairs(); std::vector>> persistence; + persistence.reserve(persistent_pairs.size()); for (auto pair : 
persistent_pairs) { - persistence.push_back(std::make_pair(stptr_->dimension(get<0>(pair)), - std::make_pair(stptr_->filtration(get<0>(pair)), - stptr_->filtration(get<1>(pair))))); + persistence.emplace_back(stptr_->dimension(get<0>(pair)), + std::make_pair(stptr_->filtration(get<0>(pair)), + stptr_->filtration(get<1>(pair)))); } + // Custom sort and output persistence + cmp_intervals_by_dim_then_length cmp; + std::sort(std::begin(persistence), std::end(persistence), cmp); return persistence; } std::vector, std::vector>> persistence_pairs() { - auto pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); - std::vector, std::vector>> persistence_pairs; + auto const& pairs = Base::get_persistent_pairs(); persistence_pairs.reserve(pairs.size()); for (auto pair : pairs) { std::vector birth; @@ -89,16 +83,104 @@ persistent_cohomology::Persistent_cohomology death; if (get<1>(pair) != stptr_->null_simplex()) { + death.reserve(birth.size()+1); for (auto vertex : stptr_->simplex_vertex_range(get<1>(pair))) { death.push_back(vertex); } } - persistence_pairs.push_back(std::make_pair(birth, death)); + persistence_pairs.emplace_back(std::move(birth), std::move(death)); } return persistence_pairs; } + // TODO: (possibly at the python level) + // - an option to ignore intervals of length 0? + // - an option to return only some of those vectors? 
+ typedef std::pair>, std::vector>> Generators; + + Generators lower_star_generators() { + Generators out; + // diags[i] should be interpreted as vector> + auto& diags = out.first; + // diagsinf[i] should be interpreted as vector + auto& diagsinf = out.second; + for (auto pair : Base::get_persistent_pairs()) { + auto s = std::get<0>(pair); + auto t = std::get<1>(pair); + int dim = stptr_->dimension(s); + auto v = stptr_->vertex_with_same_filtration(s); + if(t == stptr_->null_simplex()) { + while(diagsinf.size() < dim+1) diagsinf.emplace_back(); + diagsinf[dim].push_back(v); + } else { + while(diags.size() < dim+1) diags.emplace_back(); + auto w = stptr_->vertex_with_same_filtration(t); + diags[dim].push_back(v); + diags[dim].push_back(w); + } + } + return out; + } + + Generators flag_generators() { + Generators out; + // diags[0] should be interpreted as vector> and other diags[i] as vector> + auto& diags = out.first; + // diagsinf[0] should be interpreted as vector and other diagsinf[i] as vector> + auto& diagsinf = out.second; + for (auto pair : Base::get_persistent_pairs()) { + auto s = std::get<0>(pair); + auto t = std::get<1>(pair); + int dim = stptr_->dimension(s); + bool infinite = t == stptr_->null_simplex(); + if(infinite) { + if(dim == 0) { + auto v = *std::begin(stptr_->simplex_vertex_range(s)); + if(diagsinf.size()==0)diagsinf.emplace_back(); + diagsinf[0].push_back(v); + } else { + auto e = stptr_->edge_with_same_filtration(s); + auto&& e_vertices = stptr_->simplex_vertex_range(e); + auto i = std::begin(e_vertices); + auto v1 = *i; + auto v2 = *++i; + GUDHI_CHECK(++i==std::end(e_vertices), "must be an edge"); + while(diagsinf.size() < dim+1) diagsinf.emplace_back(); + diagsinf[dim].push_back(v1); + diagsinf[dim].push_back(v2); + } + } else { + auto et = stptr_->edge_with_same_filtration(t); + auto&& et_vertices = stptr_->simplex_vertex_range(et); + auto it = std::begin(et_vertices); + auto w1 = *it; + auto w2 = *++it; + 
GUDHI_CHECK(++it==std::end(et_vertices), "must be an edge"); + if(dim == 0) { + auto v = *std::begin(stptr_->simplex_vertex_range(s)); + if(diags.size()==0)diags.emplace_back(); + diags[0].push_back(v); + diags[0].push_back(w1); + diags[0].push_back(w2); + } else { + auto es = stptr_->edge_with_same_filtration(s); + auto&& es_vertices = stptr_->simplex_vertex_range(es); + auto is = std::begin(es_vertices); + auto v1 = *is; + auto v2 = *++is; + GUDHI_CHECK(++is==std::end(es_vertices), "must be an edge"); + while(diags.size() < dim+1) diags.emplace_back(); + diags[dim].push_back(v1); + diags[dim].push_back(v2); + diags[dim].push_back(w1); + diags[dim].push_back(w2); + } + } + } + return out; + } + private: // A copy FilteredComplex* stptr_; -- cgit v1.2.3 From 35e08b30836fb0c419c0377eaf51d2a3b16e7670 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 7 Mar 2020 14:05:05 +0100 Subject: min_persistence for generators --- src/python/gudhi/simplex_tree.pxd | 4 +-- src/python/gudhi/simplex_tree.pyx | 36 +++++++++++++--------- .../include/Persistent_cohomology_interface.h | 10 ++++-- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 4e435c67..53e2bbc9 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -53,5 +53,5 @@ cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": vector[pair[double,double]] intervals_in_dimension(int dimension) void write_output_diagram(string diagram_file_name) vector[pair[vector[int], vector[int]]] persistence_pairs() - pair[vector[vector[int]], vector[vector[int]]] lower_star_generators() - pair[vector[vector[int]], vector[vector[int]]] flag_generators() + pair[vector[vector[int]], vector[vector[int]]] lower_star_generators(double) + pair[vector[vector[int]], vector[vector[int]]] flag_generators(double) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 
1c9b9cf1..3f582ac9 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -395,7 +395,7 @@ cdef class SimplexTree: :param min_persistence: The minimum persistence value to take into account (strictly greater than min_persistence). Default value is 0.0. - Sets min_persistence to -1.0 to see all values. + Set min_persistence to -1.0 to see all values. :type min_persistence: float. :param persistence_dim_max: If true, the persistent homology for the maximal dimension in the complex is computed. If false, it is @@ -515,42 +515,48 @@ cdef class SimplexTree: print("intervals_in_dim function requires persistence function" " to be launched first.") - def lower_star_persistence_generators(self): + def lower_star_persistence_generators(self, min_persistence=0.): """Assuming this is a lower-star filtration, this function returns the persistence pairs, where each simplex is replaced with the vertex that gave it its filtration value. - :returns: first the regular persistence pairs, grouped by dimension, with one vertex per extremity, + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Set min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: First the regular persistence pairs, grouped by dimension, with one vertex per extremity, and second the essential features, grouped by dimension, with one vertex each :rtype: Tuple[List[numpy.array[int] of shape (n,2)], List[numpy.array[int] of shape (m,)]] - :note: intervals_in_dim function requires - :func:`persistence()` - function to be launched first. + :note: lower_star_persistence_generators requires that `persistence()` be called first. 
""" if self.pcohptr != NULL: - gen = self.pcohptr.lower_star_generators() + gen = self.pcohptr.lower_star_generators(min_persistence) normal = [np_array(d).reshape(-1,2) for d in gen.first] infinite = [np_array(d) for d in gen.second] return (normal, infinite) else: print("lower_star_persistence_generators() requires that persistence() be called first.") - def flag_persistence_generators(self): + def flag_persistence_generators(self, min_persistence=0.): """Assuming this is a flag complex, this function returns the persistence pairs, where each simplex is replaced with the vertices of the edges that gave it its filtration value. - :returns: first the regular persistence pairs of dimension 0, with one vertex for birth and two for death; + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Set min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: First the regular persistence pairs of dimension 0, with one vertex for birth and two for death; then the other regular persistence pairs, grouped by dimension, with 2 vertices per extremity; then the connected components, with one vertex each; finally the other essential features, grouped by dimension, with 2 vertices for birth. - :rtype: Tuple[List[numpy.array[int] of shape (n,3)], List[numpy.array[int] of shape (m,4)], List[numpy.array[int] of shape (l,)], List[numpy.array[int] of shape (k,2)]] + :rtype: Tuple[numpy.array[int] of shape (n,3), List[numpy.array[int] of shape (m,4)], numpy.array[int] of shape (l,), List[numpy.array[int] of shape (k,2)]] - :note: intervals_in_dim function requires - :func:`persistence()` - function to be launched first. + :note: flag_persistence_generators requires that `persistence()` be called first. 
""" if self.pcohptr != NULL: - gen = self.pcohptr.flag_generators() + gen = self.pcohptr.flag_generators(min_persistence) if len(gen.first) == 0: normal0 = np_array([]) normals = np_array([]) @@ -568,4 +574,4 @@ cdef class SimplexTree: return (normal0, normals, infinite0, infinites) else: - print("lower_star_persistence_generators() requires that persistence() be called first.") + print("flag_persistence_generators() requires that persistence() be called first.") diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 6e9aac52..8e721fc0 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -95,11 +95,10 @@ persistent_cohomology::Persistent_cohomology>, std::vector>> Generators; - Generators lower_star_generators() { + Generators lower_star_generators(double min_persistence) { Generators out; // diags[i] should be interpreted as vector> auto& diags = out.first; @@ -108,6 +107,8 @@ persistent_cohomology::Persistent_cohomology(pair); auto t = std::get<1>(pair); + if(stptr_->filtration(t) - stptr_->filtration(s) <= min_persistence) + continue; int dim = stptr_->dimension(s); auto v = stptr_->vertex_with_same_filtration(s); if(t == stptr_->null_simplex()) { @@ -123,7 +124,8 @@ persistent_cohomology::Persistent_cohomology> and other diags[i] as vector> auto& diags = out.first; @@ -132,6 +134,8 @@ persistent_cohomology::Persistent_cohomology(pair); auto t = std::get<1>(pair); + if(stptr_->filtration(t) - stptr_->filtration(s) <= min_persistence) + continue; int dim = stptr_->dimension(s); bool infinite = t == stptr_->null_simplex(); if(infinite) { -- cgit v1.2.3 From 08be68c1fb3c05a35d738eab53712ec6cb4d1ad5 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 7 Mar 2020 14:14:45 +0100 Subject: [ci skip] Comment --- src/python/include/Persistent_cohomology_interface.h | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 8e721fc0..22d6f654 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -125,6 +125,7 @@ persistent_cohomology::Persistent_cohomology> and other diags[i] as vector> -- cgit v1.2.3 From 55c1385419edd4e152df219dfff596d2631367f1 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 8 Mar 2020 11:15:04 +0100 Subject: Typo in shape of array --- src/python/gudhi/simplex_tree.pyx | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 3f582ac9..d5f642d1 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -8,6 +8,7 @@ # - YYYY/MM Author: Description of the modification from libc.stdint cimport intptr_t +import numpy from numpy import array as np_array cimport simplex_tree @@ -558,19 +559,19 @@ cdef class SimplexTree: if self.pcohptr != NULL: gen = self.pcohptr.flag_generators(min_persistence) if len(gen.first) == 0: - normal0 = np_array([]) - normals = np_array([]) + normal0 = numpy.empty((0,3)) + normals = [] else: l = iter(gen.first) normal0 = np_array(next(l)).reshape(-1,3) normals = [np_array(d).reshape(-1,4) for d in l] if len(gen.second) == 0: - infinite0 = np_array([]) - infinites = np_array([]) + infinite0 = numpy.empty(0) + infinites = [] else: l = iter(gen.second) infinite0 = np_array(next(l)) - infinites = [np_array(d).reshape(-1,3) for d in l] + infinites = [np_array(d).reshape(-1,2) for d in l] return (normal0, normals, infinite0, infinites) else: -- cgit v1.2.3 From 5a737eefc7abd690e8a174d2557d0157e77f5f4c Mon Sep 17 00:00:00 2001 From: mathieu Date: Tue, 10 Mar 2020 19:13:37 -0400 Subject: new fixes --- .../include/gudhi/Bitmap_cubical_complex.h | 1 - src/python/gudhi/cubical_complex.pyx | 32 +++++++++++----------- 
src/python/gudhi/periodic_cubical_complex.pyx | 28 +++++++++++++++++++ src/python/gudhi/simplex_tree.pyx | 2 +- .../include/Persistent_cohomology_interface.h | 3 +- src/python/test/test_cubical_complex.py | 5 ++++ 6 files changed, 52 insertions(+), 19 deletions(-) diff --git a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex.h b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex.h index bf09532e..37514dee 100644 --- a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex.h +++ b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex.h @@ -340,7 +340,6 @@ class Bitmap_cubical_complex : public T { * that provides ranges for the Boundary_simplex_iterator. **/ Boundary_simplex_range boundary_simplex_range(Simplex_handle sh) { return this->get_boundary_of_a_cell(sh); } - Boundary_simplex_range coboundary_simplex_range(Simplex_handle sh) { return this->get_coboundary_of_a_cell(sh); } /** * filtration_simplex_range creates an object of a Filtration_simplex_range class diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index 8cf43539..9e701fe6 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -148,22 +148,22 @@ cdef class CubicalComplex: def cofaces_of_persistence_pairs(self): """A persistence interval is described by a pair of cells, one that creates the - feature and one that kills it. The filtration values of those 2 cells give coordinates - for a point in a persistence diagram, or a bar in a barcode. Structurally, in the - cubical complexes provided here, the filtration value of any cell is the minimum of the - filtration values of the maximal cells that contain it. Connecting persistence diagram - coordinates to the corresponding value in the input (i.e. the filtration values of - the top-dimensional cells) is useful for differentiation purposes. 
- - This function returns a list of pairs of top-dimensional cells corresponding to - the persistence birth and death cells of the filtration. The cells are represented by - their indices in the input list of top-dimensional cells (and not their indices in the - internal datastructure that includes non-maximal cells). Note that when two adjacent - top-dimensional cells have the same filtration value, we arbitrarily return one of the two - when calling the function on one of their common faces. - - :returns: The top-dimensional cells/cofaces of the positive and negative cells. - :rtype: list of pairs(index of positive top-dimensional cell, index of negative top-dimensional cell) + feature and one that kills it. The filtration values of those 2 cells give coordinates + for a point in a persistence diagram, or a bar in a barcode. Structurally, in the + cubical complexes provided here, the filtration value of any cell is the minimum of the + filtration values of the maximal cells that contain it. Connecting persistence diagram + coordinates to the corresponding value in the input (i.e. the filtration values of + the top-dimensional cells) is useful for differentiation purposes. + + This function returns a list of pairs of top-dimensional cells corresponding to + the persistence birth and death cells of the filtration. The cells are represented by + their indices in the input list of top-dimensional cells (and not their indices in the + internal datastructure that includes non-maximal cells). Note that when two adjacent + top-dimensional cells have the same filtration value, we arbitrarily return one of the two + when calling the function on one of their common faces. + + :returns: The top-dimensional cells/cofaces of the positive and negative cells, together with the corresponding homological dimension. 
+ :rtype: numpy array of integers of shape [number_of_persistence_points, 3], the integers of eah row being: (homological dimension, index of positive top-dimensional cell, index of negative top-dimensional cell). If the homological feature is essential, i.e., if the death time is +infinity, then the index of the corresponding negative top-dimensional cell is -1. """ cdef vector[vector[int]] persistence_result if self.pcohptr != NULL: diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index 37f76201..ba039e80 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -31,6 +31,7 @@ cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Periodic_cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface>>": Periodic_cubical_complex_persistence_interface(Periodic_cubical_complex_base_interface * st, bool persistence_dim_max) vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + vector[vector[int]] cofaces_of_cubical_persistence_pairs() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -155,6 +156,33 @@ cdef class PeriodicCubicalComplex: persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) return persistence_result + def cofaces_of_persistence_pairs(self): + """A persistence interval is described by a pair of cells, one that creates the + feature and one that kills it. The filtration values of those 2 cells give coordinates + for a point in a persistence diagram, or a bar in a barcode. Structurally, in the + cubical complexes provided here, the filtration value of any cell is the minimum of the + filtration values of the maximal cells that contain it. 
Connecting persistence diagram + coordinates to the corresponding value in the input (i.e. the filtration values of + the top-dimensional cells) is useful for differentiation purposes. + + This function returns a list of pairs of top-dimensional cells corresponding to + the persistence birth and death cells of the filtration. The cells are represented by + their indices in the input list of top-dimensional cells (and not their indices in the + internal datastructure that includes non-maximal cells). Note that when two adjacent + top-dimensional cells have the same filtration value, we arbitrarily return one of the two + when calling the function on one of their common faces. + + :returns: The top-dimensional cells/cofaces of the positive and negative cells, together with the corresponding homological dimension. + :rtype: numpy array of integers of shape [number_of_persistence_points, 3], the integers of eah row being: (homological dimension, index of positive top-dimensional cell, index of negative top-dimensional cell). If the homological feature is essential, i.e., if the death time is +infinity, then the index of the corresponding negative top-dimensional cell is -1. + """ + cdef vector[vector[int]] persistence_result + if self.pcohptr != NULL: + persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() + else: + print("cofaces_of_persistence_pairs function requires persistence function" + " to be launched first.") + return np.array(persistence_result) + def betti_numbers(self): """This function returns the Betti numbers of the complex. 
diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 85d25492..b18627c4 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -508,7 +508,7 @@ cdef class SimplexTree: """ if self.pcohptr != NULL: if persistence_file != '': - self.pcohptr.write_output_diagram(str.encode(persistence_file)) + self.pcohptr.write_output_diagram(persistence_file.encode('utf-8')) else: print("persistence_file must be specified") else: diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index e5accf50..defac88c 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -75,12 +75,13 @@ persistent_cohomology::Persistent_cohomologydimension(splx) == stptr_->dimension()){return splx;} else{ - for (auto v : stptr_->coboundary_simplex_range(splx)){ + for (auto v : stptr_->get_coboundary_of_a_cell(splx)){ if(stptr_->filtration(v) == stptr_->filtration(splx)){ return top_dimensional_coface(v); } } } + return splx; } std::vector> cofaces_of_cubical_persistence_pairs() { diff --git a/src/python/test/test_cubical_complex.py b/src/python/test/test_cubical_complex.py index 8c1b2600..8af63355 100755 --- a/src/python/test/test_cubical_complex.py +++ b/src/python/test/test_cubical_complex.py @@ -147,3 +147,8 @@ def test_connected_sublevel_sets(): periodic_dimensions = periodic_dimensions) assert cub.persistence() == [(0, (2.0, float("inf")))] assert cub.betti_numbers() == [1, 0, 0] + +def test_connected_sublevel_sets(): + cub = CubicalComplex(top_dimensional_cells = [[0, 0, 0], [0, 1, 0], [0, 0, 0]]) + cub.persistence() + assert cub.cofaces_of_persistence_pairs() == np.array([[1, 7, 4], [0, 8, -1]]) -- cgit v1.2.3 From a47ace987876cb52351ae9223d335629aedbd71e Mon Sep 17 00:00:00 2001 From: mathieu Date: Tue, 10 Mar 2020 19:44:57 -0400 Subject: new fixes --- ext/hera | 2 +- 
src/python/gudhi/representations/metrics.py | 27 ++++++++++++--------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/ext/hera b/ext/hera index cb1838e6..9a899718 160000 --- a/ext/hera +++ b/ext/hera @@ -1 +1 @@ -Subproject commit cb1838e682ec07f80720241cf9098400caeb83c7 +Subproject commit 9a89971855acefe39dce0e2adadf53b88ca8f683 diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index c5439a67..0659b457 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -10,17 +10,9 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances -from gudhi.wasserstein import wasserstein_distance as pot_wasserstein_distance from gudhi.hera import wasserstein_distance as hera_wasserstein_distance from .preprocessing import Padding -try: - from .. import bottleneck_distance - USE_GUDHI = True -except ImportError: - USE_GUDHI = False - print("Gudhi built without CGAL: BottleneckDistance will return a null matrix") - ############################################# # Metrics ################################### ############################################# @@ -111,9 +103,13 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwa YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) if metric == "bottleneck": return pairwise_distances(XX, YY, metric=sklearn_wrapper(bottleneck_distance, X, Y, **kwargs)) - elif metric == "wasserstein" or metric == "pot_wasserstein": - return pairwise_distances(XX, YY, metric=sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs)) - elif metric == "hera_wasserstein": + elif metric == "pot_wasserstein": + try: + from gudhi.wasserstein import wasserstein_distance as pot_wasserstein_distance + return pairwise_distances(XX, YY, metric=sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs)) + except ImportError: + print("Gudhi 
built without POT") + elif metric == "wasserstein" or metric == "hera_wasserstein": return pairwise_distances(XX, YY, metric=sklearn_wrapper(hera_wasserstein_distance, X, Y, **kwargs)) elif metric == "sliced_wasserstein": return pairwise_distances(XX, YY, metric=sklearn_wrapper(sliced_wasserstein_distance, X, Y, **kwargs)) @@ -192,16 +188,17 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise bottleneck distances. """ - if not USE_GUDHI: - print("Gudhi built without CGAL: returning a null matrix") - Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon) if USE_GUDHI else np.zeros((len(X), len(self.diagrams_))) + try: + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon) + except ImportError: + print("Gudhi built without CGAL") return Xfit class WassersteinDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the Wasserstein distance matrix from a list of persistence diagrams. """ - def __init__(self, order=2, internal_p=2, mode="pot", delta=0.0001): + def __init__(self, order=2, internal_p=2, mode="pot", delta=0.01): """ Constructor for the WassersteinDistance class. 
-- cgit v1.2.3 From 45b918a17cfa26a0c58d7871b869aa13b0e45019 Mon Sep 17 00:00:00 2001 From: mathieu Date: Wed, 11 Mar 2020 12:05:15 -0400 Subject: moved location of top_dimensional_coface function --- ext/hera | 2 +- .../include/gudhi/Bitmap_cubical_complex_base.h | 21 +++++++++++++++++++++ src/python/gudhi/cubical_complex.pyx | 4 +++- src/python/gudhi/periodic_cubical_complex.pyx | 4 +++- .../include/Persistent_cohomology_interface.h | 16 ++-------------- 5 files changed, 30 insertions(+), 17 deletions(-) diff --git a/ext/hera b/ext/hera index cb1838e6..9a899718 160000 --- a/ext/hera +++ b/ext/hera @@ -1 +1 @@ -Subproject commit cb1838e682ec07f80720241cf9098400caeb83c7 +Subproject commit 9a89971855acefe39dce0e2adadf53b88ca8f683 diff --git a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h index 0d6299d2..7496d74a 100644 --- a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h +++ b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h @@ -109,6 +109,14 @@ class Bitmap_cubical_complex_base { **/ virtual inline std::vector get_coboundary_of_a_cell(std::size_t cell) const; + /** + * This function computes the index of one of the top-dimensional cubes (chosen arbitrarily) associated + * to a given simplex handle. Note that the input parameter is not necessarily a cube, it might also + * be an edge or vertex of a cube. On the other hand, the output is always indicating the position of + * a cube in the data structure. + **/ + inline int get_top_dimensional_coface_of_a_cell(int splx); + /** * This procedure compute incidence numbers between cubes. 
For a cube \f$A\f$ of * dimension n and a cube \f$B \subset A\f$ of dimension n-1, an incidence @@ -602,6 +610,19 @@ void Bitmap_cubical_complex_base::setup_bitmap_based_on_top_dimensional_cells this->impose_lower_star_filtration(); } +template +int Bitmap_cubical_complex_base::get_top_dimensional_coface_of_a_cell(int splx) { + if (this->get_dimension_of_a_cell(splx) == this->dimension()){return splx;} + else{ + for (auto v : this->get_coboundary_of_a_cell(splx)){ + if(this->get_cell_data(v) == this->get_cell_data(splx)){ + return this->get_top_dimensional_coface_of_a_cell(v); + } + } + } + return splx; +} + template Bitmap_cubical_complex_base::Bitmap_cubical_complex_base(const std::vector& sizes_in_following_directions, const std::vector& top_dimensional_cells) { diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index 9e701fe6..84fec60e 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -163,7 +163,9 @@ cdef class CubicalComplex: when calling the function on one of their common faces. :returns: The top-dimensional cells/cofaces of the positive and negative cells, together with the corresponding homological dimension. - :rtype: numpy array of integers of shape [number_of_persistence_points, 3], the integers of eah row being: (homological dimension, index of positive top-dimensional cell, index of negative top-dimensional cell). If the homological feature is essential, i.e., if the death time is +infinity, then the index of the corresponding negative top-dimensional cell is -1. + :rtype: numpy array of integers of shape [number_of_persistence_points, 3], the integers of each row being: (homological dimension, + index of positive top-dimensional cell, index of negative top-dimensional cell). If the homological feature is essential, i.e., if + the death time is +infinity, then the index of the corresponding negative top-dimensional cell is -1.
""" cdef vector[vector[int]] persistence_result if self.pcohptr != NULL: diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index ba039e80..993d95c7 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -173,7 +173,9 @@ cdef class PeriodicCubicalComplex: when calling the function on one of their common faces. :returns: The top-dimensional cells/cofaces of the positive and negative cells, together with the corresponding homological dimension. - :rtype: numpy array of integers of shape [number_of_persistence_points, 3], the integers of eah row being: (homological dimension, index of positive top-dimensional cell, index of negative top-dimensional cell). If the homological feature is essential, i.e., if the death time is +infinity, then the index of the corresponding negative top-dimensional cell is -1. + :rtype: numpy array of integers of shape [number_of_persistence_points, 3], the integers of each row being: (homological dimension, + index of positive top-dimensional cell, index of negative top-dimensional cell). If the homological feature is essential, i.e., if + the death time is +infinity, then the index of the corresponding negative top-dimensional cell is -1.
""" cdef vector[vector[int]] persistence_result if self.pcohptr != NULL: diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index defac88c..77555349 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -72,18 +72,6 @@ persistent_cohomology::Persistent_cohomologydimension(splx) == stptr_->dimension()){return splx;} - else{ - for (auto v : stptr_->get_coboundary_of_a_cell(splx)){ - if(stptr_->filtration(v) == stptr_->filtration(splx)){ - return top_dimensional_coface(v); - } - } - } - return splx; - } - std::vector> cofaces_of_cubical_persistence_pairs() { // Warning: this function is meant to be used with CubicalComplex only!! @@ -104,14 +92,14 @@ persistent_cohomology::Persistent_cohomologydimension(get<0>(pair)); // Recursively get the top-dimensional cell / coface associated to the persistence generator - int face0 = top_dimensional_coface(get<0>(pair)); + int face0 = stptr_->get_top_dimensional_coface_of_a_cell(get<0>(pair)); // Retrieve the index of the corresponding top-dimensional cell in the input data int splx0 = order[face0]; int splx1 = -1; if (isfinite(stptr_->filtration(get<1>(pair)))){ // Recursively get the top-dimensional cell / coface associated to the persistence generator - int face1 = top_dimensional_coface(get<1>(pair)); + int face1 = stptr_->get_top_dimensional_coface_of_a_cell(get<1>(pair)); // Retrieve the index of the corresponding top-dimensional cell in the input data splx1 = order[face1]; } -- cgit v1.2.3 From 25e40a52ec7bc9e1bfe418fb1aa16e2a06994d1b Mon Sep 17 00:00:00 2001 From: mathieu Date: Wed, 11 Mar 2020 15:35:37 -0400 Subject: new fixes --- src/python/gudhi/representations/metrics.py | 63 +++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 
0659b457..f913f1fc 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -19,7 +19,7 @@ from .preprocessing import Padding def sliced_wasserstein_distance(D1, D2, num_directions): """ - This is a function for computing the sliced Wasserstein distance from two persistence diagrams. The Sliced Wasserstein distance is computed by projecting the persistence diagrams onto lines, comparing the projections with the 1-norm, and finally integrating over all possible lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details. + This is a function for computing the sliced Wasserstein distance from two persistence diagrams. The Sliced Wasserstein distance is computed by projecting the persistence diagrams onto lines, comparing the projections with the 1-norm, and finally averaging over the lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details. :param D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). :param D2: (m x 2) numpy.array encoding the second diagram. :param num_directions: number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation. @@ -39,6 +39,34 @@ def sliced_wasserstein_distance(D1, D2, num_directions): L1 = np.sum(np.abs(A-B), axis=0) return np.mean(L1) +def compute_persistence_diagram_projections(X, num_directions): + """ + This is a function for projecting the points of a list of persistence diagrams (as well as their diagonal projections) onto a fixed number of lines sampled uniformly on [-pi/2, pi/2]. This function can be used as a preprocessing step in order to speed up the running time for computing all pairwise sliced Wasserstein distances / kernel values on a list of persistence diagrams. + :param X: list of persistence diagrams. 
+ :param num_directions: number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation. + :returns: list of projected persistence diagrams. + :rtype: float + """ + thetas = np.linspace(-np.pi/2, np.pi/2, num=num_directions+1)[np.newaxis,:-1] + lines = np.concatenate([np.cos(thetas), np.sin(thetas)], axis=0) + XX = [np.vstack([np.matmul(D, lines), np.matmul(np.matmul(D, .5 * np.ones((2,2))), lines)]) for D in X] + return XX + +def sliced_wasserstein_distance_on_projections(D1, D2): + """ + This is a function for computing the sliced Wasserstein distance between two persistence diagrams that have already been projected onto some lines. It simply amounts to comparing the sorted projections with the 1-norm, and averaging over the lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details. + :param D1: (2n x number_of_lines) numpy.array containing the n projected points of the first diagram, and the n projections of their diagonal projections. + :param D2: (2m x number_of_lines) numpy.array containing the m projected points of the second diagram, and the m projections of their diagonal projections. + :returns: the sliced Wasserstein distance between the projected persistence diagrams. + :rtype: float + """ + lim1, lim2 = int(len(D1)/2), int(len(D2)/2) + approx1, approx_diag1, approx2, approx_diag2 = D1[:lim1], D1[lim1:], D2[:lim2], D2[lim2:] + A = np.sort(np.concatenate([approx1, approx_diag2], axis=0), axis=0) + B = np.sort(np.concatenate([approx2, approx_diag1], axis=0), axis=0) + L1 = np.sum(np.abs(A-B), axis=0) + return np.mean(L1) + def persistence_fisher_distance(D1, D2, kernel_approx=None, bandwidth=1.): """ This is a function for computing the persistence Fisher distance from two persistence diagrams. 
The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. @@ -90,31 +118,43 @@ def sklearn_wrapper(metric, X, Y, **kwargs): return metric(X[int(a[0])], Y[int(b[0])], **kwargs) return flat_metric +PAIRWISE_DISTANCE_FUNCTIONS = { + "wasserstein": hera_wasserstein_distance, + "hera_wasserstein": hera_wasserstein_distance, + "persistence_fisher": persistence_fisher_distance, +} + def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwargs): """ This function computes the distance matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2). :param X: first list of persistence diagrams. :param Y: second list of persistence diagrams (optional). If None, pairwise distances are computed from the first list only. - :param metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "bottleneck", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. + :param metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. 
:returns: distance matrix, i.e., numpy array of shape (num diagrams 1 x num diagrams 2) :rtype: float """ XX = np.reshape(np.arange(len(X)), [-1,1]) YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) if metric == "bottleneck": - return pairwise_distances(XX, YY, metric=sklearn_wrapper(bottleneck_distance, X, Y, **kwargs)) + try: + from .. import bottleneck_distance + return pairwise_distances(XX, YY, metric=sklearn_wrapper(bottleneck_distance, X, Y, **kwargs)) + except ImportError: + print("Gudhi built without CGAL") + raise elif metric == "pot_wasserstein": try: from gudhi.wasserstein import wasserstein_distance as pot_wasserstein_distance return pairwise_distances(XX, YY, metric=sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs)) except ImportError: - print("Gudhi built without POT") - elif metric == "wasserstein" or metric == "hera_wasserstein": - return pairwise_distances(XX, YY, metric=sklearn_wrapper(hera_wasserstein_distance, X, Y, **kwargs)) + print("Gudhi built without POT. 
Please install POT or use metric='wasserstein' or metric='hera_wasserstein'") + raise elif metric == "sliced_wasserstein": - return pairwise_distances(XX, YY, metric=sklearn_wrapper(sliced_wasserstein_distance, X, Y, **kwargs)) - elif metric == "persistence_fisher": - return pairwise_distances(XX, YY, metric=sklearn_wrapper(persistence_fisher_distance, X, Y, **kwargs)) + Xproj = compute_persistence_diagram_projections(X, **kwargs) + Yproj = None if Y is None else compute_persistence_diagram_projections(Y, **kwargs) + return pairwise_distances(XX, YY, metric=sklearn_wrapper(sliced_wasserstein_distance_on_projections, Xproj, Yproj)) + elif type(metric) == str: + return pairwise_distances(XX, YY, metric=sklearn_wrapper(PAIRWISE_DISTANCE_FUNCTIONS[metric], X, Y, **kwargs)) else: return pairwise_distances(XX, YY, metric=sklearn_wrapper(metric, X, Y, **kwargs)) @@ -188,10 +228,7 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise bottleneck distances. 
""" - try: - Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon) - except ImportError: - print("Gudhi built without CGAL") + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon) return Xfit class WassersteinDistance(BaseEstimator, TransformerMixin): -- cgit v1.2.3 From 6552d09c3f290a25ee910e007084fe3809f8c8ed Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Thu, 12 Mar 2020 16:19:34 -0400 Subject: fixed error message --- src/python/gudhi/representations/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index f913f1fc..4070c321 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -147,7 +147,7 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwa from gudhi.wasserstein import wasserstein_distance as pot_wasserstein_distance return pairwise_distances(XX, YY, metric=sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs)) except ImportError: - print("Gudhi built without POT. Please install POT or use metric='wasserstein' or metric='hera_wasserstein'") + print("POT (Python Optimal Transport) is not installed. 
Please install POT or use metric='wasserstein' or metric='hera_wasserstein'") raise elif metric == "sliced_wasserstein": Xproj = compute_persistence_diagram_projections(X, **kwargs) -- cgit v1.2.3 From d239af744539572b485b09031f60121383fc1bc6 Mon Sep 17 00:00:00 2001 From: mathieu Date: Fri, 13 Mar 2020 11:31:19 -0400 Subject: tried to fix hera's update --- ext/hera | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/hera b/ext/hera index 9a899718..cb1838e6 160000 --- a/ext/hera +++ b/ext/hera @@ -1 +1 @@ -Subproject commit 9a89971855acefe39dce0e2adadf53b88ca8f683 +Subproject commit cb1838e682ec07f80720241cf9098400caeb83c7 -- cgit v1.2.3 From 4fb8ab586088dd582b3949cecc11395c37b6f3e6 Mon Sep 17 00:00:00 2001 From: mathieu Date: Fri, 13 Mar 2020 11:32:22 -0400 Subject: tried to fix hera's update --- ext/hera | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/hera b/ext/hera index 0019cae9..cb1838e6 160000 --- a/ext/hera +++ b/ext/hera @@ -1 +1 @@ -Subproject commit 0019cae9dc1e9d11aa03bc59681435ba7f21eea8 +Subproject commit cb1838e682ec07f80720241cf9098400caeb83c7 -- cgit v1.2.3 From e313e98661a54accafd6649ab274aa17cf7e4fb2 Mon Sep 17 00:00:00 2001 From: mathieu Date: Fri, 13 Mar 2020 11:56:43 -0400 Subject: fix hera --- ext/hera | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/hera b/ext/hera index cb1838e6..0019cae9 160000 --- a/ext/hera +++ b/ext/hera @@ -1 +1 @@ -Subproject commit cb1838e682ec07f80720241cf9098400caeb83c7 +Subproject commit 0019cae9dc1e9d11aa03bc59681435ba7f21eea8 -- cgit v1.2.3 From 6410abe3788e17a24b1569bcd7f121d126e1c6cc Mon Sep 17 00:00:00 2001 From: mathieu Date: Fri, 13 Mar 2020 11:58:25 -0400 Subject: fix hera --- ext/hera | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/hera b/ext/hera index cb1838e6..0019cae9 160000 --- a/ext/hera +++ b/ext/hera @@ -1 +1 @@ -Subproject commit cb1838e682ec07f80720241cf9098400caeb83c7 +Subproject commit 
0019cae9dc1e9d11aa03bc59681435ba7f21eea8 -- cgit v1.2.3 From 3099b2395fa143aa6c9b3df2c6087ccd017ff87c Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Mon, 16 Mar 2020 12:51:34 -0400 Subject: fixed doc --- src/python/gudhi/representations/kernel_methods.py | 45 +++++++++------- src/python/gudhi/representations/metrics.py | 63 +++++++++++++--------- 2 files changed, 66 insertions(+), 42 deletions(-) diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py index d89f69ab..50186d63 100644 --- a/src/python/gudhi/representations/kernel_methods.py +++ b/src/python/gudhi/representations/kernel_methods.py @@ -20,13 +20,16 @@ from .preprocessing import Padding def persistence_weighted_gaussian_kernel(D1, D2, weight=lambda x: 1, kernel_approx=None, bandwidth=1.): """ This is a function for computing the persistence weighted Gaussian kernel value from two persistence diagrams. The persistence weighted Gaussian kernel is computed by convolving the persistence diagram points with weighted Gaussian kernels. See http://proceedings.mlr.press/v48/kusano16.html for more details. - :param D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). - :param D2: (m x 2) numpy.array encoding the second diagram. - :param bandwidth: bandwidth of the Gaussian kernel with which persistence diagrams will be convolved - :param weight: weight function for the persistence diagram points. This function must be defined on 2D points, ie lists or numpy arrays of the form [p_x,p_y]. - :param kernel_approx: kernel approximation class used to speed up computation. Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). - :returns: the persistence weighted Gaussian kernel value between persistence diagrams. 
- :rtype: float + + Parameters: + D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). + D2: (m x 2) numpy.array encoding the second diagram. + bandwidth (double): bandwidth of the Gaussian kernel with which persistence diagrams will be convolved + weight: weight function for the persistence diagram points. This function must be defined on 2D points, ie lists or numpy arrays of the form [p_x,p_y]. + kernel_approx: kernel approximation class used to speed up computation. Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + + Returns: + float: the persistence weighted Gaussian kernel value between persistence diagrams. """ ws1 = np.array([weight(D1[j,:]) for j in range(len(D1))]) ws2 = np.array([weight(D2[j,:]) for j in range(len(D2))]) @@ -42,12 +45,15 @@ def persistence_weighted_gaussian_kernel(D1, D2, weight=lambda x: 1, kernel_appr def persistence_scale_space_kernel(D1, D2, kernel_approx=None, bandwidth=1.): """ This is a function for computing the persistence scale space kernel value from two persistence diagrams. The persistence scale space kernel is computed by adding the symmetric to the diagonal of each point in each persistence diagram, with negative weight, and then convolving the points with a Gaussian kernel. See https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Reininghaus_A_Stable_Multi-Scale_2015_CVPR_paper.pdf for more details. - :param D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). - :param D2: (m x 2) numpy.array encoding the second diagram. - :param bandwidth: bandwidth of the Gaussian kernel with which persistence diagrams will be convolved - :param kernel_approx: kernel approximation class used to speed up computation. 
Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). - :returns: the persistence scale space kernel value between persistence diagrams. - :rtype: float + + Parameters: + D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). + D2: (m x 2) numpy.array encoding the second diagram. + bandwidth (double): bandwidth of the Gaussian kernel with which persistence diagrams will be convolved + kernel_approx: kernel approximation class used to speed up computation. Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + + Returns: + float: the persistence scale space kernel value between persistence diagrams. """ DD1 = np.concatenate([D1, D1[:,[1,0]]], axis=0) DD2 = np.concatenate([D2, D2[:,[1,0]]], axis=0) @@ -57,11 +63,14 @@ def persistence_scale_space_kernel(D1, D2, kernel_approx=None, bandwidth=1.): def pairwise_persistence_diagram_kernels(X, Y=None, metric="sliced_wasserstein", **kwargs): """ This function computes the kernel matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2). - :param X: first list of persistence diagrams. - :param Y: second list of persistence diagrams (optional). If None, pairwise kernel values are computed from the first list only. - :param metric: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. - :returns: kernel matrix, i.e., numpy array of shape (num diagrams 1 x num diagrams 2) - :rtype: float + + Parameters: + X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams. + Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). 
If None, pairwise kernel values are computed from the first list only. + metric: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. + + Returns: + numpy array of shape (nxm): kernel matrix. """ XX = np.reshape(np.arange(len(X)), [-1,1]) YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 4070c321..e2c30f8c 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -20,11 +20,14 @@ from .preprocessing import Padding def sliced_wasserstein_distance(D1, D2, num_directions): """ This is a function for computing the sliced Wasserstein distance from two persistence diagrams. The Sliced Wasserstein distance is computed by projecting the persistence diagrams onto lines, comparing the projections with the 1-norm, and finally averaging over the lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details. - :param D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). - :param D2: (m x 2) numpy.array encoding the second diagram. - :param num_directions: number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation. - :returns: the sliced Wasserstein distance between persistence diagrams. - :rtype: float + + Parameters: + D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). + D2: (m x 2) numpy.array encoding the second diagram. + num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation. 
+ + Returns: + float: the sliced Wasserstein distance between persistence diagrams. """ thetas = np.linspace(-np.pi/2, np.pi/2, num=num_directions+1)[np.newaxis,:-1] lines = np.concatenate([np.cos(thetas), np.sin(thetas)], axis=0) @@ -42,10 +45,13 @@ def sliced_wasserstein_distance(D1, D2, num_directions): def compute_persistence_diagram_projections(X, num_directions): """ This is a function for projecting the points of a list of persistence diagrams (as well as their diagonal projections) onto a fixed number of lines sampled uniformly on [-pi/2, pi/2]. This function can be used as a preprocessing step in order to speed up the running time for computing all pairwise sliced Wasserstein distances / kernel values on a list of persistence diagrams. - :param X: list of persistence diagrams. - :param num_directions: number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation. - :returns: list of projected persistence diagrams. - :rtype: float + + Parameters: + X (list of n numpy arrays of shape (numx2)): list of persistence diagrams. + num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation. + + Returns: + XX (list of n numpy arrays of shape (2*numx2)): list of projected persistence diagrams. """ thetas = np.linspace(-np.pi/2, np.pi/2, num=num_directions+1)[np.newaxis,:-1] lines = np.concatenate([np.cos(thetas), np.sin(thetas)], axis=0) @@ -55,10 +61,13 @@ def compute_persistence_diagram_projections(X, num_directions): def sliced_wasserstein_distance_on_projections(D1, D2): """ This is a function for computing the sliced Wasserstein distance between two persistence diagrams that have already been projected onto some lines. It simply amounts to comparing the sorted projections with the 1-norm, and averaging over the lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details. 
- :param D1: (2n x number_of_lines) numpy.array containing the n projected points of the first diagram, and the n projections of their diagonal projections. - :param D2: (2m x number_of_lines) numpy.array containing the m projected points of the second diagram, and the m projections of their diagonal projections. - :returns: the sliced Wasserstein distance between the projected persistence diagrams. - :rtype: float + + Parameters: + D1: (2n x number_of_lines) numpy.array containing the n projected points of the first diagram, and the n projections of their diagonal projections. + D2: (2m x number_of_lines) numpy.array containing the m projected points of the second diagram, and the m projections of their diagonal projections. + + Returns: + float: the sliced Wasserstein distance between the projected persistence diagrams. """ lim1, lim2 = int(len(D1)/2), int(len(D2)/2) approx1, approx_diag1, approx2, approx_diag2 = D1[:lim1], D1[lim1:], D2[:lim2], D2[lim2:] @@ -70,12 +79,15 @@ def sliced_wasserstein_distance_on_projections(D1, D2): def persistence_fisher_distance(D1, D2, kernel_approx=None, bandwidth=1.): """ This is a function for computing the persistence Fisher distance from two persistence diagrams. The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. - :param D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). - :param D2: (m x 2) numpy.array encoding the second diagram. - :param bandwidth: bandwidth of the Gaussian kernel used to turn persistence diagrams into probability distributions. - :param kernel_approx: kernel approximation class used to speed up computation. 
Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). - :returns: the persistence Fisher distance between persistence diagrams. - :rtype: float + + Parameters: + D1: (n x 2) numpy.array encoding the (finite points of the) first diagram). Must not contain essential points (i.e. with infinite coordinate). + D2: (m x 2) numpy.array encoding the second diagram. + bandwidth (float): bandwidth of the Gaussian kernel used to turn persistence diagrams into probability distributions. + kernel_approx: kernel approximation class used to speed up computation. Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + + Returns: + float: the persistence Fisher distance between persistence diagrams. """ projection = (1./2) * np.ones((2,2)) diagonal_projections1 = np.matmul(D1, projection) @@ -127,11 +139,14 @@ PAIRWISE_DISTANCE_FUNCTIONS = { def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwargs): """ This function computes the distance matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2). - :param X: first list of persistence diagrams. - :param Y: second list of persistence diagrams (optional). If None, pairwise distances are computed from the first list only. - :param metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. - :returns: distance matrix, i.e., numpy array of shape (num diagrams 1 x num diagrams 2) - :rtype: float + + Parameters: + X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams. 
+ Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise distances are computed from the first list only. + metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. + + Returns: + numpy array of shape (nxm): distance matrix """ XX = np.reshape(np.arange(len(X)), [-1,1]) YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) -- cgit v1.2.3 From 0b4eddeb0d53d465016d5eb913b382123bc5b891 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 23 Mar 2020 18:35:07 +0100 Subject: Avoid consecutive push_back --- src/python/include/Persistent_cohomology_interface.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 22d6f654..89ff5137 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -117,8 +117,8 @@ persistent_cohomology::Persistent_cohomologyvertex_with_same_filtration(t); - diags[dim].push_back(v); - diags[dim].push_back(w); + auto& d = diags[dim]; + d.insert(d.end(), { v, w }); } } return out; @@ -152,8 +152,8 @@ persistent_cohomology::Persistent_cohomologyedge_with_same_filtration(t); @@ -165,9 +165,8 @@ persistent_cohomology::Persistent_cohomologysimplex_vertex_range(s)); if(diags.size()==0)diags.emplace_back(); - diags[0].push_back(v); - diags[0].push_back(w1); - diags[0].push_back(w2); + auto& d = diags[0]; + d.insert(d.end(), { v, w1, w2 }); } else { auto es = stptr_->edge_with_same_filtration(s); auto&& es_vertices = stptr_->simplex_vertex_range(es); 
@@ -176,10 +175,8 @@ persistent_cohomology::Persistent_cohomology Date: Mon, 23 Mar 2020 18:52:49 +0100 Subject: Reuse vector Reuse + copy should be slightly faster than regrowing each time (and moving) --- src/python/include/Persistent_cohomology_interface.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 89ff5137..3ce40af5 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -73,15 +73,17 @@ persistent_cohomology::Persistent_cohomology, std::vector>> persistence_pairs; auto const& pairs = Base::get_persistent_pairs(); persistence_pairs.reserve(pairs.size()); + std::vector birth; + std::vector death; for (auto pair : pairs) { - std::vector birth; + birth.clear(); if (get<0>(pair) != stptr_->null_simplex()) { for (auto vertex : stptr_->simplex_vertex_range(get<0>(pair))) { birth.push_back(vertex); } } - std::vector death; + death.clear(); if (get<1>(pair) != stptr_->null_simplex()) { death.reserve(birth.size()+1); for (auto vertex : stptr_->simplex_vertex_range(get<1>(pair))) { @@ -89,7 +91,7 @@ persistent_cohomology::Persistent_cohomology Date: Mon, 23 Mar 2020 21:54:56 +0100 Subject: Add test --- src/python/test/test_simplex_generators.py | 57 ++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100755 src/python/test/test_simplex_generators.py diff --git a/src/python/test/test_simplex_generators.py b/src/python/test/test_simplex_generators.py new file mode 100755 index 00000000..efb5f8e3 --- /dev/null +++ b/src/python/test/test_simplex_generators.py @@ -0,0 +1,57 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
+ Author(s): Marc Glisse + + Copyright (C) 2020 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +import gudhi +import numpy as np + + +def test_flag_generators(): + pts = np.array([[0, 0], [0, 1.01], [1, 0], [1.02, 1.03], [100, 0], [100, 3.01], [103, 0], [103.02, 3.03]]) + r = gudhi.RipsComplex(pts, max_edge_length=4) + st = r.create_simplex_tree(max_dimension=50) + st.persistence() + g = st.flag_persistence_generators() + assert np.array_equal(g[0], [[2, 2, 0], [1, 1, 0], [3, 3, 1], [6, 6, 4], [5, 5, 4], [7, 7, 5]]) + assert len(g[1]) == 1 + assert np.array_equal(g[1][0], [[3, 2, 2, 1]]) + assert np.array_equal(g[2], [0, 4]) + assert len(g[3]) == 1 + assert np.array_equal(g[3][0], [[7, 6]]) + + +def test_lower_star_generators(): + st = gudhi.SimplexTree() + st.insert([0, 1, 2], -10) + st.insert([0, 3], -10) + st.insert([1, 3], -10) + st.assign_filtration([2], -1) + st.assign_filtration([3], 0) + st.assign_filtration([0], 1) + st.assign_filtration([1], 2) + st.make_filtration_non_decreasing() + st.persistence(min_persistence=-1) + g = st.lower_star_persistence_generators(min_persistence=-1) + assert len(g[0]) == 2 + assert np.array_equal(g[0][0], [[0, 0], [3, 0], [1, 1]]) + assert np.array_equal(g[0][1], [[1, 1]]) + assert len(g[1]) == 2 + assert np.array_equal(g[1][0], [2]) + assert np.array_equal(g[1][1], [1]) + + +def test_empty(): + st = gudhi.SimplexTree() + st.persistence() + assert st.lower_star_persistence_generators() == ([], []) + g = st.flag_persistence_generators() + assert np.array_equal(g[0], np.empty((0, 3))) + assert g[1] == [] + assert np.array_equal(g[2], []) + assert g[3] == [] -- cgit v1.2.3 From cb838b2ea4a4db9c54f71103001bdafb90766306 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Tue, 24 Mar 2020 06:37:00 +0100 Subject: merge https://github.com/mglisse/gudhi-devel/tree/alpha-cache and fix conflicts --- src/Alpha_complex/include/gudhi/Alpha_complex.h | 89 ++++++++++--------------- 1 file changed, 37 
insertions(+), 52 deletions(-) diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h index 1b5d6997..eb4ef427 100644 --- a/src/Alpha_complex/include/gudhi/Alpha_complex.h +++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h @@ -132,6 +132,8 @@ class Alpha_complex { Delaunay_triangulation* triangulation_; /** \brief Kernel for triangulation_ functions access.*/ Kernel kernel_; + /** \brief Cache for geometric constructions: circumcenter and squared radius of a simplex.*/ + std::vector> cache_; public: /** \brief Alpha_complex constructor from an OFF file name. @@ -246,6 +248,24 @@ class Alpha_complex { } } + template + auto& get_cache(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) { + auto k = cplx.key(s); + if(k==cplx.null_key()){ + k = cache_.size(); + cplx.assign_key(s, k); + // Use a transform_range? Check the impact on perf. + thread_local std::vector v; + v.clear(); + for (auto vertex : cplx.simplex_vertex_range(s)) + v.push_back(get_point(vertex)); + Point_d c = kernel_.construct_circumcenter_d_object()(v.cbegin(), v.cend()); + typename Kernel::FT r = kernel_.squared_distance_d_object()(c, v[0]); + cache_.emplace_back(std::move(c), std::move(r)); + } + return cache_[k]; + } + public: /** \brief Inserts all Delaunay triangulation into the simplicial complex. 
* It also computes the filtration values accordingly to the \ref createcomplexalgorithm if default_filtration_value @@ -324,46 +344,28 @@ class Alpha_complex { if (!default_filtration_value) { // -------------------------------------------------------------------------------------------- - // Will be re-used many times - Vector_of_CGAL_points pointVector; // ### For i : d -> 0 for (int decr_dim = triangulation_->maximal_dimension(); decr_dim >= 0; decr_dim--) { // ### Foreach Sigma of dim i for (Simplex_handle f_simplex : complex.skeleton_simplex_range(decr_dim)) { int f_simplex_dim = complex.dimension(f_simplex); if (decr_dim == f_simplex_dim) { - pointVector.clear(); - #ifdef DEBUG_TRACES - std::clog << "Sigma of dim " << decr_dim << " is"; - #endif // DEBUG_TRACES - for (auto vertex : complex.simplex_vertex_range(f_simplex)) { - pointVector.push_back(get_point(vertex)); - #ifdef DEBUG_TRACES - std::clog << " " << vertex; - #endif // DEBUG_TRACES - } - #ifdef DEBUG_TRACES - std::clog << std::endl; - #endif // DEBUG_TRACES // ### If filt(Sigma) is NaN : filt(Sigma) = alpha(Sigma) if (std::isnan(complex.filtration(f_simplex))) { Filtration_value alpha_complex_filtration = 0.0; // No need to compute squared_radius on a single point - alpha is 0.0 if (f_simplex_dim > 0) { - // squared_radius function initialization - Squared_Radius squared_radius = kernel_.compute_squared_radius_d_object(); - - CGAL::NT_converter cv; - auto sqrad = squared_radius(pointVector.begin(), pointVector.end()); - #if CGAL_VERSION_NR >= 1050000000 + auto const& sqrad = get_cache(complex, f_simplex).second; +#if CGAL_VERSION_NR >= 1050000000 if(exact) CGAL::exact(sqrad); - #endif +#endif + CGAL::NT_converter cv; alpha_complex_filtration = cv(sqrad); } complex.assign_filtration(f_simplex, alpha_complex_filtration); - #ifdef DEBUG_TRACES +#ifdef DEBUG_TRACES std::clog << "filt(Sigma) is NaN : filt(Sigma) =" << complex.filtration(f_simplex) << std::endl; - #endif // DEBUG_TRACES +#endif // 
DEBUG_TRACES } // No need to propagate further, unweighted points all have value 0 if (decr_dim > 1) @@ -388,9 +390,7 @@ class Alpha_complex { void propagate_alpha_filtration(SimplicialComplexForAlpha& complex, Simplex_handle f_simplex) { // From SimplicialComplexForAlpha type required to assign filtration values. typedef typename SimplicialComplexForAlpha::Filtration_value Filtration_value; -#ifdef DEBUG_TRACES typedef typename SimplicialComplexForAlpha::Vertex_handle Vertex_handle; -#endif // DEBUG_TRACES // ### Foreach Tau face of Sigma for (auto f_boundary : complex.boundary_simplex_range(f_simplex)) { @@ -414,33 +414,18 @@ class Alpha_complex { #endif // DEBUG_TRACES // ### Else } else { - // insert the Tau points in a vector for is_gabriel function - Vector_of_CGAL_points pointVector; -#ifdef DEBUG_TRACES - Vertex_handle vertexForGabriel = Vertex_handle(); -#endif // DEBUG_TRACES - for (auto vertex : complex.simplex_vertex_range(f_boundary)) { - pointVector.push_back(get_point(vertex)); - } - // Retrieve the Sigma point that is not part of Tau - parameter for is_gabriel function - Point_d point_for_gabriel; - for (auto vertex : complex.simplex_vertex_range(f_simplex)) { - point_for_gabriel = get_point(vertex); - if (std::find(pointVector.begin(), pointVector.end(), point_for_gabriel) == pointVector.end()) { -#ifdef DEBUG_TRACES - // vertex is not found in Tau - vertexForGabriel = vertex; -#endif // DEBUG_TRACES - // No need to continue loop - break; - } - } - // is_gabriel function initialization - Is_Gabriel is_gabriel = kernel_.side_of_bounded_sphere_d_object(); - bool is_gab = is_gabriel(pointVector.begin(), pointVector.end(), point_for_gabriel) - != CGAL::ON_BOUNDED_SIDE; + // Find which vertex of f_simplex is missing in f_boundary. We could actually write a variant of boundary_simplex_range that gives pairs (f_boundary, vertex). We rely on the fact that simplex_vertex_range is sorted. 
+ auto longlist = complex.simplex_vertex_range(f_simplex); + auto shortlist = complex.simplex_vertex_range(f_boundary); + auto longiter = std::begin(longlist); + auto shortiter = std::begin(shortlist); + auto enditer = std::end(shortlist); + while(shortiter != enditer && *longiter == *shortiter) { ++longiter; ++shortiter; } + Vertex_handle extra = *longiter; + auto const& cache=get_cache(complex, f_boundary); + bool is_gab = kernel_.squared_distance_d_object()(cache.first, get_point(extra)) >= cache.second; #ifdef DEBUG_TRACES - std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << vertexForGabriel << std::endl; + std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << extra << std::endl; #endif // DEBUG_TRACES // ### If Tau is not Gabriel of Sigma if (false == is_gab) { -- cgit v1.2.3 From ec4a9583adaa73c01b05a4b30425581ed7256379 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 24 Mar 2020 14:50:53 +0100 Subject: Remove min_persistence from generators It is supposed to be handled in persistence() already. 
--- src/python/CMakeLists.txt | 1 + src/python/gudhi/simplex_tree.pxd | 4 ++-- src/python/gudhi/simplex_tree.pyx | 18 ++++-------------- src/python/include/Persistent_cohomology_interface.h | 8 ++------ src/python/test/test_simplex_generators.py | 2 +- 5 files changed, 10 insertions(+), 23 deletions(-) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index f00966a5..fb219884 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -374,6 +374,7 @@ if(PYTHONINTERP_FOUND) ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/example/simplex_tree_example.py) add_gudhi_py_test(test_simplex_tree) + add_gudhi_py_test(test_simplex_generators) # Witness add_test(NAME witness_complex_from_nearest_landmark_table_py_test diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 44789365..4038b41d 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -75,5 +75,5 @@ cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": vector[pair[double,double]] intervals_in_dimension(int dimension) void write_output_diagram(string diagram_file_name) vector[pair[vector[int], vector[int]]] persistence_pairs() - pair[vector[vector[int]], vector[vector[int]]] lower_star_generators(double) - pair[vector[vector[int]], vector[vector[int]]] flag_generators(double) + pair[vector[vector[int]], vector[vector[int]]] lower_star_generators() + pair[vector[vector[int]], vector[vector[int]]] flag_generators() diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index faa9f9d8..beb40bc4 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -526,15 +526,10 @@ cdef class SimplexTree: print("intervals_in_dim function requires persistence function" " to be launched first.") - def lower_star_persistence_generators(self, min_persistence=0.): + def lower_star_persistence_generators(self): """Assuming this is a lower-star filtration, this 
function returns the persistence pairs, where each simplex is replaced with the vertex that gave it its filtration value. - :param min_persistence: The minimum persistence value to take into - account (strictly greater than min_persistence). Default value is - 0.0. - Set min_persistence to -1.0 to see all values. - :type min_persistence: float. :returns: First the regular persistence pairs, grouped by dimension, with one vertex per extremity, and second the essential features, grouped by dimension, with one vertex each :rtype: Tuple[List[numpy.array[int] of shape (n,2)], List[numpy.array[int] of shape (m,)]] @@ -542,22 +537,17 @@ cdef class SimplexTree: :note: lower_star_persistence_generators requires that `persistence()` be called first. """ if self.pcohptr != NULL: - gen = self.pcohptr.lower_star_generators(min_persistence) + gen = self.pcohptr.lower_star_generators() normal = [np_array(d).reshape(-1,2) for d in gen.first] infinite = [np_array(d) for d in gen.second] return (normal, infinite) else: print("lower_star_persistence_generators() requires that persistence() be called first.") - def flag_persistence_generators(self, min_persistence=0.): + def flag_persistence_generators(self): """Assuming this is a flag complex, this function returns the persistence pairs, where each simplex is replaced with the vertices of the edges that gave it its filtration value. - :param min_persistence: The minimum persistence value to take into - account (strictly greater than min_persistence). Default value is - 0.0. - Set min_persistence to -1.0 to see all values. - :type min_persistence: float. :returns: First the regular persistence pairs of dimension 0, with one vertex for birth and two for death; then the other regular persistence pairs, grouped by dimension, with 2 vertices per extremity; then the connected components, with one vertex each; @@ -567,7 +557,7 @@ cdef class SimplexTree: :note: flag_persistence_generators requires that `persistence()` be called first. 
""" if self.pcohptr != NULL: - gen = self.pcohptr.flag_generators(min_persistence) + gen = self.pcohptr.flag_generators() if len(gen.first) == 0: normal0 = numpy.empty((0,3)) normals = [] diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 3ce40af5..3074389c 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -100,7 +100,7 @@ persistent_cohomology::Persistent_cohomology>, std::vector>> Generators; - Generators lower_star_generators(double min_persistence) { + Generators lower_star_generators() { Generators out; // diags[i] should be interpreted as vector> auto& diags = out.first; @@ -109,8 +109,6 @@ persistent_cohomology::Persistent_cohomology(pair); auto t = std::get<1>(pair); - if(stptr_->filtration(t) - stptr_->filtration(s) <= min_persistence) - continue; int dim = stptr_->dimension(s); auto v = stptr_->vertex_with_same_filtration(s); if(t == stptr_->null_simplex()) { @@ -128,7 +126,7 @@ persistent_cohomology::Persistent_cohomology> and other diags[i] as vector> auto& diags = out.first; @@ -137,8 +135,6 @@ persistent_cohomology::Persistent_cohomology(pair); auto t = std::get<1>(pair); - if(stptr_->filtration(t) - stptr_->filtration(s) <= min_persistence) - continue; int dim = stptr_->dimension(s); bool infinite = t == stptr_->null_simplex(); if(infinite) { diff --git a/src/python/test/test_simplex_generators.py b/src/python/test/test_simplex_generators.py index efb5f8e3..e3bdc094 100755 --- a/src/python/test/test_simplex_generators.py +++ b/src/python/test/test_simplex_generators.py @@ -37,7 +37,7 @@ def test_lower_star_generators(): st.assign_filtration([1], 2) st.make_filtration_non_decreasing() st.persistence(min_persistence=-1) - g = st.lower_star_persistence_generators(min_persistence=-1) + g = st.lower_star_persistence_generators() assert len(g[0]) == 2 assert np.array_equal(g[0][0], [[0, 0], [3, 0], [1, 
1]]) assert np.array_equal(g[0][1], [[1, 1]]) -- cgit v1.2.3 From d5c8dc1ba4d00ead5875b97e164d07f6180526b0 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 24 Mar 2020 20:31:05 +0100 Subject: print -> assert --- src/python/gudhi/simplex_tree.pyx | 47 +++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index beb40bc4..dcf1b46e 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -536,13 +536,11 @@ cdef class SimplexTree: :note: lower_star_persistence_generators requires that `persistence()` be called first. """ - if self.pcohptr != NULL: - gen = self.pcohptr.lower_star_generators() - normal = [np_array(d).reshape(-1,2) for d in gen.first] - infinite = [np_array(d) for d in gen.second] - return (normal, infinite) - else: - print("lower_star_persistence_generators() requires that persistence() be called first.") + assert self.pcohptr != NULL, "lower_star_persistence_generators() requires that persistence() be called first." + gen = self.pcohptr.lower_star_generators() + normal = [np_array(d).reshape(-1,2) for d in gen.first] + infinite = [np_array(d) for d in gen.second] + return (normal, infinite) def flag_persistence_generators(self): """Assuming this is a flag complex, this function returns the persistence pairs, @@ -556,23 +554,20 @@ cdef class SimplexTree: :note: flag_persistence_generators requires that `persistence()` be called first. 
""" - if self.pcohptr != NULL: - gen = self.pcohptr.flag_generators() - if len(gen.first) == 0: - normal0 = numpy.empty((0,3)) - normals = [] - else: - l = iter(gen.first) - normal0 = np_array(next(l)).reshape(-1,3) - normals = [np_array(d).reshape(-1,4) for d in l] - if len(gen.second) == 0: - infinite0 = numpy.empty(0) - infinites = [] - else: - l = iter(gen.second) - infinite0 = np_array(next(l)) - infinites = [np_array(d).reshape(-1,2) for d in l] - - return (normal0, normals, infinite0, infinites) + assert self.pcohptr != NULL, "flag_persistence_generators() requires that persistence() be called first." + gen = self.pcohptr.flag_generators() + if len(gen.first) == 0: + normal0 = numpy.empty((0,3)) + normals = [] + else: + l = iter(gen.first) + normal0 = np_array(next(l)).reshape(-1,3) + normals = [np_array(d).reshape(-1,4) for d in l] + if len(gen.second) == 0: + infinite0 = numpy.empty(0) + infinites = [] else: - print("flag_persistence_generators() requires that persistence() be called first.") + l = iter(gen.second) + infinite0 = np_array(next(l)) + infinites = [np_array(d).reshape(-1,2) for d in l] + return (normal0, normals, infinite0, infinites) -- cgit v1.2.3 From c8c942c43643131a7ef9899826a7095e497150fe Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Mar 2020 22:10:26 +0100 Subject: cmake --- .../modules/GUDHI_third_party_libraries.cmake | 3 + src/python/CMakeLists.txt | 14 ++ src/python/gudhi/point_cloud/dtm.py | 40 +++++ src/python/gudhi/point_cloud/knn.py | 193 +++++++++++++++++++++ src/python/test/test_dtm.py | 32 ++++ 5 files changed, 282 insertions(+) create mode 100644 src/python/gudhi/point_cloud/dtm.py create mode 100644 src/python/gudhi/point_cloud/knn.py create mode 100755 src/python/test/test_dtm.py diff --git a/src/cmake/modules/GUDHI_third_party_libraries.cmake b/src/cmake/modules/GUDHI_third_party_libraries.cmake index 2d010483..c2039674 100644 --- a/src/cmake/modules/GUDHI_third_party_libraries.cmake +++ 
b/src/cmake/modules/GUDHI_third_party_libraries.cmake @@ -160,6 +160,9 @@ if( PYTHONINTERP_FOUND ) find_python_module("sklearn") find_python_module("ot") find_python_module("pybind11") + find_python_module("torch") + find_python_module("hnswlib") + find_python_module("pykeops") endif() if(NOT GUDHI_PYTHON_PATH) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index f00966a5..d26d3e6e 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -78,6 +78,15 @@ if(PYTHONINTERP_FOUND) if(OT_FOUND) add_gudhi_debug_info("POT version ${OT_VERSION}") endif() + if(HNSWLIB_FOUND) + add_gudhi_debug_info("HNSWlib version ${OT_VERSION}") + endif() + if(TORCH_FOUND) + add_gudhi_debug_info("PyTorch version ${OT_VERSION}") + endif() + if(PYKEOPS_FOUND) + add_gudhi_debug_info("PyKeOps version ${OT_VERSION}") + endif() set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_RESULT_OF_USE_DECLTYPE', ") set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_ALL_NO_LIB', ") @@ -399,6 +408,11 @@ if(PYTHONINTERP_FOUND) # Time Delay add_gudhi_py_test(test_time_delay) + # DTM + if(SCIPY_FOUND AND SKLEARN_FOUND AND TORCH_FOUND AND HNSWLIB_FOUND AND PYKEOPS_FOUND) + add_gudhi_py_test(test_dtm) + endif() + # Documentation generation is available through sphinx - requires all modules if(SPHINX_PATH) if(MATPLOTLIB_FOUND) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py new file mode 100644 index 00000000..08f9ea60 --- /dev/null +++ b/src/python/gudhi/point_cloud/dtm.py @@ -0,0 +1,40 @@ +from .knn import KNN + + +class DTM: + def __init__(self, k, q=2, **kwargs): + """ + Args: + q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if input_type is 'distance_matrix'. + kwargs: Same parameters as KNN, except that metric="neighbors" means that transform() expects an array with the distances to the k nearest neighbors. 
+ """ + self.k = k + self.q = q + self.params = kwargs + + def fit_transform(self, X, y=None): + return self.fit(X).transform(X) + + def fit(self, X, y=None): + """ + Args: + X (numpy.array): coordinates for mass points + """ + if self.params.setdefault("metric", "euclidean") != "neighbors": + self.knn = KNN(self.k, return_index=False, return_distance=True, **self.params) + self.knn.fit(X) + return self + + def transform(self, X): + """ + Args: + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", or distances to the k nearest neighbors if metric is "neighbors" (if the array has more than k columns, the remaining ones are ignored). + """ + if self.params["metric"] == "neighbors": + distances = X[:, : self.k] + else: + distances = self.knn.transform(X) + distances = distances ** self.q + dtm = distances.sum(-1) / self.k + dtm = dtm ** (1.0 / self.q) + return dtm diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py new file mode 100644 index 00000000..57078f1e --- /dev/null +++ b/src/python/gudhi/point_cloud/knn.py @@ -0,0 +1,193 @@ +import numpy + + +class KNN: + def __init__(self, k, return_index=True, return_distance=False, metric="euclidean", **kwargs): + """ + Args: + k (int): number of neighbors (including the point itself). + return_index (bool): if True, return the index of each neighbor. + return_distance (bool): if True, return the distance to each neighbor. + implementation (str): Choice of the library that does the real work. + + * 'keops' for a brute-force, CUDA implementation through pykeops. Useful when the dimension becomes + large (10+) but the number of points remains low (less than a million). + Only "minkowski" and its aliases are supported. + * 'ckdtree' for scipy's cKDTree. Only "minkowski" and its aliases are supported. + * 'sklearn' for scikit-learn's NearestNeighbors. + Note that this provides in particular an option algorithm="brute". + * 'hnsw' for hnswlib.Index. 
It is very fast but does not provide guarantees. + Only supports "euclidean" for now. + * None will try to select a sensible one (scipy if possible, scikit-learn otherwise). + metric (str): see `sklearn.neighbors.NearestNeighbors`. + eps (float): relative error when computing nearest neighbors with the cKDTree. + p (float): norm L^p on input points (including numpy.inf) if metric is "minkowski". Defaults to 2. + n_jobs (int): Number of jobs to schedule for parallel processing of nearest neighbors on the CPU. + If -1 is given all processors are used. Default: 1. + + Additional parameters are forwarded to the backends. + """ + self.k = k + self.return_index = return_index + self.return_distance = return_distance + self.metric = metric + self.params = kwargs + # canonicalize + if metric == "euclidean": + self.params["p"] = 2 + self.metric = "minkowski" + elif metric == "manhattan": + self.params["p"] = 1 + self.metric = "minkowski" + elif metric == "chebyshev": + self.params["p"] = numpy.inf + self.metric = "minkowski" + elif metric == "minkowski": + self.params["p"] = kwargs.get("p", 2) + if self.params.get("implementation") in {"keops", "ckdtree"}: + assert self.metric == "minkowski" + if self.params.get("implementation") == "hnsw": + assert self.metric == "minkowski" and self.params["p"] == 2 + if not self.params.get("implementation"): + if self.metric == "minkowski": + self.params["implementation"] = "ckdtree" + else: + self.params["implementation"] = "sklearn" + + def fit_transform(self, X, y=None): + return self.fit(X).transform(X) + + def fit(self, X, y=None): + """ + Args: + X (numpy.array): coordinates for reference points + """ + self.ref_points = X + if self.params.get("implementation") == "ckdtree": + # sklearn could handle this, but it is much slower + from scipy.spatial import cKDTree + self.kdtree = cKDTree(X) + + if self.params.get("implementation") == "sklearn" and self.metric != "precomputed": + # FIXME: sklearn badly handles "precomputed" + from 
sklearn.neighbors import NearestNeighbors + + nargs = {k: v for k, v in self.params.items() if k in {"p", "n_jobs", "metric_params", "algorithm", "leaf_size"}} + self.nn = NearestNeighbors(self.k, metric=self.metric, **nargs) + self.nn.fit(X) + + if self.params.get("implementation") == "hnsw": + import hnswlib + self.graph = hnswlib.Index("l2", len(X[0])) # Actually returns squared distances + self.graph.init_index(len(X), **{k:v for k,v in self.params.items() if k in {"ef_construction", "M", "random_seed"}}) + n = self.params.get("num_threads") + if n is None: + n = self.params.get("n_jobs", 1) + self.params["num_threads"] = n + self.graph.add_items(X, num_threads=n) + + return self + + def transform(self, X): + """ + Args: + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed" + """ + metric = self.metric + k = self.k + + if metric == "precomputed": + # scikit-learn could handle that, but they insist on calling fit() with an unused square array, which is too unnatural. + X = numpy.array(X) + if self.return_index: + neighbors = numpy.argpartition(X, k - 1)[:, 0:k] + distances = numpy.take_along_axis(X, neighbors, axis=-1) + ngb_order = numpy.argsort(distances, axis=-1) + neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + if self.return_distance: + distances = numpy.take_along_axis(distances, ngb_order, axis=-1) + return neighbors, distances + else: + return neighbors + if self.return_distance: + distances = numpy.partition(X, k - 1)[:, 0:k] + # partition is not guaranteed to sort the lower half, although it often does + distances.sort(axis=-1) + return distances + return None + + if self.params.get("implementation") == "hnsw": + ef = self.params.get("ef") + if ef is not None: + self.graph.set_ef(ef) + neighbors, distances = self.graph.knn_query(X, k, num_threads=self.params["num_threads"]) + # The k nearest neighbors are always sorted. 
I couldn't find it in the doc, but the code calls searchKnn, + # which returns a priority_queue, and then fills the return array backwards with top/pop on the queue. + if self.return_index: + if self.return_distance: + return neighbors, numpy.sqrt(distances) + else: + return neighbors + if self.return_distance: + return numpy.sqrt(distances) + return None + + if self.params.get("implementation") == "keops": + import torch + from pykeops.torch import LazyTensor + + # 'float64' is slow except on super expensive GPUs. Allow it with some param? + XX = torch.tensor(X, dtype=torch.float32) + if X is self.ref_points: + YY = XX + else: + YY = torch.tensor(self.ref_points, dtype=torch.float32) + + p = self.params["p"] + if p == numpy.inf: + # Requires a version of pykeops strictly more recent than 1.3 + mat = (LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs().max(-1) + elif p == 2: # Any even integer? + mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])) ** p).sum(-1) + else: + mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs() ** p).sum(-1) + + if self.return_index: + if self.return_distance: + distances, neighbors = mat.Kmin_argKmin(k, dim=1) + if p != numpy.inf: + distances = distances ** (1.0 / p) + return neighbors, distances + else: + neighbors = mat.argKmin(k, dim=1) + return neighbors + if self.return_distance: + distances = mat.Kmin(k, dim=1) + if p != numpy.inf: + distances = distances ** (1.0 / p) + return distances + return None + # FIXME: convert everything back to numpy arrays or not? 
+ + if hasattr(self, "kdtree"): + qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}} + distances, neighbors = self.kdtree.query(X, k=self.k, **qargs) + if self.return_index: + if self.return_distance: + return neighbors, distances + else: + return neighbors + if self.return_distance: + return distances + return None + + if self.return_distance: + distances, neighbors = self.nn.kneighbors(X, return_distance=True) + if self.return_index: + return neighbors, distances + else: + return distances + if self.return_index: + neighbors = self.nn.kneighbors(X, return_distance=False) + return neighbors + return None diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py new file mode 100755 index 00000000..57fdd131 --- /dev/null +++ b/src/python/test/test_dtm.py @@ -0,0 +1,32 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
+ Author(s): Marc Glisse + + Copyright (C) 2020 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +from gudhi.point_cloud.dtm import DTM +import numpy + + +def test_dtm_euclidean(): + pts = numpy.random.rand(1000,4) + k = 3 + dtm = DTM(k,implementation="ckdtree") + print(dtm.fit_transform(pts)) + dtm = DTM(k,implementation="sklearn") + print(dtm.fit_transform(pts)) + dtm = DTM(k,implementation="sklearn",algorithm="brute") + print(dtm.fit_transform(pts)) + dtm = DTM(k,implementation="hnsw") + print(dtm.fit_transform(pts)) + from scipy.spatial.distance import cdist + d = cdist(pts,pts) + dtm = DTM(k,metric="precomputed") + print(dtm.fit_transform(d)) + dtm = DTM(k,implementation="keops") + print(dtm.fit_transform(pts)) + -- cgit v1.2.3 From 5c4c398b99fe1b157d64cd43a4977ce1504ca795 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Mar 2020 22:25:28 +0100 Subject: HNSWlib doesn't define __version__ --- src/cmake/modules/GUDHI_third_party_libraries.cmake | 21 ++++++++++++++++++++- src/python/CMakeLists.txt | 7 ++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/cmake/modules/GUDHI_third_party_libraries.cmake b/src/cmake/modules/GUDHI_third_party_libraries.cmake index c2039674..a931b3a1 100644 --- a/src/cmake/modules/GUDHI_third_party_libraries.cmake +++ b/src/cmake/modules/GUDHI_third_party_libraries.cmake @@ -150,6 +150,25 @@ function( find_python_module PYTHON_MODULE_NAME ) endif() endfunction( find_python_module ) +# For modules that do not define module.__version__ +function( find_python_module_no_version PYTHON_MODULE_NAME ) + string(TOUPPER ${PYTHON_MODULE_NAME} PYTHON_MODULE_NAME_UP) + execute_process( + COMMAND ${PYTHON_EXECUTABLE} -c "import ${PYTHON_MODULE_NAME}" + RESULT_VARIABLE PYTHON_MODULE_RESULT + ERROR_VARIABLE PYTHON_MODULE_ERROR) + if(PYTHON_MODULE_RESULT EQUAL 0) + # Remove carriage return + message ("++ Python module ${PYTHON_MODULE_NAME} found") + set(${PYTHON_MODULE_NAME_UP}_FOUND 
TRUE PARENT_SCOPE) + else() + message ("PYTHON_MODULE_NAME = ${PYTHON_MODULE_NAME} + - PYTHON_MODULE_RESULT = ${PYTHON_MODULE_RESULT} + - PYTHON_MODULE_ERROR = ${PYTHON_MODULE_ERROR}") + set(${PYTHON_MODULE_NAME_UP}_FOUND FALSE PARENT_SCOPE) + endif() +endfunction( find_python_module_no_version ) + if( PYTHONINTERP_FOUND ) find_python_module("cython") find_python_module("pytest") @@ -161,8 +180,8 @@ if( PYTHONINTERP_FOUND ) find_python_module("ot") find_python_module("pybind11") find_python_module("torch") - find_python_module("hnswlib") find_python_module("pykeops") + find_python_module_no_version("hnswlib") endif() if(NOT GUDHI_PYTHON_PATH) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index d26d3e6e..ec0ab1ca 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -79,13 +79,14 @@ if(PYTHONINTERP_FOUND) add_gudhi_debug_info("POT version ${OT_VERSION}") endif() if(HNSWLIB_FOUND) - add_gudhi_debug_info("HNSWlib version ${OT_VERSION}") + # Does not have a version number... 
+ add_gudhi_debug_info("HNSWlib found") endif() if(TORCH_FOUND) - add_gudhi_debug_info("PyTorch version ${OT_VERSION}") + add_gudhi_debug_info("PyTorch version ${TORCH_VERSION}") endif() if(PYKEOPS_FOUND) - add_gudhi_debug_info("PyKeOps version ${OT_VERSION}") + add_gudhi_debug_info("PyKeOps version ${PYKEOPS_VERSION}") endif() set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_RESULT_OF_USE_DECLTYPE', ") -- cgit v1.2.3 From 7ddad8220fdd34fd3ed91e16882feaa3961b2d67 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Mar 2020 22:59:20 +0100 Subject: license --- src/python/gudhi/point_cloud/dtm.py | 9 +++++++++ src/python/gudhi/point_cloud/knn.py | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 08f9ea60..839e7452 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -1,3 +1,12 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Marc Glisse +# +# Copyright (C) 2020 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + from .knn import KNN diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 57078f1e..943d4e9f 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -1,3 +1,12 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
+# Author(s): Marc Glisse +# +# Copyright (C) 2020 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + import numpy -- cgit v1.2.3 From 7120b186471828a9570fdeef37900bd8b98d0d31 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Mar 2020 23:06:06 +0100 Subject: license --- src/python/doc/point_cloud_sum.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/doc/point_cloud_sum.inc b/src/python/doc/point_cloud_sum.inc index 0a159680..ecc18951 100644 --- a/src/python/doc/point_cloud_sum.inc +++ b/src/python/doc/point_cloud_sum.inc @@ -6,7 +6,7 @@ | | :math:`(y_1, y_2, \ldots, y_d)` | | | | | | :Since: GUDHI 2.0.0 | | | | | - | | | :License: MIT (`GPL v3 `_) | + | | | :License: MIT (`GPL v3 `_, BSD-3-Clause, Apache-2.0) | | | Parts of this package require CGAL. | | | | | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 | | | | | -- cgit v1.2.3 From af35ea5b4ce631ae826f1db1940798f254aba658 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Mar 2020 23:39:59 +0100 Subject: clean-up use of "implementation" --- src/python/gudhi/point_cloud/knn.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 943d4e9f..a4ea3acd 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -72,12 +72,12 @@ class KNN: X (numpy.array): coordinates for reference points """ self.ref_points = X - if self.params.get("implementation") == "ckdtree": + if self.params["implementation"] == "ckdtree": # sklearn could handle this, but it is much slower from scipy.spatial import cKDTree self.kdtree = cKDTree(X) - if self.params.get("implementation") == "sklearn" and self.metric != "precomputed": + if self.params["implementation"] == "sklearn" and self.metric != "precomputed": # FIXME: sklearn badly handles "precomputed" from sklearn.neighbors import NearestNeighbors 
@@ -85,7 +85,7 @@ class KNN: self.nn = NearestNeighbors(self.k, metric=self.metric, **nargs) self.nn.fit(X) - if self.params.get("implementation") == "hnsw": + if self.params["implementation"] == "hnsw": import hnswlib self.graph = hnswlib.Index("l2", len(X[0])) # Actually returns squared distances self.graph.init_index(len(X), **{k:v for k,v in self.params.items() if k in {"ef_construction", "M", "random_seed"}}) @@ -125,7 +125,7 @@ class KNN: return distances return None - if self.params.get("implementation") == "hnsw": + if self.params["implementation"] == "hnsw": ef = self.params.get("ef") if ef is not None: self.graph.set_ef(ef) @@ -141,7 +141,7 @@ class KNN: return numpy.sqrt(distances) return None - if self.params.get("implementation") == "keops": + if self.params["implementation"] == "keops": import torch from pykeops.torch import LazyTensor @@ -178,7 +178,7 @@ class KNN: return None # FIXME: convert everything back to numpy arrays or not? - if hasattr(self, "kdtree"): + if self.params["implementation"] == "ckdtree": qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}} distances, neighbors = self.kdtree.query(X, k=self.k, **qargs) if self.return_index: @@ -190,6 +190,7 @@ class KNN: return distances return None + assert self.params["implementation"] == "sklearn" if self.return_distance: distances, neighbors = self.nn.kneighbors(X, return_distance=True) if self.return_index: -- cgit v1.2.3 From f74c71ca8e474ff927cae029ea63329d30293582 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 27 Mar 2020 13:43:58 +0100 Subject: Improve coverage --- src/python/gudhi/point_cloud/dtm.py | 2 ++ src/python/test/test_dtm.py | 48 +++++++++++++++++++++++++------------ 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 839e7452..541b74a6 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -30,6 +30,8 @@ 
class DTM: X (numpy.array): coordinates for mass points """ if self.params.setdefault("metric", "euclidean") != "neighbors": + # KNN gives sorted distances, which is unnecessary here. + # Maybe add a parameter to say we don't need sorting? self.knn = KNN(self.k, return_index=False, return_distance=True, **self.params) self.knn.fit(X) return self diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 57fdd131..841f8c3c 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -10,23 +10,41 @@ from gudhi.point_cloud.dtm import DTM import numpy +import pytest -def test_dtm_euclidean(): - pts = numpy.random.rand(1000,4) +def test_dtm_compare_euclidean(): + pts = numpy.random.rand(1000, 4) k = 3 - dtm = DTM(k,implementation="ckdtree") - print(dtm.fit_transform(pts)) - dtm = DTM(k,implementation="sklearn") - print(dtm.fit_transform(pts)) - dtm = DTM(k,implementation="sklearn",algorithm="brute") - print(dtm.fit_transform(pts)) - dtm = DTM(k,implementation="hnsw") - print(dtm.fit_transform(pts)) + dtm = DTM(k, implementation="ckdtree") + r0 = dtm.fit_transform(pts) + dtm = DTM(k, implementation="sklearn") + r1 = dtm.fit_transform(pts) + assert r1 == pytest.approx(r0) + dtm = DTM(k, implementation="sklearn", algorithm="brute") + r2 = dtm.fit_transform(pts) + assert r2 == pytest.approx(r0) + dtm = DTM(k, implementation="hnsw") + r3 = dtm.fit_transform(pts) + assert r3 == pytest.approx(r0) from scipy.spatial.distance import cdist - d = cdist(pts,pts) - dtm = DTM(k,metric="precomputed") - print(dtm.fit_transform(d)) - dtm = DTM(k,implementation="keops") - print(dtm.fit_transform(pts)) + d = cdist(pts, pts) + dtm = DTM(k, metric="precomputed") + r4 = dtm.fit_transform(d) + assert r4 == pytest.approx(r0) + dtm = DTM(k, implementation="keops") + r5 = dtm.fit_transform(pts) + assert r5 == pytest.approx(r0) + + +def test_dtm_precomputed(): + dist = numpy.array([[1.0, 3, 8], [1, 5, 5], [0, 2, 3]]) + dtm = DTM(2, q=1, metric="neighbors") + r 
= dtm.fit_transform(dist) + assert r == pytest.approx([2.0, 3, 1]) + + dist = numpy.array([[2.0, 2], [0, 1], [3, 4]]) + dtm = DTM(2, q=2, metric="neighbors") + r = dtm.fit_transform(dist) + assert r == pytest.approx([2.0, .707, 3.5355], rel=.01) -- cgit v1.2.3 From 03376ffe0f6060864ee8908893297f8800b7b8d1 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 27 Mar 2020 20:27:10 +0100 Subject: doc --- src/python/doc/point_cloud.rst | 17 +++++++++++++++-- src/python/gudhi/point_cloud/dtm.py | 6 +++++- src/python/gudhi/point_cloud/knn.py | 31 ++++++++++++++++++------------- src/python/test/test_dtm.py | 2 +- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/src/python/doc/point_cloud.rst b/src/python/doc/point_cloud.rst index c0d4b303..351b0786 100644 --- a/src/python/doc/point_cloud.rst +++ b/src/python/doc/point_cloud.rst @@ -21,10 +21,23 @@ Subsampling :special-members: :show-inheritance: -TimeDelayEmbedding ------------------- +Time Delay Embedding +-------------------- .. autoclass:: gudhi.point_cloud.timedelay.TimeDelayEmbedding :members: :special-members: __call__ +Nearest neighbors +----------------- + +.. automodule:: gudhi.point_cloud.knn + :members: + :special-members: __init__ + +Distance to measure +------------------- + +.. automodule:: gudhi.point_cloud.dtm + :members: + :special-members: __init__ diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 541b74a6..e4096c5e 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -11,11 +11,15 @@ from .knn import KNN class DTM: + """ + Class to compute the distance to the empirical measure defined by a point set. + """ + def __init__(self, k, q=2, **kwargs): """ Args: q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if input_type is 'distance_matrix'. 
- kwargs: Same parameters as KNN, except that metric="neighbors" means that transform() expects an array with the distances to the k nearest neighbors. + kwargs: Same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. """ self.k = k self.q = q diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index a4ea3acd..02448530 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -11,6 +11,10 @@ import numpy class KNN: + """ + Class wrapping several implementations for computing the k nearest neighbors in a point set. + """ + def __init__(self, k, return_index=True, return_distance=False, metric="euclidean", **kwargs): """ Args: @@ -19,22 +23,17 @@ class KNN: return_distance (bool): if True, return the distance to each neighbor. implementation (str): Choice of the library that does the real work. - * 'keops' for a brute-force, CUDA implementation through pykeops. Useful when the dimension becomes - large (10+) but the number of points remains low (less than a million). - Only "minkowski" and its aliases are supported. + * 'keops' for a brute-force, CUDA implementation through pykeops. Useful when the dimension becomes large (10+) but the number of points remains low (less than a million). Only "minkowski" and its aliases are supported. * 'ckdtree' for scipy's cKDTree. Only "minkowski" and its aliases are supported. - * 'sklearn' for scikit-learn's NearestNeighbors. - Note that this provides in particular an option algorithm="brute". - * 'hnsw' for hnswlib.Index. It is very fast but does not provide guarantees. - Only supports "euclidean" for now. + * 'sklearn' for scikit-learn's NearestNeighbors. Note that this provides in particular an option algorithm="brute". + * 'hnsw' for hnswlib.Index. It can be very fast but does not provide guarantees. Only supports "euclidean" for now. 
* None will try to select a sensible one (scipy if possible, scikit-learn otherwise). metric (str): see `sklearn.neighbors.NearestNeighbors`. eps (float): relative error when computing nearest neighbors with the cKDTree. p (float): norm L^p on input points (including numpy.inf) if metric is "minkowski". Defaults to 2. n_jobs (int): Number of jobs to schedule for parallel processing of nearest neighbors on the CPU. If -1 is given all processors are used. Default: 1. - - Additional parameters are forwarded to the backends. + kwargs: additional parameters are forwarded to the backends. """ self.k = k self.return_index = return_index @@ -75,20 +74,26 @@ class KNN: if self.params["implementation"] == "ckdtree": # sklearn could handle this, but it is much slower from scipy.spatial import cKDTree + self.kdtree = cKDTree(X) if self.params["implementation"] == "sklearn" and self.metric != "precomputed": # FIXME: sklearn badly handles "precomputed" from sklearn.neighbors import NearestNeighbors - nargs = {k: v for k, v in self.params.items() if k in {"p", "n_jobs", "metric_params", "algorithm", "leaf_size"}} + nargs = { + k: v for k, v in self.params.items() if k in {"p", "n_jobs", "metric_params", "algorithm", "leaf_size"} + } self.nn = NearestNeighbors(self.k, metric=self.metric, **nargs) self.nn.fit(X) if self.params["implementation"] == "hnsw": import hnswlib - self.graph = hnswlib.Index("l2", len(X[0])) # Actually returns squared distances - self.graph.init_index(len(X), **{k:v for k,v in self.params.items() if k in {"ef_construction", "M", "random_seed"}}) + + self.graph = hnswlib.Index("l2", len(X[0])) # Actually returns squared distances + self.graph.init_index( + len(X), **{k: v for k, v in self.params.items() if k in {"ef_construction", "M", "random_seed"}} + ) n = self.params.get("num_threads") if n is None: n = self.params.get("n_jobs", 1) @@ -154,7 +159,7 @@ class KNN: p = self.params["p"] if p == numpy.inf: - # Requires a version of pykeops strictly more recent 
than 1.3 + # Requires pykeops 1.4 or later mat = (LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs().max(-1) elif p == 2: # Any even integer? mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])) ** p).sum(-1) diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 841f8c3c..93b13e1a 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -47,4 +47,4 @@ def test_dtm_precomputed(): dist = numpy.array([[2.0, 2], [0, 1], [3, 4]]) dtm = DTM(2, q=2, metric="neighbors") r = dtm.fit_transform(dist) - assert r == pytest.approx([2.0, .707, 3.5355], rel=.01) + assert r == pytest.approx([2.0, 0.707, 3.5355], rel=0.01) -- cgit v1.2.3 From 68839b95e7751afd04155cd2565cc53362f01fa2 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 10:41:50 +0100 Subject: Missing test --- src/python/CMakeLists.txt | 1 + src/python/test/test_knn.py | 82 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100755 src/python/test/test_knn.py diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index ec0ab1ca..d7a6a4db 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -411,6 +411,7 @@ if(PYTHONINTERP_FOUND) # DTM if(SCIPY_FOUND AND SKLEARN_FOUND AND TORCH_FOUND AND HNSWLIB_FOUND AND PYKEOPS_FOUND) + add_gudhi_py_test(test_knn) add_gudhi_py_test(test_dtm) endif() diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py new file mode 100755 index 00000000..e455fb48 --- /dev/null +++ b/src/python/test/test_knn.py @@ -0,0 +1,82 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
+ Author(s): Marc Glisse + + Copyright (C) 2020 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +from gudhi.point_cloud.knn import KNN +import numpy as np +import pytest + + +def test_knn_explicit(): + base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) + query = np.array([[1.0, 1], [2, 2], [4, 4]]) + knn = KNN(2, metric="manhattan", return_distance=True, return_index=True) + knn.fit(base) + r = knn.transform(query) + assert r[0] == pytest.approx(np.array([[0, 1], [1, 0], [3, 2]])) + assert r[1] == pytest.approx(np.array([[0.0, 1], [1, 2], [1, 2]])) + + knn = KNN(2, metric="chebyshev", return_distance=True, return_index=False) + knn.fit(base) + r = knn.transform(query) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + r = ( + KNN(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops") + .fit(base) + .transform(query) + ) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + + knn = KNN(2, metric="minkowski", p=3, return_distance=False, return_index=True) + knn.fit(base) + r = knn.transform(query) + assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) + r = ( + KNN(2, metric="minkowski", p=3, return_distance=False, return_index=True, implementation="keops") + .fit(base) + .transform(query) + ) + assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) + + dist = np.array([[0.0, 3, 8], [1, 0, 5], [1, 2, 0]]) + knn = KNN(2, metric="precomputed", return_index=True, return_distance=False) + r = knn.fit_transform(dist) + assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) + knn = KNN(2, metric="precomputed", return_index=True, return_distance=True) + r = knn.fit_transform(dist) + assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) + assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) + + +def test_knn_compare(): + base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) + query = np.array([[1.0, 1], [2, 2], [4, 4]]) + r0 = KNN(2, implementation="ckdtree", return_index=True, 
return_distance=False).fit(base).transform(query) + r1 = KNN(2, implementation="sklearn", return_index=True, return_distance=False).fit(base).transform(query) + r2 = KNN(2, implementation="hnsw", return_index=True, return_distance=False).fit(base).transform(query) + r3 = KNN(2, implementation="keops", return_index=True, return_distance=False).fit(base).transform(query) + assert np.array_equal(r0, r1) and np.array_equal(r0, r2) and np.array_equal(r0, r3) + + r0 = KNN(2, implementation="ckdtree", return_index=True, return_distance=True).fit(base).transform(query) + r1 = KNN(2, implementation="sklearn", return_index=True, return_distance=True).fit(base).transform(query) + r2 = KNN(2, implementation="hnsw", return_index=True, return_distance=True).fit(base).transform(query) + r3 = KNN(2, implementation="keops", return_index=True, return_distance=True).fit(base).transform(query) + assert np.array_equal(r0[0], r1[0]) and np.array_equal(r0[0], r2[0]) and np.array_equal(r0[0], r3[0]) + d0 = pytest.approx(r0[1]) + assert r1[1] == d0 and r2[1] == d0 and r3[1] == d0 + + +def test_knn_nop(): + # This doesn't look super useful... 
+ p = np.array([[0.0]]) + assert None is KNN(k=1, return_index=False, return_distance=False, implementation="sklearn").fit_transform(p) + assert None is KNN(k=1, return_index=False, return_distance=False, implementation="ckdtree").fit_transform(p) + assert None is KNN(k=1, return_index=False, return_distance=False, implementation="hnsw", ef=5).fit_transform(p) + assert None is KNN(k=1, return_index=False, return_distance=False, implementation="keops").fit_transform(p) + assert None is KNN(k=1, return_index=False, return_distance=False, metric="precomputed").fit_transform(p) -- cgit v1.2.3 From 35a12b553c85af8ce31629b90a27a7071b0cc379 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 11:48:43 +0100 Subject: Doc tweaks, default DTM exponent --- src/python/doc/point_cloud.rst | 6 ++++-- src/python/doc/point_cloud_sum.inc | 4 ++-- src/python/gudhi/point_cloud/dtm.py | 17 ++++++++++++----- src/python/gudhi/point_cloud/knn.py | 6 +++--- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/python/doc/point_cloud.rst b/src/python/doc/point_cloud.rst index 351b0786..192f70db 100644 --- a/src/python/doc/point_cloud.rst +++ b/src/python/doc/point_cloud.rst @@ -28,11 +28,12 @@ Time Delay Embedding :members: :special-members: __call__ -Nearest neighbors ------------------ +K nearest neighbors +------------------- .. automodule:: gudhi.point_cloud.knn :members: + :undoc-members: :special-members: __init__ Distance to measure @@ -40,4 +41,5 @@ Distance to measure .. 
automodule:: gudhi.point_cloud.dtm :members: + :undoc-members: :special-members: __init__ diff --git a/src/python/doc/point_cloud_sum.inc b/src/python/doc/point_cloud_sum.inc index ecc18951..d4761aba 100644 --- a/src/python/doc/point_cloud_sum.inc +++ b/src/python/doc/point_cloud_sum.inc @@ -2,8 +2,8 @@ :widths: 30 40 30 +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, etc. | :Author: Vincent Rouvreau | - | | :math:`(y_1, y_2, \ldots, y_d)` | | | + | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, | :Authors: Vincent Rouvreau, Marc Glisse, Masatoshi Takenouchi | + | | :math:`(y_1, y_2, \ldots, y_d)` | find neighbors, embed time series in higher dimension, etc. | | | | | :Since: GUDHI 2.0.0 | | | | | | | | :License: MIT (`GPL v3 `_, BSD-3-Clause, Apache-2.0) | diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index e4096c5e..520cbea8 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -15,10 +15,11 @@ class DTM: Class to compute the distance to the empirical measure defined by a point set. """ - def __init__(self, k, q=2, **kwargs): + def __init__(self, k, q=None, **kwargs): """ Args: - q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if input_type is 'distance_matrix'. + k (int): number of neighbors (possibly including the point itself). + q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if metric is "neighbors" or "distance_matrix". 
kwargs: Same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. """ self.k = k @@ -31,7 +32,7 @@ class DTM: def fit(self, X, y=None): """ Args: - X (numpy.array): coordinates for mass points + X (numpy.array): coordinates for mass points. """ if self.params.setdefault("metric", "euclidean") != "neighbors": # KNN gives sorted distances, which is unnecessary here. @@ -45,11 +46,17 @@ class DTM: Args: X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", or distances to the k nearest neighbors if metric is "neighbors" (if the array has more than k columns, the remaining ones are ignored). """ + q = self.q + if q is None: + if self.params["metric"] in {"neighbors", "precomputed"}: + q = 2 + else: + q = len(X[0]) if self.params["metric"] == "neighbors": distances = X[:, : self.k] else: distances = self.knn.transform(X) - distances = distances ** self.q + distances = distances ** q dtm = distances.sum(-1) / self.k - dtm = dtm ** (1.0 / self.q) + dtm = dtm ** (1.0 / q) return dtm diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 02448530..31e4fc9f 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -18,7 +18,7 @@ class KNN: def __init__(self, k, return_index=True, return_distance=False, metric="euclidean", **kwargs): """ Args: - k (int): number of neighbors (including the point itself). + k (int): number of neighbors (possibly including the point itself). return_index (bool): if True, return the index of each neighbor. return_distance (bool): if True, return the distance to each neighbor. implementation (str): Choice of the library that does the real work. @@ -68,7 +68,7 @@ class KNN: def fit(self, X, y=None): """ Args: - X (numpy.array): coordinates for reference points + X (numpy.array): coordinates for reference points. 
""" self.ref_points = X if self.params["implementation"] == "ckdtree": @@ -105,7 +105,7 @@ class KNN: def transform(self, X): """ Args: - X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed" + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed". """ metric = self.metric k = self.k -- cgit v1.2.3 From a911f9707d44259a38ae3dbb6fbcec75779fc639 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 12:17:29 +0100 Subject: doc --- src/python/gudhi/point_cloud/dtm.py | 2 +- src/python/gudhi/point_cloud/knn.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 520cbea8..3ac69f31 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -20,7 +20,7 @@ class DTM: Args: k (int): number of neighbors (possibly including the point itself). q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if metric is "neighbors" or "distance_matrix". - kwargs: Same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. + kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. """ self.k = k self.q = q diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 31e4fc9f..bb7757f2 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -21,7 +21,7 @@ class KNN: k (int): number of neighbors (possibly including the point itself). return_index (bool): if True, return the index of each neighbor. return_distance (bool): if True, return the distance to each neighbor. 
- implementation (str): Choice of the library that does the real work. + implementation (str): choice of the library that does the real work. * 'keops' for a brute-force, CUDA implementation through pykeops. Useful when the dimension becomes large (10+) but the number of points remains low (less than a million). Only "minkowski" and its aliases are supported. * 'ckdtree' for scipy's cKDTree. Only "minkowski" and its aliases are supported. @@ -31,7 +31,7 @@ class KNN: metric (str): see `sklearn.neighbors.NearestNeighbors`. eps (float): relative error when computing nearest neighbors with the cKDTree. p (float): norm L^p on input points (including numpy.inf) if metric is "minkowski". Defaults to 2. - n_jobs (int): Number of jobs to schedule for parallel processing of nearest neighbors on the CPU. + n_jobs (int): number of jobs to schedule for parallel processing of nearest neighbors on the CPU. If -1 is given all processors are used. Default: 1. kwargs: additional parameters are forwarded to the backends. 
""" -- cgit v1.2.3 From 990d54f2f13e116f97c1d0f35cbb751015d863fe Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 12:20:57 +0100 Subject: Fix test --- src/python/test/test_dtm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 93b13e1a..1d080ab4 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -30,7 +30,7 @@ def test_dtm_compare_euclidean(): from scipy.spatial.distance import cdist d = cdist(pts, pts) - dtm = DTM(k, metric="precomputed") + dtm = DTM(k, q=2, metric="precomputed") r4 = dtm.fit_transform(d) assert r4 == pytest.approx(r0) dtm = DTM(k, implementation="keops") -- cgit v1.2.3 From 40f4b6fb1fe20c3843b1fd80f99996e6d25c9426 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 12:26:36 +0100 Subject: Comment --- src/python/gudhi/point_cloud/dtm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 3ac69f31..ba011eaf 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -59,4 +59,6 @@ class DTM: distances = distances ** q dtm = distances.sum(-1) / self.k dtm = dtm ** (1.0 / q) + # We compute too many powers, 1/p in knn then q in dtm, 1/q in dtm then q or some log in the caller. + # Add option to skip the final root? 
return dtm -- cgit v1.2.3 From 7f323484acdeafca93efdd9bdd20ed428f8fb95b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 12:45:00 +0100 Subject: Optional sort_results --- src/python/gudhi/point_cloud/dtm.py | 4 +--- src/python/gudhi/point_cloud/knn.py | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index ba011eaf..678524f2 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -35,9 +35,7 @@ class DTM: X (numpy.array): coordinates for mass points. """ if self.params.setdefault("metric", "euclidean") != "neighbors": - # KNN gives sorted distances, which is unnecessary here. - # Maybe add a parameter to say we don't need sorting? - self.knn = KNN(self.k, return_index=False, return_distance=True, **self.params) + self.knn = KNN(self.k, return_index=False, return_distance=True, sort_results=False, **self.params) self.knn.fit(X) return self diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index bb7757f2..8369f1f8 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -33,6 +33,9 @@ class KNN: p (float): norm L^p on input points (including numpy.inf) if metric is "minkowski". Defaults to 2. n_jobs (int): number of jobs to schedule for parallel processing of nearest neighbors on the CPU. If -1 is given all processors are used. Default: 1. + sort_results (bool): if True, then distances and indices of each point are + sorted on return, so that the first column contains the closest points. + Otherwise, neighbors are returned in an arbitrary order. Defaults to True. kwargs: additional parameters are forwarded to the backends. 
""" self.k = k @@ -115,18 +118,22 @@ class KNN: X = numpy.array(X) if self.return_index: neighbors = numpy.argpartition(X, k - 1)[:, 0:k] - distances = numpy.take_along_axis(X, neighbors, axis=-1) - ngb_order = numpy.argsort(distances, axis=-1) - neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + if self.params.get("sort_results", True): + X = numpy.take_along_axis(X, neighbors, axis=-1) + ngb_order = numpy.argsort(X, axis=-1) + neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + else: + ngb_order = neighbors if self.return_distance: - distances = numpy.take_along_axis(distances, ngb_order, axis=-1) + distances = numpy.take_along_axis(X, ngb_order, axis=-1) return neighbors, distances else: return neighbors if self.return_distance: distances = numpy.partition(X, k - 1)[:, 0:k] - # partition is not guaranteed to sort the lower half, although it often does - distances.sort(axis=-1) + if self.params.get("sort_results"): + # partition is not guaranteed to sort the lower half, although it often does + distances.sort(axis=-1) return distances return None -- cgit v1.2.3 From 75286efcf311f0c7c46a7039970d663f60953e14 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 12:59:01 +0100 Subject: Fix test --- src/python/test/test_dtm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 1d080ab4..33b2f3a2 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -30,7 +30,7 @@ def test_dtm_compare_euclidean(): from scipy.spatial.distance import cdist d = cdist(pts, pts) - dtm = DTM(k, q=2, metric="precomputed") + dtm = DTM(k, q=4, metric="precomputed") r4 = dtm.fit_transform(d) assert r4 == pytest.approx(r0) dtm = DTM(k, implementation="keops") -- cgit v1.2.3 From dd9457649d0d197bbed6402200e0f2f55655680e Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 28 Mar 2020 15:39:15 +0100 Subject: Default param of 2 for DTM --- 
src/python/gudhi/point_cloud/dtm.py | 14 ++++---------- src/python/test/test_dtm.py | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 678524f2..c26ba844 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -15,11 +15,11 @@ class DTM: Class to compute the distance to the empirical measure defined by a point set. """ - def __init__(self, k, q=None, **kwargs): + def __init__(self, k, q=2, **kwargs): """ Args: k (int): number of neighbors (possibly including the point itself). - q (float): order used to compute the distance to measure. Defaults to the dimension, or 2 if metric is "neighbors" or "distance_matrix". + q (float): order used to compute the distance to measure. Defaults to 2. kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. """ self.k = k @@ -44,19 +44,13 @@ class DTM: Args: X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", or distances to the k nearest neighbors if metric is "neighbors" (if the array has more than k columns, the remaining ones are ignored). """ - q = self.q - if q is None: - if self.params["metric"] in {"neighbors", "precomputed"}: - q = 2 - else: - q = len(X[0]) if self.params["metric"] == "neighbors": distances = X[:, : self.k] else: distances = self.knn.transform(X) - distances = distances ** q + distances = distances ** self.q dtm = distances.sum(-1) / self.k - dtm = dtm ** (1.0 / q) + dtm = dtm ** (1.0 / self.q) # We compute too many powers, 1/p in knn then q in dtm, 1/q in dtm then q or some log in the caller. # Add option to skip the final root? 
return dtm diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 33b2f3a2..93b13e1a 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -30,7 +30,7 @@ def test_dtm_compare_euclidean(): from scipy.spatial.distance import cdist d = cdist(pts, pts) - dtm = DTM(k, q=4, metric="precomputed") + dtm = DTM(k, metric="precomputed") r4 = dtm.fit_transform(d) assert r4 == pytest.approx(r0) dtm = DTM(k, implementation="keops") -- cgit v1.2.3 From 8d06fbeae596a0372bf9a921de7d04cc734eaa3b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 30 Mar 2020 08:14:46 +0200 Subject: Biblio --- biblio/bibliography.bib | 15 +++++++++++++++ src/python/gudhi/point_cloud/dtm.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/biblio/bibliography.bib b/biblio/bibliography.bib index 3bbe7960..f9d43638 100644 --- a/biblio/bibliography.bib +++ b/biblio/bibliography.bib @@ -1192,3 +1192,18 @@ numpages = {11}, location = {Montr\'{e}al, Canada}, series = {NIPS’18} } +@Article{dtm, +author={Chazal, Fr{\'e}d{\'e}ric +and Cohen-Steiner, David +and M{\'e}rigot, Quentin}, +title={Geometric Inference for Probability Measures}, +journal={Foundations of Computational Mathematics}, +year={2011}, +volume={11}, +number={6}, +pages={733-751}, +abstract={Data often comes in the form of a point cloud sampled from an unknown compact subset of Euclidean space. The general goal of geometric inference is then to recover geometric and topological features (e.g., Betti numbers, normals) of this subset from the approximating point cloud data. It appears that the study of distance functions allows one to address many of these questions successfully. However, one of the main limitations of this framework is that it does not cope well with outliers or with background noise. In this paper, we show how to extend the framework of distance functions to overcome this problem. 
Replacing compact subsets by measures, we introduce a notion of distance function to a probability distribution in Rd. These functions share many properties with classical distance functions, which make them suitable for inference purposes. In particular, by considering appropriate level sets of these distance functions, we show that it is possible to reconstruct offsets of sampled shapes with topological guarantees even in the presence of outliers. Moreover, in settings where empirical measures are considered, these functions can be easily evaluated, making them of particular practical interest.}, +issn={1615-3383}, +doi={10.1007/s10208-011-9098-0}, +url={https://doi.org/10.1007/s10208-011-9098-0} +} diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index c26ba844..23c36b88 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -12,7 +12,7 @@ from .knn import KNN class DTM: """ - Class to compute the distance to the empirical measure defined by a point set. + Class to compute the distance to the empirical measure defined by a point set, as introduced in :cite:`dtm`. """ def __init__(self, k, q=2, **kwargs): -- cgit v1.2.3 From 0a404547afec2e43dd5edf9410ff079d156d691a Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 30 Mar 2020 08:18:38 +0200 Subject: One more ref, to be cited later --- biblio/bibliography.bib | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/biblio/bibliography.bib b/biblio/bibliography.bib index f9d43638..056ccd72 100644 --- a/biblio/bibliography.bib +++ b/biblio/bibliography.bib @@ -1207,3 +1207,15 @@ issn={1615-3383}, doi={10.1007/s10208-011-9098-0}, url={https://doi.org/10.1007/s10208-011-9098-0} } +@article{dtmdensity, +author = "Biau, Gérard and Chazal, Frédéric and Cohen-Steiner, David and Devroye, Luc and Rodríguez, Carlos", +doi = "10.1214/11-EJS606", +fjournal = "Electronic Journal of Statistics", +journal = "Electron. J. 
Statist.", +pages = "204--237", +publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", +title = "A weighted k-nearest neighbor density estimate for geometric inference", +url = "https://doi.org/10.1214/11-EJS606", +volume = "5", +year = "2011" +} -- cgit v1.2.3 From 74155081bb8b3330c562d5c40d7f0a32fc188012 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 30 Mar 2020 18:02:43 +0200 Subject: Add density estimator --- src/python/gudhi/point_cloud/dtm.py | 66 +++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 23c36b88..e12eefa1 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -8,6 +8,7 @@ # - YYYY/MM Author: Description of the modification from .knn import KNN +import numpy as np class DTM: @@ -54,3 +55,68 @@ class DTM: # We compute too many powers, 1/p in knn then q in dtm, 1/q in dtm then q or some log in the caller. # Add option to skip the final root? return dtm + + +class DTMDensity: + """ + Density estimator based on the distance to the empirical measure defined by a point set, as defined in :cite:`dtmdensity`. Note that this implementation does not renormalize so the total measure is not 1, see the reference for suitable normalization factors in the Euclidean case. + """ + + def __init__(self, k=None, weights=None, q=None, dim=None, **kwargs): + """ + Args: + k (int): number of neighbors (possibly including the point itself). + weights (numpy.array): weights of each of the k neighbors, optional. + q (float): order used to compute the distance to measure. Defaults to dim. + dim (float): final exponent representing the dimension. Defaults to the dimension, and must be specified when the dimension cannot be read from the input (metric="neighbors" or metric="precomputed"). 
+ kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. + """ + if weights is None: + assert k is not None, "Must specify k or weights" + self.k = k + self.weights = np.full(k, 1.0 / k) + else: + self.weights = weights + self.k = len(weights) + assert k is None or k == self.k, "k differs from the length of weights" + self.q = q + self.dim = dim + self.params = kwargs + + def fit_transform(self, X, y=None): + return self.fit(X).transform(X) + + def fit(self, X, y=None): + """ + Args: + X (numpy.array): coordinates for mass points. + """ + if self.params.setdefault("metric", "euclidean") != "neighbors": + self.knn = KNN(self.k, return_index=False, return_distance=True, sort_results=False, **self.params) + self.knn.fit(X) + return self + + def transform(self, X): + """ + Args: + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", or distances to the k nearest neighbors if metric is "neighbors" (if the array has more than k columns, the remaining ones are ignored). + """ + q = self.q + dim = self.dim + if dim is None: + assert self.params["metric"] not in { + "neighbors", + "precomputed", + }, "dim not specified and cannot guess the dimension" + dim = len(X[0]) + if q is None: + q = dim + if self.params["metric"] == "neighbors": + distances = X[:, : self.k] + else: + distances = self.knn.transform(X) + distances = distances ** q + dtm = (distances * weights).sum(-1) + return dtm ** (-dim / q) + # We compute too many powers, 1/p in knn then q in dtm, d/q in dtm then whatever in the caller. + # Add option to skip the final root? 
-- cgit v1.2.3 From c5c565dfd92ce1ad5b318dca40edf9429d6334c2 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 30 Mar 2020 20:46:56 +0200 Subject: Streamline initialize_filtration --- src/Alpha_complex/test/Alpha_complex_unit_test.cpp | 3 -- .../utilities/alpha_complex_3d_persistence.cpp | 3 -- .../utilities/alpha_complex_persistence.cpp | 3 -- .../alpha_rips_persistence_bottleneck_distance.cpp | 6 --- .../example/custom_persistence_sort.cpp | 3 -- .../example/persistence_from_file.cpp | 3 -- .../example/plain_homology.cpp | 3 -- .../example/rips_multifield_persistence.cpp | 3 -- .../example/rips_persistence_step_by_step.cpp | 3 -- .../include/gudhi/Persistent_cohomology.h | 2 - .../rips_correlation_matrix_persistence.cpp | 3 -- .../utilities/rips_distance_matrix_persistence.cpp | 3 -- src/Rips_complex/utilities/rips_persistence.cpp | 3 -- .../utilities/sparse_rips_persistence.cpp | 3 -- src/Simplex_tree/include/gudhi/Simplex_tree.h | 56 ++++++++++++++-------- src/python/doc/simplex_tree_ref.rst | 1 - .../example/alpha_complex_from_points_example.py | 3 -- src/python/example/simplex_tree_example.py | 1 - src/python/gudhi/simplex_tree.pxd | 3 +- src/python/gudhi/simplex_tree.pyx | 50 ++----------------- src/python/include/Alpha_complex_interface.h | 1 - .../Euclidean_strong_witness_complex_interface.h | 2 - .../include/Euclidean_witness_complex_interface.h | 2 - src/python/include/Nerve_gic_interface.h | 1 - src/python/include/Rips_complex_interface.h | 1 - src/python/include/Simplex_tree_interface.h | 15 +++--- .../include/Strong_witness_complex_interface.h | 2 - src/python/include/Tangential_complex_interface.h | 1 - src/python/include/Witness_complex_interface.h | 2 - src/python/test/test_simplex_tree.py | 3 -- 30 files changed, 48 insertions(+), 140 deletions(-) diff --git a/src/Alpha_complex/test/Alpha_complex_unit_test.cpp b/src/Alpha_complex/test/Alpha_complex_unit_test.cpp index da1d8004..4b37e4bd 100644 --- 
a/src/Alpha_complex/test/Alpha_complex_unit_test.cpp +++ b/src/Alpha_complex/test/Alpha_complex_unit_test.cpp @@ -188,9 +188,6 @@ BOOST_AUTO_TEST_CASE(Alpha_complex_from_points) { // Test after prune_above_filtration bool modified = simplex_tree.prune_above_filtration(0.6); - if (modified) { - simplex_tree.initialize_filtration(); - } BOOST_CHECK(modified); // Another way to check num_simplices diff --git a/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp b/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp index e93c412e..91899040 100644 --- a/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp +++ b/src/Alpha_complex/utilities/alpha_complex_3d_persistence.cpp @@ -222,9 +222,6 @@ int main(int argc, char **argv) { break; } - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - std::clog << "Simplex_tree dim: " << simplex_tree.dimension() << std::endl; // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree, true); diff --git a/src/Alpha_complex/utilities/alpha_complex_persistence.cpp b/src/Alpha_complex/utilities/alpha_complex_persistence.cpp index be60ff78..7c898dfd 100644 --- a/src/Alpha_complex/utilities/alpha_complex_persistence.cpp +++ b/src/Alpha_complex/utilities/alpha_complex_persistence.cpp @@ -75,9 +75,6 @@ int main(int argc, char **argv) { std::clog << "Simplicial complex is of dimension " << simplex.dimension() << " - " << simplex.num_simplices() << " simplices - " << simplex.num_vertices() << " vertices." 
<< std::endl; - // Sort the simplices in the order of the filtration - simplex.initialize_filtration(); - std::clog << "Simplex_tree dim: " << simplex.dimension() << std::endl; // Compute the persistence diagram of the complex Gudhi::persistent_cohomology::Persistent_cohomology pcoh( diff --git a/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp b/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp index 4769eca3..ceb9e226 100644 --- a/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp +++ b/src/Bottleneck_distance/example/alpha_rips_persistence_bottleneck_distance.cpp @@ -71,9 +71,6 @@ int main(int argc, char * argv[]) { std::clog << "The Rips complex contains " << rips_stree.num_simplices() << " simplices and has dimension " << rips_stree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - rips_stree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology rips_pcoh(rips_stree); // initializes the coefficient field for homology @@ -92,9 +89,6 @@ int main(int argc, char * argv[]) { std::clog << "The Alpha complex contains " << alpha_stree.num_simplices() << " simplices and has dimension " << alpha_stree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - alpha_stree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology alpha_pcoh(alpha_stree); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/example/custom_persistence_sort.cpp b/src/Persistent_cohomology/example/custom_persistence_sort.cpp index 87e9c207..410cd987 100644 --- a/src/Persistent_cohomology/example/custom_persistence_sort.cpp +++ b/src/Persistent_cohomology/example/custom_persistence_sort.cpp @@ -86,9 +86,6 @@ int main(int argc, char **argv) { " - " << simplex.num_simplices() << " simplices - " << simplex.num_vertices() << " vertices." 
<< std::endl; - // Sort the simplices in the order of the filtration - simplex.initialize_filtration(); - std::clog << "Simplex_tree dim: " << simplex.dimension() << std::endl; Persistent_cohomology pcoh(simplex); diff --git a/src/Persistent_cohomology/example/persistence_from_file.cpp b/src/Persistent_cohomology/example/persistence_from_file.cpp index 79108730..38c44514 100644 --- a/src/Persistent_cohomology/example/persistence_from_file.cpp +++ b/src/Persistent_cohomology/example/persistence_from_file.cpp @@ -59,9 +59,6 @@ int main(int argc, char * argv[]) { std::clog << std::endl; }*/ - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology< Simplex_tree<>, Field_Zp > pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/example/plain_homology.cpp b/src/Persistent_cohomology/example/plain_homology.cpp index 4d329020..236b67de 100644 --- a/src/Persistent_cohomology/example/plain_homology.cpp +++ b/src/Persistent_cohomology/example/plain_homology.cpp @@ -59,9 +59,6 @@ int main() { st.insert_simplex_and_subfaces(edge35); st.insert_simplex(vertex4); - // Sort the simplices in the order of the filtration - st.initialize_filtration(); - // Class for homology computation // By default, since the complex has dimension 1, only 0-dimensional homology would be computed. 
// Here we also want persistent homology to be computed for the maximal dimension in the complex (persistence_dim_max = true) diff --git a/src/Persistent_cohomology/example/rips_multifield_persistence.cpp b/src/Persistent_cohomology/example/rips_multifield_persistence.cpp index e2e2c0a5..2edf5bc4 100644 --- a/src/Persistent_cohomology/example/rips_multifield_persistence.cpp +++ b/src/Persistent_cohomology/example/rips_multifield_persistence.cpp @@ -59,9 +59,6 @@ int main(int argc, char * argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp b/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp index 7da9f15d..a503d983 100644 --- a/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp +++ b/src/Persistent_cohomology/example/rips_persistence_step_by_step.cpp @@ -76,9 +76,6 @@ int main(int argc, char * argv[]) { std::clog << "The complex contains " << st.num_simplices() << " simplices \n"; std::clog << " and has dimension " << st.dimension() << " \n"; - // Sort the simplices in the order of the filtration - st.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(st); // initializes the coefficient field for homology diff --git a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h index ca4bc10d..bc111f94 100644 --- a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h +++ b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h @@ -561,7 +561,6 @@ class 
Persistent_cohomology { void output_diagram(std::ostream& ostream = std::cout) { cmp_intervals_by_length cmp(cpx_); std::sort(std::begin(persistent_pairs_), std::end(persistent_pairs_), cmp); - bool has_infinity = std::numeric_limits::has_infinity; for (auto pair : persistent_pairs_) { ostream << get<2>(pair) << " " << cpx_->dimension(get<0>(pair)) << " " << cpx_->filtration(get<0>(pair)) << " " @@ -573,7 +572,6 @@ class Persistent_cohomology { std::ofstream diagram_out(diagram_name.c_str()); cmp_intervals_by_length cmp(cpx_); std::sort(std::begin(persistent_pairs_), std::end(persistent_pairs_), cmp); - bool has_infinity = std::numeric_limits::has_infinity; for (auto pair : persistent_pairs_) { diagram_out << cpx_->dimension(get<0>(pair)) << " " << cpx_->filtration(get<0>(pair)) << " " diff --git a/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp b/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp index 67f921a6..b473738e 100644 --- a/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp +++ b/src/Rips_complex/utilities/rips_correlation_matrix_persistence.cpp @@ -71,9 +71,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp b/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp index 4ad19675..6306755d 100644 --- a/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp +++ b/src/Rips_complex/utilities/rips_distance_matrix_persistence.cpp @@ -50,9 +50,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << 
simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Rips_complex/utilities/rips_persistence.cpp b/src/Rips_complex/utilities/rips_persistence.cpp index 4cc63d3c..9d7490b3 100644 --- a/src/Rips_complex/utilities/rips_persistence.cpp +++ b/src/Rips_complex/utilities/rips_persistence.cpp @@ -52,9 +52,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Rips_complex/utilities/sparse_rips_persistence.cpp b/src/Rips_complex/utilities/sparse_rips_persistence.cpp index 40606158..ac935b41 100644 --- a/src/Rips_complex/utilities/sparse_rips_persistence.cpp +++ b/src/Rips_complex/utilities/sparse_rips_persistence.cpp @@ -54,9 +54,6 @@ int main(int argc, char* argv[]) { std::clog << "The complex contains " << simplex_tree.num_simplices() << " simplices \n"; std::clog << " and has dimension " << simplex_tree.dimension() << " \n"; - // Sort the simplices in the order of the filtration - simplex_tree.initialize_filtration(); - // Compute the persistence diagram of the complex Persistent_cohomology pcoh(simplex_tree); // initializes the coefficient field for homology diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index b455ae31..43250795 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ 
b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -142,7 +142,10 @@ class Simplex_tree { public: /** \brief Handle type to a simplex contained in the simplicial complex represented - * by the simplex tree. */ + * by the simplex tree. + * + * They are essentially pointers into internal vectors, and any insertion or removal + * of a simplex may invalidate any other Simplex_handle in the complex. */ typedef typename Dictionary::iterator Simplex_handle; private: @@ -255,11 +258,9 @@ class Simplex_tree { * * The filtration must be valid. If the filtration has not been initialized yet, the * method initializes it (i.e. order the simplices). If the complex has changed since the last time the filtration - * was initialized, please call `initialize_filtration()` to recompute it. */ + * was initialized, please call `clear_filtration()` or `initialize_filtration()` to recompute it. */ Filtration_simplex_range const& filtration_simplex_range(Indexing_tag = Indexing_tag()) { - if (filtration_vect_.empty()) { - initialize_filtration(); - } + maybe_initialize_filtration(); return filtration_vect_; } @@ -877,15 +878,13 @@ class Simplex_tree { } public: - /** \brief Initializes the filtrations, i.e. sort the - * simplices according to their order in the filtration and initializes all Simplex_keys. + /** \brief Initializes the filtration cache, i.e. sorts the + * simplices according to their order in the filtration. * - * After calling this method, filtration_simplex_range() becomes valid, and each simplex is - * assigned a Simplex_key corresponding to its order in the filtration (from 0 to m-1 for a - * simplicial complex with m simplices). + * It always recomputes the cache, even if one already exists. * - * Will be automatically called when calling filtration_simplex_range() - * if the filtration has never been initialized yet. */ + * Any insertion, deletion or change of filtration value invalidates this cache, + * which can be cleared with clear_filtration(). 
*/ void initialize_filtration() { filtration_vect_.clear(); filtration_vect_.reserve(num_simplices()); @@ -907,6 +906,21 @@ class Simplex_tree { std::stable_sort(filtration_vect_.begin(), filtration_vect_.end(), is_before_in_filtration(this)); #endif } + /** \brief Initializes the filtration cache if it isn't initialized yet. + * + * Automatically called by filtration_simplex_range(). */ + void maybe_initialize_filtration() { + if (filtration_vect_.empty()) { + initialize_filtration(); + } + } + /** \brief Clears the filtration cache produced by initialize_filtration(). + * + * Useful when initialize_filtration() has already been called and we perform an operation + * (say an insertion) that invalidates the cache. */ + void clear_filtration() { + filtration_vect_.clear(); + } private: /** Recursive search of cofaces @@ -1128,6 +1142,7 @@ class Simplex_tree { * 1 when calling the method. */ void expansion(int max_dim) { if (max_dim <= 1) return; + clear_filtration(); // Drop the cache. dimension_ = max_dim; for (Dictionary_it root_it = root_.members_.begin(); root_it != root_.members_.end(); ++root_it) { @@ -1338,9 +1353,6 @@ class Simplex_tree { /** \brief This function ensures that each simplex has a higher filtration value than its faces by increasing the * filtration values. * @return True if any filtration value was modified, false if the filtration was already non-decreasing. - * \post Some simplex tree functions require the filtration to be valid. `make_filtration_non_decreasing()` - * function is not launching `initialize_filtration()` but returns the filtration modification information. If the - * complex has changed , please call `initialize_filtration()` to recompute it. * * If a simplex has a `NaN` filtration value, it is considered lower than any other defined filtration value. 
*/ @@ -1352,6 +1364,8 @@ class Simplex_tree { modified |= rec_make_filtration_non_decreasing(simplex.second.children()); } } + if(modified) + clear_filtration(); // Drop the cache. return modified; } @@ -1391,16 +1405,16 @@ class Simplex_tree { public: /** \brief Prune above filtration value given as parameter. * @param[in] filtration Maximum threshold value. - * @return The filtration modification information. - * \post Some simplex tree functions require the filtration to be valid. `prune_above_filtration()` - * function is not launching `initialize_filtration()` but returns the filtration modification information. If the - * complex has changed , please call `initialize_filtration()` to recompute it. + * @return True if any simplex was removed, false if all simplices already had a value below the threshold. * \post Note that the dimension of the simplicial complex may be lower after calling `prune_above_filtration()` * than it was before. However, `upper_bound_dimension()` will return the old value, which remains a valid upper * bound. If you care, you can call `dimension()` to recompute the exact dimension. */ bool prune_above_filtration(Filtration_value filtration) { - return rec_prune_above_filtration(root(), filtration); + bool modified = rec_prune_above_filtration(root(), filtration); + if(modified) + clear_filtration(); // Drop the cache. + return modified; } private: @@ -1467,7 +1481,6 @@ class Simplex_tree { * @param[in] sh Simplex handle on the maximal simplex to remove. * \pre Please check the simplex has no coface before removing it. * \exception std::invalid_argument In debug mode, if sh has children. - * \post Be aware that removing is shifting data in a flat_map (initialize_filtration to be done). * \post Note that the dimension of the simplicial complex may be lower after calling `remove_maximal_simplex()` * than it was before. However, `upper_bound_dimension()` will return the old value, which remains a valid upper * bound. 
If you care, you can call `dimension()` to recompute the exact dimension. @@ -1539,6 +1552,7 @@ class Simplex_tree { * the original filtration values for each simplex. */ Extended_filtration_data extend_filtration() { + clear_filtration(); // Drop the cache. // Compute maximum and minimum of filtration values Vertex_handle maxvert = std::numeric_limits::min(); diff --git a/src/python/doc/simplex_tree_ref.rst b/src/python/doc/simplex_tree_ref.rst index 9eb8c199..46b2c1e5 100644 --- a/src/python/doc/simplex_tree_ref.rst +++ b/src/python/doc/simplex_tree_ref.rst @@ -8,7 +8,6 @@ Simplex tree reference manual .. autoclass:: gudhi.SimplexTree :members: - :undoc-members: :show-inheritance: .. automethod:: gudhi.SimplexTree.__init__ diff --git a/src/python/example/alpha_complex_from_points_example.py b/src/python/example/alpha_complex_from_points_example.py index 73faf17c..465632eb 100755 --- a/src/python/example/alpha_complex_from_points_example.py +++ b/src/python/example/alpha_complex_from_points_example.py @@ -46,9 +46,6 @@ if simplex_tree.find([4]): else: print("[4] Not found...") -# Some insertions, simplex_tree needs to initialize filtrations -simplex_tree.initialize_filtration() - print("dimension=", simplex_tree.dimension()) print("filtrations=") for simplex_with_filtration in simplex_tree.get_filtration(): diff --git a/src/python/example/simplex_tree_example.py b/src/python/example/simplex_tree_example.py index 34833899..c4635dc5 100755 --- a/src/python/example/simplex_tree_example.py +++ b/src/python/example/simplex_tree_example.py @@ -42,7 +42,6 @@ print("simplices=") for simplex_with_filtration in st.get_simplices(): print("(%s, %.2f)" % tuple(simplex_with_filtration)) -st.initialize_filtration() print("filtration=") for simplex_with_filtration in st.get_filtration(): print("(%s, %.2f)" % tuple(simplex_with_filtration)) diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 595f22bb..7e3bba2b 100644 --- 
a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -48,8 +48,7 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": int dimension() int upper_bound_dimension() bool find_simplex(vector[int] simplex) - bool insert_simplex_and_subfaces(vector[int] simplex, - double filtration) + bool insert(vector[int] simplex, double filtration) vector[pair[vector[int], double]] get_star(vector[int] simplex) vector[pair[vector[int], double]] get_cofaces(vector[int] simplex, int dimension) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index cc3753e1..a709980f 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -90,7 +90,7 @@ cdef class SimplexTree: (with more :meth:`assign_filtration` or :meth:`make_filtration_non_decreasing` for instance) before calling any function that relies on the filtration property, like - :meth:`initialize_filtration`. + :meth:`persistence`. """ self.get_ptr().assign_simplex_filtration(simplex, filtration) @@ -98,16 +98,7 @@ cdef class SimplexTree: """This function initializes and sorts the simplicial complex filtration vector. - .. note:: - - This function must be launched before - :func:`persistence()`, - :func:`betti_numbers()`, - :func:`persistent_betti_numbers()`, - or :func:`get_filtration()` - after :func:`inserting` or - :func:`removing` - simplices. + .. deprecated:: 3.2.0 """ self.get_ptr().initialize_filtration() @@ -182,10 +173,7 @@ cdef class SimplexTree: :returns: true if the simplex was found, false otherwise. :rtype: bool """ - cdef vector[int] csimplex - for i in simplex: - csimplex.push_back(i) - return self.get_ptr().find_simplex(csimplex) + return self.get_ptr().find_simplex(simplex) def insert(self, simplex, filtration=0.0): """This function inserts the given N-simplex and its subfaces with the @@ -202,11 +190,7 @@ cdef class SimplexTree: otherwise (whatever its original filtration value). 
:rtype: bool """ - cdef vector[int] csimplex - for i in simplex: - csimplex.push_back(i) - return self.get_ptr().insert_simplex_and_subfaces(csimplex, - filtration) + return self.get_ptr().insert(simplex, filtration) def get_simplices(self): """This function returns a generator with simplices and their given @@ -306,11 +290,6 @@ cdef class SimplexTree: :param simplex: The N-simplex, represented by a list of vertex. :type simplex: list of int. - .. note:: - - Be aware that removing is shifting data in a flat_map - (:func:`initialize_filtration()` to be done). - .. note:: The dimension of the simplicial complex may be lower after calling @@ -332,16 +311,6 @@ cdef class SimplexTree: :rtype: bool - .. note:: - - Some simplex tree functions require the filtration to be valid. - prune_above_filtration function is not launching - :func:`initialize_filtration()` - but returns the filtration modification - information. If the complex has changed , please call - :func:`initialize_filtration()` - to recompute it. - .. note:: Note that the dimension of the simplicial complex may be lower @@ -382,17 +351,6 @@ cdef class SimplexTree: :returns: True if any filtration value was modified, False if the filtration was already non-decreasing. :rtype: bool - - - .. note:: - - Some simplex tree functions require the filtration to be valid. - make_filtration_non_decreasing function is not launching - :func:`initialize_filtration()` - but returns the filtration modification - information. If the complex has changed , please call - :func:`initialize_filtration()` - to recompute it. 
""" return self.get_ptr().make_filtration_non_decreasing() diff --git a/src/python/include/Alpha_complex_interface.h b/src/python/include/Alpha_complex_interface.h index 8614eee3..40de88f3 100644 --- a/src/python/include/Alpha_complex_interface.h +++ b/src/python/include/Alpha_complex_interface.h @@ -58,7 +58,6 @@ class Alpha_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { alpha_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Euclidean_strong_witness_complex_interface.h b/src/python/include/Euclidean_strong_witness_complex_interface.h index c1c72737..f94c51ef 100644 --- a/src/python/include/Euclidean_strong_witness_complex_interface.h +++ b/src/python/include/Euclidean_strong_witness_complex_interface.h @@ -50,12 +50,10 @@ class Euclidean_strong_witness_complex_interface { void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } std::vector get_point(unsigned vh) { diff --git a/src/python/include/Euclidean_witness_complex_interface.h b/src/python/include/Euclidean_witness_complex_interface.h index 5d7dbdc2..4411ae79 100644 --- a/src/python/include/Euclidean_witness_complex_interface.h +++ b/src/python/include/Euclidean_witness_complex_interface.h @@ -49,12 +49,10 @@ class Euclidean_witness_complex_interface { void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - 
simplex_tree->initialize_filtration(); } void create_simplex_tree(Gudhi::Simplex_tree<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } std::vector get_point(unsigned vh) { diff --git a/src/python/include/Nerve_gic_interface.h b/src/python/include/Nerve_gic_interface.h index 5e7f8ae6..ab14c318 100644 --- a/src/python/include/Nerve_gic_interface.h +++ b/src/python/include/Nerve_gic_interface.h @@ -29,7 +29,6 @@ class Nerve_gic_interface : public Cover_complex> { public: void create_simplex_tree(Simplex_tree_interface<>* simplex_tree) { create_complex(*simplex_tree); - simplex_tree->initialize_filtration(); } void set_cover_from_Euclidean_Voronoi(int m) { set_cover_from_Voronoi(Gudhi::Euclidean_distance(), m); diff --git a/src/python/include/Rips_complex_interface.h b/src/python/include/Rips_complex_interface.h index a66b0e5b..d98b0226 100644 --- a/src/python/include/Rips_complex_interface.h +++ b/src/python/include/Rips_complex_interface.h @@ -53,7 +53,6 @@ class Rips_complex_interface { rips_complex_->create_complex(*simplex_tree, dim_max); else sparse_rips_complex_->create_complex(*simplex_tree, dim_max); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h index 1a18aed6..5b456baa 100644 --- a/src/python/include/Simplex_tree_interface.h +++ b/src/python/include/Simplex_tree_interface.h @@ -43,16 +43,19 @@ class Simplex_tree_interface : public Simplex_tree { Extended_filtration_data efd; - bool find_simplex(const Simplex& vh) { - return (Base::find(vh) != Base::null_simplex()); + bool find_simplex(const Simplex& simplex) { + return (Base::find(simplex) != Base::null_simplex()); } - void assign_simplex_filtration(const Simplex& vh, Filtration_value filtration) { - Base::assign_filtration(Base::find(vh), filtration); + void assign_simplex_filtration(const Simplex& 
simplex, Filtration_value filtration) { + Base::assign_filtration(Base::find(simplex), filtration); + Base::clear_filtration(); } bool insert(const Simplex& simplex, Filtration_value filtration = 0) { Insertion_result result = Base::insert_simplex_and_subfaces(simplex, filtration); + if (result.first != Base::null_simplex()) + Base::clear_filtration(); return (result.second); } @@ -86,7 +89,7 @@ class Simplex_tree_interface : public Simplex_tree { void remove_maximal_simplex(const Simplex& simplex) { Base::remove_maximal_simplex(Base::find(simplex)); - Base::initialize_filtration(); + Base::clear_filtration(); } Simplex_and_filtration get_simplex_and_filtration(Simplex_handle f_simplex) { @@ -123,7 +126,6 @@ class Simplex_tree_interface : public Simplex_tree { void compute_extended_filtration() { this->efd = this->extend_filtration(); - this->initialize_filtration(); return; } @@ -158,7 +160,6 @@ class Simplex_tree_interface : public Simplex_tree { } void create_persistence(Gudhi::Persistent_cohomology_interface* pcoh) { - Base::initialize_filtration(); pcoh = new Gudhi::Persistent_cohomology_interface(*this); } diff --git a/src/python/include/Strong_witness_complex_interface.h b/src/python/include/Strong_witness_complex_interface.h index cda5b514..e9ab0c7b 100644 --- a/src/python/include/Strong_witness_complex_interface.h +++ b/src/python/include/Strong_witness_complex_interface.h @@ -41,13 +41,11 @@ class Strong_witness_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/include/Tangential_complex_interface.h 
b/src/python/include/Tangential_complex_interface.h index 698226cc..b1afce94 100644 --- a/src/python/include/Tangential_complex_interface.h +++ b/src/python/include/Tangential_complex_interface.h @@ -90,7 +90,6 @@ class Tangential_complex_interface { void create_simplex_tree(Simplex_tree<>* simplex_tree) { tangential_complex_->create_complex>(*simplex_tree); - simplex_tree->initialize_filtration(); } void set_max_squared_edge_length(double max_squared_edge_length) { diff --git a/src/python/include/Witness_complex_interface.h b/src/python/include/Witness_complex_interface.h index 45e14253..76947e53 100644 --- a/src/python/include/Witness_complex_interface.h +++ b/src/python/include/Witness_complex_interface.h @@ -41,13 +41,11 @@ class Witness_complex_interface { void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square, std::size_t limit_dimension) { witness_complex_->create_complex(*simplex_tree, max_alpha_square, limit_dimension); - simplex_tree->initialize_filtration(); } void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { witness_complex_->create_complex(*simplex_tree, max_alpha_square); - simplex_tree->initialize_filtration(); } private: diff --git a/src/python/test/test_simplex_tree.py b/src/python/test/test_simplex_tree.py index 70b26e97..2137d822 100755 --- a/src/python/test/test_simplex_tree.py +++ b/src/python/test/test_simplex_tree.py @@ -46,7 +46,6 @@ def test_insertion(): assert st.find([2, 3]) == False # filtration test - st.initialize_filtration() assert st.filtration([0, 1, 2]) == 4.0 assert st.filtration([0, 2]) == 4.0 assert st.filtration([1, 2]) == 4.0 @@ -93,7 +92,6 @@ def test_insertion(): assert st.find([1]) == True assert st.find([2]) == True - st.initialize_filtration() assert st.persistence(persistence_dim_max=True) == [ (1, (4.0, float("inf"))), (0, (0.0, float("inf"))), @@ -151,7 +149,6 @@ def test_expansion(): st.expansion(3) assert st.num_vertices() == 7 assert 
st.num_simplices() == 22 - st.initialize_filtration() assert list(st.get_filtration()) == [ ([2], 0.1), -- cgit v1.2.3 From 9d89f57376e619515d99ad88c2cdeef35daaedd5 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Wed, 1 Apr 2020 09:04:18 +0200 Subject: code review: use operator[] instead of at() --- src/Alpha_complex/include/gudhi/Alpha_complex.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h index eb4ef427..4369071c 100644 --- a/src/Alpha_complex/include/gudhi/Alpha_complex.h +++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h @@ -248,6 +248,16 @@ class Alpha_complex { } } + /** \brief get_point_ returns the point corresponding to the vertex given as parameter. + * Only for internal use for faster access. + * + * @param[in] vertex Vertex handle of the point to retrieve. + * @return The point found. + */ + const Point_d& get_point_(std::size_t vertex) const { + return vertex_handle_to_iterator_[vertex]->point(); + } + template auto& get_cache(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) { auto k = cplx.key(s); @@ -258,7 +268,7 @@ class Alpha_complex { thread_local std::vector v; v.clear(); for (auto vertex : cplx.simplex_vertex_range(s)) - v.push_back(get_point(vertex)); + v.push_back(get_point_(vertex)); Point_d c = kernel_.construct_circumcenter_d_object()(v.cbegin(), v.cend()); typename Kernel::FT r = kernel_.squared_distance_d_object()(c, v[0]); cache_.emplace_back(std::move(c), std::move(r)); @@ -423,7 +433,7 @@ class Alpha_complex { while(shortiter != enditer && *longiter == *shortiter) { ++longiter; ++shortiter; } Vertex_handle extra = *longiter; auto const& cache=get_cache(complex, f_boundary); - bool is_gab = kernel_.squared_distance_d_object()(cache.first, get_point(extra)) >= cache.second; + bool is_gab = kernel_.squared_distance_d_object()(cache.first, get_point_(extra)) >= 
cache.second; #ifdef DEBUG_TRACES std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << extra << std::endl; #endif // DEBUG_TRACES -- cgit v1.2.3 From 0b19e1f991fdebbdb622d3101135eaee65c4ed5d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 1 Apr 2020 14:45:37 +0200 Subject: Split the cache per dimension Try to reduce slightly the memory use. --- src/Alpha_complex/include/gudhi/Alpha_complex.h | 33 +++++++++++++++++++------ 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h index 4369071c..ba91998d 100644 --- a/src/Alpha_complex/include/gudhi/Alpha_complex.h +++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h @@ -112,9 +112,6 @@ class Alpha_complex { typedef typename Kernel::Side_of_bounded_sphere_d Is_Gabriel; typedef typename Kernel::Point_dimension_d Point_Dimension; - // Type required to compute squared radius, or side of bounded sphere on a vector of points. - typedef typename std::vector Vector_of_CGAL_points; - // Vertex_iterator type from CGAL. typedef typename Delaunay_triangulation::Vertex_iterator CGAL_vertex_iterator; @@ -124,6 +121,9 @@ class Alpha_complex { // Structure to switch from simplex tree vertex handle to CGAL vertex iterator. typedef typename std::vector< CGAL_vertex_iterator > Vector_vertex_iterator; + // Numeric type of coordinates in the kernel + typedef typename Kernel::FT FT; + private: /** \brief Vertex iterator vector to switch from simplex tree vertex handle to CGAL vertex iterator. * Vertex handles are inserted sequentially, starting at 0.*/ @@ -133,7 +133,7 @@ class Alpha_complex { /** \brief Kernel for triangulation_ functions access.*/ Kernel kernel_; /** \brief Cache for geometric constructions: circumcenter and squared radius of a simplex.*/ - std::vector> cache_; + std::vector> cache_, old_cache_; public: /** \brief Alpha_complex constructor from an OFF file name. 
@@ -258,24 +258,39 @@ class Alpha_complex { return vertex_handle_to_iterator_[vertex]->point(); } + /// Return a reference to the circumcenter and circumradius, writing them in the cache if necessary. template auto& get_cache(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) { auto k = cplx.key(s); if(k==cplx.null_key()){ k = cache_.size(); cplx.assign_key(s, k); - // Use a transform_range? Check the impact on perf. + // Using a transform_range is slower, currently. thread_local std::vector v; v.clear(); for (auto vertex : cplx.simplex_vertex_range(s)) v.push_back(get_point_(vertex)); Point_d c = kernel_.construct_circumcenter_d_object()(v.cbegin(), v.cend()); - typename Kernel::FT r = kernel_.squared_distance_d_object()(c, v[0]); + FT r = kernel_.squared_distance_d_object()(c, v[0]); cache_.emplace_back(std::move(c), std::move(r)); } return cache_[k]; } + /// Return the circumradius, either from the old cache or computed, without writing to the cache. + template + auto radius(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) { + auto k = cplx.key(s); + if(k!=cplx.null_key()) + return old_cache_[k].second; + // Using a transform_range is slower, currently. + thread_local std::vector v; + v.clear(); + for (auto vertex : cplx.simplex_vertex_range(s)) + v.push_back(get_point_(vertex)); + return kernel_.compute_squared_radius_d_object()(v.cbegin(), v.cend()); + } + public: /** \brief Inserts all Delaunay triangulation into the simplicial complex. 
* It also computes the filtration values accordingly to the \ref createcomplexalgorithm if default_filtration_value @@ -365,11 +380,11 @@ class Alpha_complex { Filtration_value alpha_complex_filtration = 0.0; // No need to compute squared_radius on a single point - alpha is 0.0 if (f_simplex_dim > 0) { - auto const& sqrad = get_cache(complex, f_simplex).second; + auto const& sqrad = radius(complex, f_simplex); #if CGAL_VERSION_NR >= 1050000000 if(exact) CGAL::exact(sqrad); #endif - CGAL::NT_converter cv; + CGAL::NT_converter cv; alpha_complex_filtration = cv(sqrad); } complex.assign_filtration(f_simplex, alpha_complex_filtration); @@ -382,6 +397,8 @@ class Alpha_complex { propagate_alpha_filtration(complex, f_simplex); } } + old_cache_ = std::move(cache_); + cache_.clear(); } // -------------------------------------------------------------------------------------------- -- cgit v1.2.3 From 6acbc89d185d1c537778fb2d4a8503bab61fca31 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 3 Apr 2020 21:04:52 +0200 Subject: Split compute_persistence from get_persistence. 
--- src/python/gudhi/cubical_complex.pyx | 6 +++-- src/python/gudhi/periodic_cubical_complex.pyx | 6 +++-- src/python/gudhi/simplex_tree.pxd | 3 ++- src/python/gudhi/simplex_tree.pyx | 6 +++-- .../include/Persistent_cohomology_interface.h | 29 ++++++++++++---------- 5 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index d5ad1266..ce844558 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -35,7 +35,8 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface>": Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -149,7 +150,8 @@ cdef class CubicalComplex: self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) cdef vector[pair[int, pair[double, double]]] persistence_result if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + persistence_result = self.pcohptr.get_persistence() return persistence_result def betti_numbers(self): diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index fd08b976..ff5ef3bd 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ 
b/src/python/gudhi/periodic_cubical_complex.pyx @@ -32,7 +32,8 @@ cdef extern from "Cubical_complex_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Periodic_cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface>>": Periodic_cubical_complex_persistence_interface(Periodic_cubical_complex_base_interface * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) @@ -154,7 +155,8 @@ cdef class PeriodicCubicalComplex: self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True) cdef vector[pair[int, pair[double, double]]] persistence_result if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + persistence_result = self.pcohptr.get_persistence() return persistence_result def betti_numbers(self): diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 595f22bb..44040bcb 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -71,7 +71,8 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface>": Simplex_tree_persistence_interface(Simplex_tree_interface_full_featured * st, bool persistence_dim_max) - vector[pair[int, pair[double, double]]] get_persistence(int homology_coeff_field, double min_persistence) + void 
compute_persistence(int homology_coeff_field, double min_persistence) + vector[pair[int, pair[double, double]]] get_persistence() vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index cc3753e1..69e645b4 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -443,7 +443,8 @@ cdef class SimplexTree: if self.pcohptr != NULL: del self.pcohptr self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), False) - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, -1.) + self.pcohptr.compute_persistence(homology_coeff_field, -1.) + persistence_result = self.pcohptr.get_persistence() return self.get_ptr().compute_extended_persistence_subdiagrams(persistence_result, min_persistence) @@ -470,7 +471,8 @@ cdef class SimplexTree: self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) cdef vector[pair[int, pair[double, double]]] persistence_result if self.pcohptr != NULL: - persistence_result = self.pcohptr.get_persistence(homology_coeff_field, min_persistence) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + persistence_result = self.pcohptr.get_persistence() return persistence_result def betti_numbers(self): diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 8c79e6f3..a29ebbee 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -23,6 +23,7 @@ template class Persistent_cohomology_interface : public persistent_cohomology::Persistent_cohomology { private: + typedef persistent_cohomology::Persistent_cohomology Base; /* * Compare two intervals by dimension, then by length. 
*/ @@ -43,25 +44,28 @@ persistent_cohomology::Persistent_cohomology(*stptr), + : Base(*stptr), stptr_(stptr) { } Persistent_cohomology_interface(FilteredComplex* stptr, bool persistence_dim_max) - : persistent_cohomology::Persistent_cohomology(*stptr, persistence_dim_max), + : Base(*stptr, persistence_dim_max), stptr_(stptr) { } - std::vector>> get_persistence(int homology_coeff_field, - double min_persistence) { - persistent_cohomology::Persistent_cohomology::init_coefficients(homology_coeff_field); - persistent_cohomology::Persistent_cohomology::compute_persistent_cohomology(min_persistence); + void compute_persistence(int homology_coeff_field, double min_persistence) { + Base::init_coefficients(homology_coeff_field); + Base::compute_persistent_cohomology(min_persistence); + } + + void maybe_compute_persistence(int homology_coeff_field, double min_persistence) { + // Currently get_persistent_pairs safely returns an empty vector before compute_persistent_cohomology + if(Base::get_persistent_pairs().empty()) + compute_persistence(homology_coeff_field, min_persistence); + } + std::vector>> get_persistence() { // Custom sort and output persistence cmp_intervals_by_dim_then_length cmp(stptr_); - auto persistent_pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); + auto persistent_pairs = Base::get_persistent_pairs(); std::sort(std::begin(persistent_pairs), std::end(persistent_pairs), cmp); std::vector>> persistence; @@ -74,8 +78,7 @@ persistent_cohomology::Persistent_cohomology, std::vector>> persistence_pairs() { - auto pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); + auto pairs = Base::get_persistent_pairs(); std::vector, std::vector>> persistence_pairs; persistence_pairs.reserve(pairs.size()); -- cgit v1.2.3 From 7830d93607257fd75f09b371e88741a517347579 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 3 Apr 2020 21:11:57 +0200 Subject: Dead code --- src/python/include/Simplex_tree_interface.h | 7 ------- 
1 file changed, 7 deletions(-) diff --git a/src/python/include/Simplex_tree_interface.h b/src/python/include/Simplex_tree_interface.h index 1a18aed6..27b123f8 100644 --- a/src/python/include/Simplex_tree_interface.h +++ b/src/python/include/Simplex_tree_interface.h @@ -16,8 +16,6 @@ #include #include -#include "Persistent_cohomology_interface.h" - #include #include #include // std::pair @@ -157,11 +155,6 @@ class Simplex_tree_interface : public Simplex_tree { return new_dgm; } - void create_persistence(Gudhi::Persistent_cohomology_interface* pcoh) { - Base::initialize_filtration(); - pcoh = new Gudhi::Persistent_cohomology_interface(*this); - } - // Iterator over the simplex tree Complex_simplex_iterator get_simplices_iterator_begin() { // this specific case works because the range is just a pair of iterators - won't work if range was a vector -- cgit v1.2.3 From b2cfc0691147ca122861bc423d41495c4b444dde Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 3 Apr 2020 21:27:01 +0200 Subject: Simplify some code --- src/python/gudhi/simplex_tree.pyx | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 69e645b4..d8bd0b79 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -413,7 +413,7 @@ cdef class SimplexTree: Note that this code creates an extra vertex internally, so you should make sure that the Simplex_tree does not contain a vertex with the largest possible value (i.e., 4294967295). 
""" - return self.get_ptr().compute_extended_filtration() + self.get_ptr().compute_extended_filtration() def extended_persistence(self, homology_coeff_field=11, min_persistence=0): """This function retrieves good values for extended persistence, and separate the diagrams @@ -469,11 +469,8 @@ cdef class SimplexTree: if self.pcohptr != NULL: del self.pcohptr self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) - persistence_result = self.pcohptr.get_persistence() - return persistence_result + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the simplicial complex. -- cgit v1.2.3 From 3ca13b31e5f48fbaef2ba7db980643716c18725c Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 5 Apr 2020 00:35:23 +0200 Subject: compute_persistence in python Also simplify references, and replace print with assert for errors --- src/python/gudhi/simplex_tree.pyx | 105 ++++++++++----------- .../include/Persistent_cohomology_interface.h | 13 +-- 2 files changed, 52 insertions(+), 66 deletions(-) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index d8bd0b79..c34a64e6 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -139,9 +139,9 @@ cdef class SimplexTree: This function is not constant time because it can recompute dimension if required (can be triggered by - :func:`remove_maximal_simplex()` + :func:`remove_maximal_simplex` or - :func:`prune_above_filtration()` + :func:`prune_above_filtration` methods). 
""" return self.get_ptr().dimension() @@ -166,9 +166,9 @@ cdef class SimplexTree: This function must be used with caution because it disables dimension recomputation when required (this recomputation can be triggered by - :func:`remove_maximal_simplex()` + :func:`remove_maximal_simplex` or - :func:`prune_above_filtration()` + :func:`prune_above_filtration` ). """ self.get_ptr().set_dimension(dimension) @@ -315,10 +315,10 @@ cdef class SimplexTree: The dimension of the simplicial complex may be lower after calling remove_maximal_simplex than it was before. However, - :func:`upper_bound_dimension()` + :func:`upper_bound_dimension` method will return the old value, which remains a valid upper bound. If you care, you can call - :func:`dimension()` + :func:`dimension` to recompute the exact dimension. """ self.get_ptr().remove_maximal_simplex(simplex) @@ -346,12 +346,12 @@ cdef class SimplexTree: Note that the dimension of the simplicial complex may be lower after calling - :func:`prune_above_filtration()` + :func:`prune_above_filtration` than it was before. However, - :func:`upper_bound_dimension()` + :func:`upper_bound_dimension` will return the old value, which remains a valid upper bound. If you care, you can call - :func:`dimension()` + :func:`dimension` method to recompute the exact dimension. """ return self.get_ptr().prune_above_filtration(filtration) @@ -405,7 +405,7 @@ cdef class SimplexTree: Note that after calling this function, the filtration values are actually modified within the Simplex_tree. - The function :func:`extended_persistence()` + The function :func:`extended_persistence` retrieves the original values. .. note:: @@ -427,11 +427,11 @@ cdef class SimplexTree: 0.0. Sets min_persistence to -1.0 to see all values. :type min_persistence: float. - :returns: A list of four persistence diagrams in the format described in :func:`persistence()`. 
The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. + :returns: A list of four persistence diagrams in the format described in :func:`persistence`. The first one is Ordinary, the second one is Relative, the third one is Extended+ and the fourth one is Extended-. See https://link.springer.com/article/10.1007/s10208-008-9027-z and/or section 2.2 in https://link.springer.com/article/10.1007/s10208-017-9370-z for a description of these subtypes. .. note:: - This function should be called only if :func:`extend_filtration()` has been called first! + This function should be called only if :func:`extend_filtration` has been called first! .. note:: @@ -466,11 +466,32 @@ cdef class SimplexTree: :returns: The persistence of the simplicial complex. :rtype: list of pairs(dimension, pair(birth, death)) """ + self.compute_persistence(homology_coeff_field, min_persistence, persistence_dim_max) + return self.pcohptr.get_persistence() + + def compute_persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): + """This function computes the persistence of the simplicial complex, so it can be accessed through + :func:`persistent_betti_numbers`, :func:`persistence_pairs`, etc. This function is equivalent to :func:`persistence` + when you do not want the list :func:`persistence` returns. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number. Default value is 11. + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. 
+ :param persistence_dim_max: If true, the persistent homology for the + maximal dimension in the complex is computed. If false, it is + ignored. Default is false. + :type persistence_dim_max: bool + :returns: Nothing. + """ if self.pcohptr != NULL: del self.pcohptr self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) - return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the simplicial complex. @@ -479,16 +500,11 @@ cdef class SimplexTree: :rtype: list of int :note: betti_numbers function requires - :func:`persistence()` + :func:`compute_persistence` function to be launched first. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - else: - print("betti_numbers function requires persistence function" - " to be launched first.") - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the @@ -505,16 +521,11 @@ cdef class SimplexTree: :rtype: list of int :note: persistent_betti_numbers function requires - :func:`persistence()` + :func:`compute_persistence` function to be launched first. 
""" - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(from_value, to_value) - else: - print("persistent_betti_numbers function requires persistence function" - " to be launched first.") - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(from_value, to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the simplicial @@ -526,16 +537,11 @@ cdef class SimplexTree: :rtype: numpy array of dimension 2 :note: intervals_in_dim function requires - :func:`persistence()` + :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.") - return np_array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np_array(self.pcohptr.intervals_in_dimension(dimension)) def persistence_pairs(self): """This function returns a list of persistence birth and death simplices pairs. @@ -544,18 +550,13 @@ cdef class SimplexTree: :rtype: list of pair of list of int :note: persistence_pairs function requires - :func:`persistence()` + :func:`compute_persistence` function to be launched first. 
""" - cdef vector[pair[vector[int],vector[int]]] persistence_pairs_result - if self.pcohptr != NULL: - persistence_pairs_result = self.pcohptr.persistence_pairs() - else: - print("persistence_pairs function requires persistence function" - " to be launched first.") - return persistence_pairs_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_pairs()" + return self.pcohptr.persistence_pairs() - def write_persistence_diagram(self, persistence_file=''): + def write_persistence_diagram(self, persistence_file): """This function writes the persistence intervals of the simplicial complex in a user given file name. @@ -563,14 +564,8 @@ cdef class SimplexTree: :type persistence_file: string. :note: intervals_in_dim function requires - :func:`persistence()` + :func:`compute_persistence` function to be launched first. """ - if self.pcohptr != NULL: - if persistence_file != '': - self.pcohptr.write_output_diagram(persistence_file.encode('utf-8')) - else: - print("persistence_file must be specified") - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.") + assert self.pcohptr != NULL, "compute_persistence() must be called before write_persistence_diagram()" + self.pcohptr.write_output_diagram(persistence_file.encode('utf-8')) diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index a29ebbee..e2b69a52 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -43,25 +43,16 @@ persistent_cohomology::Persistent_cohomology>> get_persistence() { // Custom sort and output persistence cmp_intervals_by_dim_then_length cmp(stptr_); -- cgit v1.2.3 From 73a40006dad55b0a9ce6ca270e566ce91efe6af4 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 5 Apr 2020 12:27:15 +0200 Subject: Proper exception in write_output_diagram --- 
src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h | 1 + src/python/gudhi/simplex_tree.pxd | 2 +- src/python/gudhi/simplex_tree.pyx | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h index ca4bc10d..5e41edb4 100644 --- a/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h +++ b/src/Persistent_cohomology/include/gudhi/Persistent_cohomology.h @@ -571,6 +571,7 @@ class Persistent_cohomology { void write_output_diagram(std::string diagram_name) { std::ofstream diagram_out(diagram_name.c_str()); + diagram_out.exceptions(diagram_out.failbit); cmp_intervals_by_length cmp(cpx_); std::sort(std::begin(persistent_pairs_), std::end(persistent_pairs_), cmp); bool has_infinity = std::numeric_limits::has_infinity; diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 44040bcb..c46b36ba 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -76,5 +76,5 @@ cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": vector[int] betti_numbers() vector[int] persistent_betti_numbers(double from_value, double to_value) vector[pair[double,double]] intervals_in_dimension(int dimension) - void write_output_diagram(string diagram_file_name) + void write_output_diagram(string diagram_file_name) except + vector[pair[vector[int], vector[int]]] persistence_pairs() diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index c34a64e6..7728ebfc 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -449,7 +449,7 @@ cdef class SimplexTree: def persistence(self, homology_coeff_field=11, min_persistence=0, persistence_dim_max = False): - """This function returns the persistence of the simplicial complex. + """This function computes and returns the persistence of the simplicial complex. 
:param homology_coeff_field: The homology coefficient field. Must be a prime number. Default value is 11. -- cgit v1.2.3 From 5ce1ee8976ced78de839ef629522c95324b2fabd Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Mon, 6 Apr 2020 16:25:27 +0900 Subject: weighted rips added --- src/python/CMakeLists.txt | 3 +++ src/python/gudhi/weighted_rips_complex.py | 41 +++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 src/python/gudhi/weighted_rips_complex.py diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index d7a6a4db..cac4553a 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -415,6 +415,9 @@ if(PYTHONINTERP_FOUND) add_gudhi_py_test(test_dtm) endif() + # Weighted Rips + add_gudhi_py_test(test_weighted_rips) + # Documentation generation is available through sphinx - requires all modules if(SPHINX_PATH) if(MATPLOTLIB_FOUND) diff --git a/src/python/gudhi/weighted_rips_complex.py b/src/python/gudhi/weighted_rips_complex.py new file mode 100644 index 00000000..34a627cb --- /dev/null +++ b/src/python/gudhi/weighted_rips_complex.py @@ -0,0 +1,41 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Raphaël Tinarrage and Yuichi Ike +# +# Copyright (C) 2020 Inria, Copyright (C) 2020 FUjitsu Laboratories Ltd. 
+# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +from gudhi import SimplexTree + +class WeightedRipsComplex: + """ + class to generate a weighted Rips complex + from a distance matrix and filtration value + """ + def __init__(self, + distance_matrix=None, + filtration_values=None, + max_filtration=float('inf'), sparse=None): + self.distance_matrix = distance_matrix + self.filtration_values = filtration_values + self.max_filtration = max_filtration + + def create_simplex_tree(self, max_dimension): + dist = self.distance_matrix + F = self.filtration_values + num_pts = len(dist) + + st = SimplexTree() + + for i in range(num_pts): + if F[i] < self.max_filtration: + st.insert([i], F[i]) + for i in range(num_pts): + for j in range(num_pts): + value = (dist[i][j] + F[i] + F[j]) / 2 + if value < self.max_filtration: + st.insert([i,j], filtration=value) + return st + -- cgit v1.2.3 From fadeb80b46001779e2a998941a02195921b03124 Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Mon, 6 Apr 2020 16:31:59 +0900 Subject: test_weighted_rips added --- src/python/test/test_weighted_rips.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 src/python/test/test_weighted_rips.py diff --git a/src/python/test/test_weighted_rips.py b/src/python/test/test_weighted_rips.py new file mode 100644 index 00000000..f0db6798 --- /dev/null +++ b/src/python/test/test_weighted_rips.py @@ -0,0 +1,27 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
+ Author(s): Yuichi Ike + + Copyright (C) 2020 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +from gudhi.weighted_rips_complex import WeightedRipsComplex +from gudhi.point_cloud.dtm import DTM +import numpy +from scipy.spatial.distance import cdist +import pytest + +def test_dtm_rips_complex(): + pts = numpy.array([[2.0, 2], [0, 1], [3, 4]]) + dist = cdist(pts,pts) + dtm = DTM(2, q=2, metric="precomputed") + r = dtm.fit_transform(dist) + w_rips = WeightedRipsComplex(distance_mattix=dist, filtration_values=r) + st = w_rips.create_simplex_tree(max_dimension=2) + diag = st.persistence() + assert diag == [(0, (1.5811388300841898, float("inf"))), (0, (1.5811388300841898, 2.699172818834085)), (0, (1.5811388300841898, 2.699172818834085))] + + \ No newline at end of file -- cgit v1.2.3 From 5737c5e1e89cc4c939a784742f25b26ca163332d Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Mon, 6 Apr 2020 16:43:55 +0900 Subject: comments added --- src/python/gudhi/weighted_rips_complex.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/python/gudhi/weighted_rips_complex.py b/src/python/gudhi/weighted_rips_complex.py index 34a627cb..84e8e38e 100644 --- a/src/python/gudhi/weighted_rips_complex.py +++ b/src/python/gudhi/weighted_rips_complex.py @@ -17,12 +17,26 @@ class WeightedRipsComplex: def __init__(self, distance_matrix=None, filtration_values=None, - max_filtration=float('inf'), sparse=None): + max_filtration=float('inf')): + """ + Parameters: + distance_matrix: list of list of float, + distance matrix (full square or lower triangular) + filtration_values: list of float, + flitration value for each index + max_filtration: float, + specifies the maximal filtration value to be considered + """ self.distance_matrix = distance_matrix self.filtration_values = filtration_values self.max_filtration = max_filtration def create_simplex_tree(self, max_dimension): + """ + Parameter: + max_dimension: int + graph 
expansion until this given dimension + """ dist = self.distance_matrix F = self.filtration_values num_pts = len(dist) -- cgit v1.2.3 From 15586d479be885319dde6f703c3126176b796732 Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Mon, 6 Apr 2020 16:48:21 +0900 Subject: bug fixed --- src/python/gudhi/weighted_rips_complex.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/python/gudhi/weighted_rips_complex.py b/src/python/gudhi/weighted_rips_complex.py index 84e8e38e..7d14ac65 100644 --- a/src/python/gudhi/weighted_rips_complex.py +++ b/src/python/gudhi/weighted_rips_complex.py @@ -51,5 +51,7 @@ class WeightedRipsComplex: value = (dist[i][j] + F[i] + F[j]) / 2 if value < self.max_filtration: st.insert([i,j], filtration=value) + + st.expansion(max_dimension) return st -- cgit v1.2.3 From a4fa5f673784a842e9fac13003c843d454c888a4 Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Mon, 6 Apr 2020 21:19:55 +0900 Subject: bug fixed, parameter name changed --- src/python/CMakeLists.txt | 2 ++ src/python/gudhi/weighted_rips_complex.py | 19 +++++++++++-------- src/python/test/test_weighted_rips.py | 13 ++++++------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index cac4553a..4b87ed9b 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -57,6 +57,7 @@ if(PYTHONINTERP_FOUND) set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'representations', ") set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'wasserstein', ") set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'point_cloud', ") + set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'weighted_rips_complex', ") add_gudhi_debug_info("Python version ${PYTHON_VERSION_STRING}") add_gudhi_debug_info("Cython version ${CYTHON_VERSION}") @@ -228,6 +229,7 @@ if(PYTHONINTERP_FOUND) file(COPY "gudhi/representations" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/") file(COPY "gudhi/wasserstein.py" 
DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") file(COPY "gudhi/point_cloud" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") + file(COPY "gudhi/weighted_rips_complex.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") add_custom_command( OUTPUT gudhi.so diff --git a/src/python/gudhi/weighted_rips_complex.py b/src/python/gudhi/weighted_rips_complex.py index 7d14ac65..9df2ddf9 100644 --- a/src/python/gudhi/weighted_rips_complex.py +++ b/src/python/gudhi/weighted_rips_complex.py @@ -1,6 +1,6 @@ # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. -# Author(s): Raphaël Tinarrage and Yuichi Ike +# Author(s): Raphaël Tinarrage, Yuichi Ike, Masatoshi Takenouchi # # Copyright (C) 2020 Inria, Copyright (C) 2020 FUjitsu Laboratories Ltd. # @@ -12,23 +12,26 @@ from gudhi import SimplexTree class WeightedRipsComplex: """ class to generate a weighted Rips complex - from a distance matrix and filtration value + from a distance matrix and weights on vertices """ def __init__(self, - distance_matrix=None, - filtration_values=None, + distance_matrix, + weights=None, max_filtration=float('inf')): """ Parameters: distance_matrix: list of list of float, distance matrix (full square or lower triangular) filtration_values: list of float, - flitration value for each index + weight for each vertex max_filtration: float, specifies the maximal filtration value to be considered """ self.distance_matrix = distance_matrix - self.filtration_values = filtration_values + if weights is not None: + self.weights = weights + else: + self.weights = [0] * len(distance_matrix) self.max_filtration = max_filtration def create_simplex_tree(self, max_dimension): @@ -38,7 +41,7 @@ class WeightedRipsComplex: graph expansion until this given dimension """ dist = self.distance_matrix - F = self.filtration_values + F = self.weights num_pts = len(dist) st = SimplexTree() @@ 
-47,7 +50,7 @@ class WeightedRipsComplex: if F[i] < self.max_filtration: st.insert([i], F[i]) for i in range(num_pts): - for j in range(num_pts): + for j in range(i): value = (dist[i][j] + F[i] + F[j]) / 2 if value < self.max_filtration: st.insert([i,j], filtration=value) diff --git a/src/python/test/test_weighted_rips.py b/src/python/test/test_weighted_rips.py index f0db6798..7896fb78 100644 --- a/src/python/test/test_weighted_rips.py +++ b/src/python/test/test_weighted_rips.py @@ -1,6 +1,6 @@ """ This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. - Author(s): Yuichi Ike + Author(s): Yuichi Ike and Masatoshi Takenouchi Copyright (C) 2020 Inria @@ -10,18 +10,17 @@ from gudhi.weighted_rips_complex import WeightedRipsComplex from gudhi.point_cloud.dtm import DTM -import numpy +import numpy as np from scipy.spatial.distance import cdist import pytest def test_dtm_rips_complex(): - pts = numpy.array([[2.0, 2], [0, 1], [3, 4]]) + pts = np.array([[2.0, 2], [0, 1], [3, 4]]) dist = cdist(pts,pts) dtm = DTM(2, q=2, metric="precomputed") r = dtm.fit_transform(dist) - w_rips = WeightedRipsComplex(distance_mattix=dist, filtration_values=r) + w_rips = WeightedRipsComplex(distance_mattix=dist, weights=r) st = w_rips.create_simplex_tree(max_dimension=2) - diag = st.persistence() - assert diag == [(0, (1.5811388300841898, float("inf"))), (0, (1.5811388300841898, 2.699172818834085)), (0, (1.5811388300841898, 2.699172818834085))] + persistence_intervals0 = st.persistence_intervals_in_dimension(0) + assert persistence_intervals0 == pytest.approx(np.array([[1.58113883, 2.69917282],[1.58113883, 2.69917282], [1.58113883, float("inf")]])) - \ No newline at end of file -- cgit v1.2.3 From 5eaca3ed69c564a6f44e6ff21ac33e2cc576bafa Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 6 Apr 2020 15:58:10 +0200 Subject: compute_persistence for cubical --- 
src/python/gudhi/cubical_complex.pyx | 63 ++++++++++++++------------ src/python/gudhi/periodic_cubical_complex.pyx | 65 +++++++++++++++------------ 2 files changed, 71 insertions(+), 57 deletions(-) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index ce844558..007abcb6 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -130,8 +130,31 @@ cdef class CubicalComplex: """ return self.thisptr.dimension() + def compute_persistence(self, homology_coeff_field=11, min_persistence=0): + """This function computes the persistence of the complex, so it can be + accessed through :func:`persistent_betti_numbers`, + :func:`persistence_intervals_in_dimension`, etc. This function is + equivalent to :func:`persistence` when you do not want the list + :func:`persistence` returns. + + :param homology_coeff_field: The homology coefficient field. Must be a + prime number + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: Nothing. + """ + if self.pcohptr != NULL: + del self.pcohptr + assert self.__is_defined() + self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + def persistence(self, homology_coeff_field=11, min_persistence=0): - """This function returns the persistence of the complex. + """This function computes and returns the persistence of the complex. :param homology_coeff_field: The homology coefficient field. Must be a prime number @@ -144,31 +167,22 @@ cdef class CubicalComplex: :returns: list of pairs(dimension, pair(birth, death)) -- the persistence of the complex. 
""" - if self.pcohptr != NULL: - del self.pcohptr - if self.thisptr != NULL: - self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) - persistence_result = self.pcohptr.get_persistence() - return persistence_result + self.compute_persistence(homology_coeff_field, min_persistence) + return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the complex. :returns: list of int -- The Betti numbers ([B0, B1, ..., Bn]). - :note: betti_numbers function requires persistence function to be + :note: betti_numbers function requires :func:`compute_persistence` function to be launched first. :note: betti_numbers function always returns [1, 0, 0, ...] as infinity filtration cubes are not removed from the complex. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the complex. @@ -183,13 +197,11 @@ cdef class CubicalComplex: :returns: list of int -- The persistent Betti numbers ([B0, B1, ..., Bn]). - :note: persistent_betti_numbers function requires persistence + :note: persistent_betti_numbers function requires :func:`compute_persistence` function to be launched first. 
""" - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(from_value, to_value) - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(from_value, to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the complex in a @@ -200,13 +212,8 @@ cdef class CubicalComplex: :returns: The persistence intervals. :rtype: numpy array of dimension 2 - :note: intervals_in_dim function requires persistence function to be + :note: intervals_in_dim function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.", file=sys.stderr) - return np.array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np.array(self.pcohptr.intervals_in_dimension(dimension)) diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index ff5ef3bd..246a3a02 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -135,8 +135,31 @@ cdef class PeriodicCubicalComplex: """ return self.thisptr.dimension() + def compute_persistence(self, homology_coeff_field=11, min_persistence=0): + """This function computes the persistence of the complex, so it can be + accessed through :func:`persistent_betti_numbers`, + :func:`persistence_intervals_in_dimension`, etc. This function is + equivalent to :func:`persistence` when you do not want the list + :func:`persistence` returns. 
+ + :param homology_coeff_field: The homology coefficient field. Must be a + prime number + :type homology_coeff_field: int. + :param min_persistence: The minimum persistence value to take into + account (strictly greater than min_persistence). Default value is + 0.0. + Sets min_persistence to -1.0 to see all values. + :type min_persistence: float. + :returns: Nothing. + """ + if self.pcohptr != NULL: + del self.pcohptr + assert self.__is_defined() + self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True) + self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + def persistence(self, homology_coeff_field=11, min_persistence=0): - """This function returns the persistence of the complex. + """This function computes and returns the persistence of the complex. :param homology_coeff_field: The homology coefficient field. Must be a prime number @@ -149,31 +172,22 @@ cdef class PeriodicCubicalComplex: :returns: list of pairs(dimension, pair(birth, death)) -- the persistence of the complex. """ - if self.pcohptr != NULL: - del self.pcohptr - if self.thisptr != NULL: - self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True) - cdef vector[pair[int, pair[double, double]]] persistence_result - if self.pcohptr != NULL: - self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) - persistence_result = self.pcohptr.get_persistence() - return persistence_result + self.compute_persistence(homology_coeff_field, min_persistence) + return self.pcohptr.get_persistence() def betti_numbers(self): """This function returns the Betti numbers of the complex. :returns: list of int -- The Betti numbers ([B0, B1, ..., Bn]). - :note: betti_numbers function requires persistence function to be + :note: betti_numbers function requires :func:`compute_persistence` function to be launched first. - :note: betti_numbers function always returns [1, 0, 0, ...] 
as infinity + :note: This function always returns the Betti numbers of a torus as infinity filtration cubes are not removed from the complex. """ - cdef vector[int] bn_result - if self.pcohptr != NULL: - bn_result = self.pcohptr.betti_numbers() - return bn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before betti_numbers()" + return self.pcohptr.betti_numbers() def persistent_betti_numbers(self, from_value, to_value): """This function returns the persistent Betti numbers of the complex. @@ -188,13 +202,11 @@ cdef class PeriodicCubicalComplex: :returns: list of int -- The persistent Betti numbers ([B0, B1, ..., Bn]). - :note: persistent_betti_numbers function requires persistence + :note: persistent_betti_numbers function requires :func:`compute_persistence` function to be launched first. """ - cdef vector[int] pbn_result - if self.pcohptr != NULL: - pbn_result = self.pcohptr.persistent_betti_numbers(from_value, to_value) - return pbn_result + assert self.pcohptr != NULL, "compute_persistence() must be called before persistent_betti_numbers()" + return self.pcohptr.persistent_betti_numbers(from_value, to_value) def persistence_intervals_in_dimension(self, dimension): """This function returns the persistence intervals of the complex in a @@ -205,13 +217,8 @@ cdef class PeriodicCubicalComplex: :returns: The persistence intervals. :rtype: numpy array of dimension 2 - :note: intervals_in_dim function requires persistence function to be + :note: intervals_in_dim function requires :func:`compute_persistence` function to be launched first. 
""" - cdef vector[pair[double,double]] intervals_result - if self.pcohptr != NULL: - intervals_result = self.pcohptr.intervals_in_dimension(dimension) - else: - print("intervals_in_dim function requires persistence function" - " to be launched first.", file=sys.stderr) - return np.array(intervals_result) + assert self.pcohptr != NULL, "compute_persistence() must be called before persistence_intervals_in_dimension()" + return np.array(self.pcohptr.intervals_in_dimension(dimension)) -- cgit v1.2.3 From 173506323471cf5175ea2b340abec63968c5cd5f Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 6 Apr 2020 16:51:32 +0200 Subject: Use compute_persistence in an example --- .../example/alpha_rips_persistence_bottleneck_distance.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/python/example/alpha_rips_persistence_bottleneck_distance.py b/src/python/example/alpha_rips_persistence_bottleneck_distance.py index f156826d..3e12b0d5 100755 --- a/src/python/example/alpha_rips_persistence_bottleneck_distance.py +++ b/src/python/example/alpha_rips_persistence_bottleneck_distance.py @@ -5,6 +5,7 @@ import argparse import math import errno import os +import numpy as np """ This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. 
@@ -56,7 +57,7 @@ with open(args.file, "r") as f: message = "Number of simplices=" + repr(rips_stree.num_simplices()) print(message) - rips_diag = rips_stree.persistence() + rips_stree.compute_persistence() print("##############################################################") print("AlphaComplex creation from points read in a OFF file") @@ -72,18 +73,13 @@ with open(args.file, "r") as f: message = "Number of simplices=" + repr(alpha_stree.num_simplices()) print(message) - alpha_diag = alpha_stree.persistence() + alpha_stree.compute_persistence() max_b_distance = 0.0 for dim in range(args.max_dimension): # Alpha persistence values needs to be transform because filtration # values are alpha square values - funcs = [math.sqrt, math.sqrt] - alpha_intervals = [] - for interval in alpha_stree.persistence_intervals_in_dimension(dim): - alpha_intervals.append( - map(lambda func, value: func(value), funcs, interval) - ) + alpha_intervals = np.sqrt(alpha_stree.persistence_intervals_in_dimension(dim)) rips_intervals = rips_stree.persistence_intervals_in_dimension(dim) bottleneck_distance = gudhi.bottleneck_distance( -- cgit v1.2.3 From 4294e5fc6e1bff246a7d22f1bd98f91b62f14163 Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Tue, 7 Apr 2020 09:36:03 +0900 Subject: filtration value fixed --- src/python/gudhi/weighted_rips_complex.py | 2 +- src/python/test/test_weighted_rips.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/python/gudhi/weighted_rips_complex.py b/src/python/gudhi/weighted_rips_complex.py index 9df2ddf9..7e504b2c 100644 --- a/src/python/gudhi/weighted_rips_complex.py +++ b/src/python/gudhi/weighted_rips_complex.py @@ -51,7 +51,7 @@ class WeightedRipsComplex: st.insert([i], F[i]) for i in range(num_pts): for j in range(i): - value = (dist[i][j] + F[i] + F[j]) / 2 + value = max(F[i], F[j], (dist[i][j] + F[i] + F[j]) / 2) if value < self.max_filtration: st.insert([i,j], filtration=value) diff --git 
a/src/python/test/test_weighted_rips.py b/src/python/test/test_weighted_rips.py index 7896fb78..a3235276 100644 --- a/src/python/test/test_weighted_rips.py +++ b/src/python/test/test_weighted_rips.py @@ -14,13 +14,23 @@ import numpy as np from scipy.spatial.distance import cdist import pytest +def test_non_dtm_rips_complex(): + dist = [[], [1]] + weights = [1, 100] + w_rips = WeightedRipsComplex(distance_matrix=dist, weights=weights) + st = w_rips.create_simplex_tree(max_dimension=2) + assert st.filtration([0,1]) == pytest.approx(100.0) + + def test_dtm_rips_complex(): pts = np.array([[2.0, 2], [0, 1], [3, 4]]) dist = cdist(pts,pts) dtm = DTM(2, q=2, metric="precomputed") r = dtm.fit_transform(dist) - w_rips = WeightedRipsComplex(distance_mattix=dist, weights=r) + w_rips = WeightedRipsComplex(distance_matrix=dist, weights=r) st = w_rips.create_simplex_tree(max_dimension=2) + st.persistence() persistence_intervals0 = st.persistence_intervals_in_dimension(0) assert persistence_intervals0 == pytest.approx(np.array([[1.58113883, 2.69917282],[1.58113883, 2.69917282], [1.58113883, float("inf")]])) + -- cgit v1.2.3 From f9a933862050ca95b3a96d7a8572d62f7f2205a9 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 11 Apr 2020 18:18:14 +0200 Subject: Use longer names --- src/python/gudhi/point_cloud/dtm.py | 10 +++-- src/python/gudhi/point_cloud/knn.py | 2 +- src/python/test/test_dtm.py | 18 ++++----- src/python/test/test_knn.py | 76 +++++++++++++++++++++++++++---------- 4 files changed, 71 insertions(+), 35 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 23c36b88..38368f29 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -7,10 +7,10 @@ # Modification(s): # - YYYY/MM Author: Description of the modification -from .knn import KNN +from .knn import KNearestNeighbors -class DTM: +class DistanceToMeasure: """ Class to compute the distance to the empirical measure defined by a 
point set, as introduced in :cite:`dtm`. """ @@ -20,7 +20,7 @@ class DTM: Args: k (int): number of neighbors (possibly including the point itself). q (float): order used to compute the distance to measure. Defaults to 2. - kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. + kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. """ self.k = k self.q = q @@ -35,7 +35,9 @@ class DTM: X (numpy.array): coordinates for mass points. """ if self.params.setdefault("metric", "euclidean") != "neighbors": - self.knn = KNN(self.k, return_index=False, return_distance=True, sort_results=False, **self.params) + self.knn = KNearestNeighbors( + self.k, return_index=False, return_distance=True, sort_results=False, **self.params + ) self.knn.fit(X) return self diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 8369f1f8..6642a3c2 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -10,7 +10,7 @@ import numpy -class KNN: +class KNearestNeighbors: """ Class wrapping several implementations for computing the k nearest neighbors in a point set. 
""" diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 93b13e1a..37934fdb 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -8,7 +8,7 @@ - YYYY/MM Author: Description of the modification """ -from gudhi.point_cloud.dtm import DTM +from gudhi.point_cloud.dtm import DistanceToMeasure import numpy import pytest @@ -16,35 +16,35 @@ import pytest def test_dtm_compare_euclidean(): pts = numpy.random.rand(1000, 4) k = 3 - dtm = DTM(k, implementation="ckdtree") + dtm = DistanceToMeasure(k, implementation="ckdtree") r0 = dtm.fit_transform(pts) - dtm = DTM(k, implementation="sklearn") + dtm = DistanceToMeasure(k, implementation="sklearn") r1 = dtm.fit_transform(pts) assert r1 == pytest.approx(r0) - dtm = DTM(k, implementation="sklearn", algorithm="brute") + dtm = DistanceToMeasure(k, implementation="sklearn", algorithm="brute") r2 = dtm.fit_transform(pts) assert r2 == pytest.approx(r0) - dtm = DTM(k, implementation="hnsw") + dtm = DistanceToMeasure(k, implementation="hnsw") r3 = dtm.fit_transform(pts) assert r3 == pytest.approx(r0) from scipy.spatial.distance import cdist d = cdist(pts, pts) - dtm = DTM(k, metric="precomputed") + dtm = DistanceToMeasure(k, metric="precomputed") r4 = dtm.fit_transform(d) assert r4 == pytest.approx(r0) - dtm = DTM(k, implementation="keops") + dtm = DistanceToMeasure(k, implementation="keops") r5 = dtm.fit_transform(pts) assert r5 == pytest.approx(r0) def test_dtm_precomputed(): dist = numpy.array([[1.0, 3, 8], [1, 5, 5], [0, 2, 3]]) - dtm = DTM(2, q=1, metric="neighbors") + dtm = DistanceToMeasure(2, q=1, metric="neighbors") r = dtm.fit_transform(dist) assert r == pytest.approx([2.0, 3, 1]) dist = numpy.array([[2.0, 2], [0, 1], [3, 4]]) - dtm = DTM(2, q=2, metric="neighbors") + dtm = DistanceToMeasure(2, q=2, metric="neighbors") r = dtm.fit_transform(dist) assert r == pytest.approx([2.0, 0.707, 3.5355], rel=0.01) diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py index 
e455fb48..6aac2006 100755 --- a/src/python/test/test_knn.py +++ b/src/python/test/test_knn.py @@ -8,7 +8,7 @@ - YYYY/MM Author: Description of the modification """ -from gudhi.point_cloud.knn import KNN +from gudhi.point_cloud.knn import KNearestNeighbors import numpy as np import pytest @@ -16,39 +16,39 @@ import pytest def test_knn_explicit(): base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) query = np.array([[1.0, 1], [2, 2], [4, 4]]) - knn = KNN(2, metric="manhattan", return_distance=True, return_index=True) + knn = KNearestNeighbors(2, metric="manhattan", return_distance=True, return_index=True) knn.fit(base) r = knn.transform(query) assert r[0] == pytest.approx(np.array([[0, 1], [1, 0], [3, 2]])) assert r[1] == pytest.approx(np.array([[0.0, 1], [1, 2], [1, 2]])) - knn = KNN(2, metric="chebyshev", return_distance=True, return_index=False) + knn = KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False) knn.fit(base) r = knn.transform(query) assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) r = ( - KNN(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops") + KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops") .fit(base) .transform(query) ) assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) - knn = KNN(2, metric="minkowski", p=3, return_distance=False, return_index=True) + knn = KNearestNeighbors(2, metric="minkowski", p=3, return_distance=False, return_index=True) knn.fit(base) r = knn.transform(query) assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) r = ( - KNN(2, metric="minkowski", p=3, return_distance=False, return_index=True, implementation="keops") + KNearestNeighbors(2, metric="minkowski", p=3, return_distance=False, return_index=True, implementation="keops") .fit(base) .transform(query) ) assert np.array_equal(r, [[0, 1], [1, 0], [3, 2]]) dist = np.array([[0.0, 3, 8], [1, 0, 5], [1, 2, 0]]) - knn = KNN(2, 
metric="precomputed", return_index=True, return_distance=False) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False) r = knn.fit_transform(dist) assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) - knn = KNN(2, metric="precomputed", return_index=True, return_distance=True) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True) r = knn.fit_transform(dist) assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) @@ -57,16 +57,40 @@ def test_knn_explicit(): def test_knn_compare(): base = np.array([[1.0, 1], [1, 2], [4, 2], [4, 3]]) query = np.array([[1.0, 1], [2, 2], [4, 4]]) - r0 = KNN(2, implementation="ckdtree", return_index=True, return_distance=False).fit(base).transform(query) - r1 = KNN(2, implementation="sklearn", return_index=True, return_distance=False).fit(base).transform(query) - r2 = KNN(2, implementation="hnsw", return_index=True, return_distance=False).fit(base).transform(query) - r3 = KNN(2, implementation="keops", return_index=True, return_distance=False).fit(base).transform(query) + r0 = ( + KNearestNeighbors(2, implementation="ckdtree", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) + r1 = ( + KNearestNeighbors(2, implementation="sklearn", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) + r2 = ( + KNearestNeighbors(2, implementation="hnsw", return_index=True, return_distance=False).fit(base).transform(query) + ) + r3 = ( + KNearestNeighbors(2, implementation="keops", return_index=True, return_distance=False) + .fit(base) + .transform(query) + ) assert np.array_equal(r0, r1) and np.array_equal(r0, r2) and np.array_equal(r0, r3) - r0 = KNN(2, implementation="ckdtree", return_index=True, return_distance=True).fit(base).transform(query) - r1 = KNN(2, implementation="sklearn", return_index=True, return_distance=True).fit(base).transform(query) - r2 = KNN(2, 
implementation="hnsw", return_index=True, return_distance=True).fit(base).transform(query) - r3 = KNN(2, implementation="keops", return_index=True, return_distance=True).fit(base).transform(query) + r0 = ( + KNearestNeighbors(2, implementation="ckdtree", return_index=True, return_distance=True) + .fit(base) + .transform(query) + ) + r1 = ( + KNearestNeighbors(2, implementation="sklearn", return_index=True, return_distance=True) + .fit(base) + .transform(query) + ) + r2 = KNearestNeighbors(2, implementation="hnsw", return_index=True, return_distance=True).fit(base).transform(query) + r3 = ( + KNearestNeighbors(2, implementation="keops", return_index=True, return_distance=True).fit(base).transform(query) + ) assert np.array_equal(r0[0], r1[0]) and np.array_equal(r0[0], r2[0]) and np.array_equal(r0[0], r3[0]) d0 = pytest.approx(r0[1]) assert r1[1] == d0 and r2[1] == d0 and r3[1] == d0 @@ -75,8 +99,18 @@ def test_knn_compare(): def test_knn_nop(): # This doesn't look super useful... p = np.array([[0.0]]) - assert None is KNN(k=1, return_index=False, return_distance=False, implementation="sklearn").fit_transform(p) - assert None is KNN(k=1, return_index=False, return_distance=False, implementation="ckdtree").fit_transform(p) - assert None is KNN(k=1, return_index=False, return_distance=False, implementation="hnsw", ef=5).fit_transform(p) - assert None is KNN(k=1, return_index=False, return_distance=False, implementation="keops").fit_transform(p) - assert None is KNN(k=1, return_index=False, return_distance=False, metric="precomputed").fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="sklearn" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="ckdtree" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="hnsw", ef=5 + ).fit_transform(p) + assert None is 
KNearestNeighbors( + k=1, return_index=False, return_distance=False, implementation="keops" + ).fit_transform(p) + assert None is KNearestNeighbors( + k=1, return_index=False, return_distance=False, metric="precomputed" + ).fit_transform(p) -- cgit v1.2.3 From 83a1bc1fb6124a35d515f4836d2e830f3dbdf0e7 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 12 Apr 2020 21:57:51 +0200 Subject: Parallelize the "precomputed" case of knn It is supposed to be possible to compile numpy with openmp, but it looks like it isn't done in any of the usual packages. It may be possible to refactor that code so there is less redundancy. --- src/python/gudhi/point_cloud/knn.py | 78 +++++++++++++++++++++++++++++-------- src/python/test/test_dtm.py | 3 ++ src/python/test/test_knn.py | 8 ++++ 3 files changed, 73 insertions(+), 16 deletions(-) diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 6642a3c2..f6870517 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -115,25 +115,71 @@ class KNearestNeighbors: if metric == "precomputed": # scikit-learn could handle that, but they insist on calling fit() with an unused square array, which is too unnatural. - X = numpy.array(X) if self.return_index: - neighbors = numpy.argpartition(X, k - 1)[:, 0:k] - if self.params.get("sort_results", True): - X = numpy.take_along_axis(X, neighbors, axis=-1) - ngb_order = numpy.argsort(X, axis=-1) - neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + n_jobs = self.params.get("n_jobs", 1) + # Supposedly numpy can be compiled with OpenMP and handle this, but nobody does that?! 
+ if n_jobs == 1: + neighbors = numpy.argpartition(X, k - 1)[:, 0:k] + if self.params.get("sort_results", True): + X = numpy.take_along_axis(X, neighbors, axis=-1) + ngb_order = numpy.argsort(X, axis=-1) + neighbors = numpy.take_along_axis(neighbors, ngb_order, axis=-1) + else: + ngb_order = neighbors + if self.return_distance: + distances = numpy.take_along_axis(X, ngb_order, axis=-1) + return neighbors, distances + else: + return neighbors else: - ngb_order = neighbors - if self.return_distance: - distances = numpy.take_along_axis(X, ngb_order, axis=-1) - return neighbors, distances - else: - return neighbors + from joblib import Parallel, delayed, effective_n_jobs + from sklearn.utils import gen_even_slices + + slices = gen_even_slices(len(X), effective_n_jobs(-1)) + parallel = Parallel(backend="threading", n_jobs=-1) + if self.params.get("sort_results", True): + + def func(M): + neighbors = numpy.argpartition(M, k - 1)[:, 0:k] + Y = numpy.take_along_axis(M, neighbors, axis=-1) + ngb_order = numpy.argsort(Y, axis=-1) + return numpy.take_along_axis(neighbors, ngb_order, axis=-1) + + else: + + def func(M): + return numpy.argpartition(M, k - 1)[:, 0:k] + + neighbors = numpy.concatenate(parallel(delayed(func)(X[s]) for s in slices)) + if self.return_distance: + distances = numpy.take_along_axis(X, neighbors, axis=-1) + return neighbors, distances + else: + return neighbors if self.return_distance: - distances = numpy.partition(X, k - 1)[:, 0:k] - if self.params.get("sort_results"): - # partition is not guaranteed to sort the lower half, although it often does - distances.sort(axis=-1) + n_jobs = self.params.get("n_jobs", 1) + if n_jobs == 1: + distances = numpy.partition(X, k - 1)[:, 0:k] + if self.params.get("sort_results"): + # partition is not guaranteed to sort the lower half, although it often does + distances.sort(axis=-1) + else: + from joblib import Parallel, delayed, effective_n_jobs + from sklearn.utils import gen_even_slices + + if 
self.params.get("sort_results"): + + def func(M): + # Not partitioning in place, because we should not modify the user's array? + r = numpy.partition(M, k - 1)[:, 0:k] + r.sort(axis=-1) + return r + + else: + func = lambda M: numpy.partition(M, k - 1)[:, 0:k] + slices = gen_even_slices(len(X), effective_n_jobs(-1)) + parallel = Parallel(backend="threading", n_jobs=-1) + distances = numpy.concatenate(parallel(delayed(func)(X[s]) for s in slices)) return distances return None diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 37934fdb..bc0d3698 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -33,6 +33,9 @@ def test_dtm_compare_euclidean(): dtm = DistanceToMeasure(k, metric="precomputed") r4 = dtm.fit_transform(d) assert r4 == pytest.approx(r0) + dtm = DistanceToMeasure(k, metric="precomputed", n_jobs=2) + r4b = dtm.fit_transform(d) + assert r4b == pytest.approx(r0) dtm = DistanceToMeasure(k, implementation="keops") r5 = dtm.fit_transform(pts) assert r5 == pytest.approx(r0) diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py index 6aac2006..6269df54 100755 --- a/src/python/test/test_knn.py +++ b/src/python/test/test_knn.py @@ -52,6 +52,14 @@ def test_knn_explicit(): r = knn.fit_transform(dist) assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) + # Second time in parallel + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False, n_jobs=2) + r = knn.fit_transform(dist) + assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True, n_jobs=2) + r = knn.fit_transform(dist) + assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) + assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) def test_knn_compare(): -- cgit v1.2.3 From 280eb9d2323837619db1ae013b929adb9b45013b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 
2020 01:09:45 +0200 Subject: enable_autodiff with keops This doesn't seem like the best way to handle it, we may want to handle it like a wrapper that gets the indices from knn (whatever backend) and then computes the distances. --- src/python/gudhi/point_cloud/knn.py | 33 +++++++++++++++++++++++++++++---- src/python/test/test_dtm.py | 8 ++++++++ src/python/test/test_knn.py | 6 ++++++ 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index f6870517..79362c09 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -36,6 +36,9 @@ class KNearestNeighbors: sort_results (bool): if True, then distances and indices of each point are sorted on return, so that the first column contains the closest points. Otherwise, neighbors are returned in an arbitrary order. Defaults to True. + enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.array or similar, this instructs + the function to compute distances in a way that works with automatic differentiation. + This is experimental and not supported for all implementations. kwargs: additional parameters are forwarded to the backends. """ self.k = k @@ -202,13 +205,18 @@ class KNearestNeighbors: if self.params["implementation"] == "keops": import torch from pykeops.torch import LazyTensor + import eagerpy as ep # 'float64' is slow except on super expensive GPUs. Allow it with some param? 
- XX = torch.tensor(X, dtype=torch.float32) - if X is self.ref_points: + queries = X + X = ep.astensor(X) + XX = torch.as_tensor(X.numpy(), dtype=torch.float32) + if queries is self.ref_points: + Y = X YY = XX else: - YY = torch.tensor(self.ref_points, dtype=torch.float32) + Y = ep.astensor(self.ref_points) + YY = torch.as_tensor(Y.numpy(), dtype=torch.float32) p = self.params["p"] if p == numpy.inf: @@ -219,6 +227,24 @@ class KNearestNeighbors: else: mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs() ** p).sum(-1) + # pykeops does not support autodiff for kmin yet :-( + if self.params.get("enable_autodiff", False) and self.return_distance: + # Compute the indices of the neighbors, and recompute the relevant distances autodiff-friendly. + # Another strategy would be to compute the whole distance matrix with torch.cdist + # and use neighbors as indices into it. + neighbors = ep.astensor(mat.argKmin(k, dim=1)).numpy() + neighbor_pts = Y[neighbors] + diff = neighbor_pts - X[:, None, :] + if p == numpy.inf: + distances = diff.abs().max(-1) + elif p == 2: + distances = (diff ** 2).sum(-1) ** 0.5 + else: + distances = (diff.abs() ** p).sum(-1) ** (1.0 / p) + if self.return_index: + return neighbors.raw, distances.raw + else: + return distances.raw if self.return_index: if self.return_distance: distances, neighbors = mat.Kmin_argKmin(k, dim=1) @@ -234,7 +260,6 @@ class KNearestNeighbors: distances = distances ** (1.0 / p) return distances return None - # FIXME: convert everything back to numpy arrays or not? 
if self.params["implementation"] == "ckdtree": qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}} diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index bc0d3698..8709dd07 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -11,6 +11,7 @@ from gudhi.point_cloud.dtm import DistanceToMeasure import numpy import pytest +import torch def test_dtm_compare_euclidean(): @@ -39,6 +40,13 @@ def test_dtm_compare_euclidean(): dtm = DistanceToMeasure(k, implementation="keops") r5 = dtm.fit_transform(pts) assert r5 == pytest.approx(r0) + pts2 = torch.tensor(pts, requires_grad=True) + assert pts2.grad is None + dtm = DistanceToMeasure(k, implementation="keops", enable_autodiff=True) + r6 = dtm.fit_transform(pts2) + assert r6.detach().numpy() == pytest.approx(r0) + r6.sum().backward() + assert pts2.grad is not None def test_dtm_precomputed(): diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py index 6269df54..415c9d48 100755 --- a/src/python/test/test_knn.py +++ b/src/python/test/test_knn.py @@ -32,6 +32,12 @@ def test_knn_explicit(): .transform(query) ) assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) + r = ( + KNearestNeighbors(2, metric="chebyshev", return_distance=True, return_index=False, implementation="keops", enable_autodiff=True) + .fit(base) + .transform(query) + ) + assert r == pytest.approx(np.array([[0.0, 1], [1, 1], [1, 2]])) knn = KNearestNeighbors(2, metric="minkowski", p=3, return_distance=False, return_index=True) knn.fit(base) -- cgit v1.2.3 From 2f1576a23cf4ac055565875d384ca604c0ff6844 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 2020 15:01:51 +0200 Subject: Small autodiff tweaks --- src/python/gudhi/point_cloud/knn.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 79362c09..ab3447d4 100644 --- 
a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -233,16 +233,17 @@ class KNearestNeighbors: # Another strategy would be to compute the whole distance matrix with torch.cdist # and use neighbors as indices into it. neighbors = ep.astensor(mat.argKmin(k, dim=1)).numpy() - neighbor_pts = Y[neighbors] + # Work around https://github.com/pytorch/pytorch/issues/34452 + neighbor_pts = Y[neighbors,] diff = neighbor_pts - X[:, None, :] if p == numpy.inf: distances = diff.abs().max(-1) elif p == 2: - distances = (diff ** 2).sum(-1) ** 0.5 + distances = (diff ** 2).sum(-1).sqrt() else: distances = (diff.abs() ** p).sum(-1) ** (1.0 / p) if self.return_index: - return neighbors.raw, distances.raw + return neighbors, distances.raw else: return distances.raw if self.return_index: -- cgit v1.2.3 From 3a86402b733a48d9c25a4995325e72c7438c06c0 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 2020 15:21:06 +0200 Subject: Fix NaN gradient with pytorch --- src/python/gudhi/point_cloud/knn.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index ab3447d4..185a7764 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -236,12 +236,11 @@ class KNearestNeighbors: # Work around https://github.com/pytorch/pytorch/issues/34452 neighbor_pts = Y[neighbors,] diff = neighbor_pts - X[:, None, :] - if p == numpy.inf: - distances = diff.abs().max(-1) - elif p == 2: - distances = (diff ** 2).sum(-1).sqrt() + if isinstance(diff, ep.PyTorchTensor): + # https://github.com/jonasrauber/eagerpy/issues/6 + distances = ep.astensor(diff.raw.norm(p, -1)) else: - distances = (diff.abs() ** p).sum(-1) ** (1.0 / p) + distances = diff.norms.lp(p, -1) if self.return_index: return neighbors, distances.raw else: -- cgit v1.2.3 From 3afce326428dddd638e22ab37ee4b2afe52eba75 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 
2020 20:32:39 +0200 Subject: Generalize enable_autodiff to more implementations Still limited to L^p --- src/python/gudhi/point_cloud/knn.py | 76 +++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 185a7764..87b2798e 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -9,6 +9,7 @@ import numpy +# TODO: https://github.com/facebookresearch/faiss class KNearestNeighbors: """ @@ -67,6 +68,8 @@ class KNearestNeighbors: self.params["implementation"] = "ckdtree" else: self.params["implementation"] = "sklearn" + if not return_distance: + self.params["enable_autodiff"] = False def fit_transform(self, X, y=None): return self.fit(X).transform(X) @@ -77,6 +80,10 @@ class KNearestNeighbors: X (numpy.array): coordinates for reference points. """ self.ref_points = X + if self.params.get("enable_autodiff", False): + import eagerpy as ep + if self.params["implementation"] != "keops" or not isinstance(X, ep.PyTorchTensor): + X = ep.astensor(X).numpy() if self.params["implementation"] == "ckdtree": # sklearn could handle this, but it is much slower from scipy.spatial import cKDTree @@ -113,6 +120,41 @@ class KNearestNeighbors: Args: X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed". """ + if self.params.get("enable_autodiff", False): + # pykeops does not support autodiff for kmin yet, but when it does in the future, + # we may want a special path. + import eagerpy as ep + save_return_index = self.return_index + self.return_index = True + self.return_distance = False + self.params["enable_autodiff"] = False + try: + # FIXME: how do we test "X is ref_points" then? 
+ newX = ep.astensor(X) + if self.params["implementation"] != "keops" or not isinstance(newX, ep.PyTorchTensor): + newX = newX.numpy() + neighbors = self.transform(newX) + finally: + self.return_index = save_return_index + self.return_distance = True + self.params["enable_autodiff"] = True + # We can implement more later as needed + assert self.metric == "minkowski" + p = self.params["p"] + Y = ep.astensor(self.ref_points) + neighbor_pts = Y[neighbors,] + diff = neighbor_pts - X[:, None, :] + if isinstance(diff, ep.PyTorchTensor): + # https://github.com/jonasrauber/eagerpy/issues/6 + distances = ep.astensor(diff.raw.norm(p, -1)) + else: + distances = diff.norms.lp(p, -1) + if self.return_index: + return neighbors, distances.raw + else: + return distances.raw + + metric = self.metric k = self.k @@ -207,16 +249,26 @@ class KNearestNeighbors: from pykeops.torch import LazyTensor import eagerpy as ep - # 'float64' is slow except on super expensive GPUs. Allow it with some param? queries = X X = ep.astensor(X) - XX = torch.as_tensor(X.numpy(), dtype=torch.float32) + if isinstance(X, ep.PyTorchTensor): + XX = X.raw + else: + # I don't know a clever way to reuse a GPU tensor from tensorflow in pytorch + # without copying to/from the CPU. + XX = X.numpy() + # 'float64' is slow except on super expensive GPUs. Allow it with some param? 
+ XX = torch.as_tensor(XX, dtype=torch.float32) if queries is self.ref_points: Y = X YY = XX else: Y = ep.astensor(self.ref_points) - YY = torch.as_tensor(Y.numpy(), dtype=torch.float32) + if isinstance(Y, ep.PyTorchTensor): + YY = Y.raw + else: + YY = Y.numpy() + YY = torch.as_tensor(YY, dtype=torch.float32) p = self.params["p"] if p == numpy.inf: @@ -227,24 +279,6 @@ class KNearestNeighbors: else: mat = ((LazyTensor(XX[:, None, :]) - LazyTensor(YY[None, :, :])).abs() ** p).sum(-1) - # pykeops does not support autodiff for kmin yet :-( - if self.params.get("enable_autodiff", False) and self.return_distance: - # Compute the indices of the neighbors, and recompute the relevant distances autodiff-friendly. - # Another strategy would be to compute the whole distance matrix with torch.cdist - # and use neighbors as indices into it. - neighbors = ep.astensor(mat.argKmin(k, dim=1)).numpy() - # Work around https://github.com/pytorch/pytorch/issues/34452 - neighbor_pts = Y[neighbors,] - diff = neighbor_pts - X[:, None, :] - if isinstance(diff, ep.PyTorchTensor): - # https://github.com/jonasrauber/eagerpy/issues/6 - distances = ep.astensor(diff.raw.norm(p, -1)) - else: - distances = diff.norms.lp(p, -1) - if self.return_index: - return neighbors, distances.raw - else: - return distances.raw if self.return_index: if self.return_distance: distances, neighbors = mat.Kmin_argKmin(k, dim=1) -- cgit v1.2.3 From 521d8c17c2b7d71c46a51f0490ff2c13c809fc87 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 2020 21:13:19 +0200 Subject: Remove left-over code eagerpy is only used with enable_autodiff --- src/python/gudhi/point_cloud/knn.py | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 87b2798e..f2cddb38 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -82,8 +82,11 @@ class KNearestNeighbors: 
self.ref_points = X if self.params.get("enable_autodiff", False): import eagerpy as ep + X = ep.astensor(X) if self.params["implementation"] != "keops" or not isinstance(X, ep.PyTorchTensor): - X = ep.astensor(X).numpy() + # I don't know a clever way to reuse a GPU tensor from tensorflow in pytorch + # without copying to/from the CPU. + X = X.numpy() if self.params["implementation"] == "ckdtree": # sklearn could handle this, but it is much slower from scipy.spatial import cKDTree @@ -133,6 +136,8 @@ class KNearestNeighbors: newX = ep.astensor(X) if self.params["implementation"] != "keops" or not isinstance(newX, ep.PyTorchTensor): newX = newX.numpy() + else: + newX = X neighbors = self.transform(newX) finally: self.return_index = save_return_index @@ -247,29 +252,13 @@ class KNearestNeighbors: if self.params["implementation"] == "keops": import torch from pykeops.torch import LazyTensor - import eagerpy as ep - queries = X - X = ep.astensor(X) - if isinstance(X, ep.PyTorchTensor): - XX = X.raw - else: - # I don't know a clever way to reuse a GPU tensor from tensorflow in pytorch - # without copying to/from the CPU. - XX = X.numpy() # 'float64' is slow except on super expensive GPUs. Allow it with some param? 
- XX = torch.as_tensor(XX, dtype=torch.float32) - if queries is self.ref_points: - Y = X + XX = torch.as_tensor(X, dtype=torch.float32) + if X is self.ref_points: YY = XX else: - Y = ep.astensor(self.ref_points) - if isinstance(Y, ep.PyTorchTensor): - YY = Y.raw - else: - YY = Y.numpy() - YY = torch.as_tensor(YY, dtype=torch.float32) - + YY = torch.as_tensor(self.ref_points, dtype=torch.float32) p = self.params["p"] if p == numpy.inf: # Requires pykeops 1.4 or later -- cgit v1.2.3 From ce75f66da5a2d7ad2c479355112d48817c5ba68b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 2020 21:38:24 +0200 Subject: Tweak to detect fit_transform --- src/python/gudhi/point_cloud/knn.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index f2cddb38..8b3cdb46 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -11,6 +11,7 @@ import numpy # TODO: https://github.com/facebookresearch/faiss + class KNearestNeighbors: """ Class wrapping several implementations for computing the k nearest neighbors in a point set. @@ -82,6 +83,7 @@ class KNearestNeighbors: self.ref_points = X if self.params.get("enable_autodiff", False): import eagerpy as ep + X = ep.astensor(X) if self.params["implementation"] != "keops" or not isinstance(X, ep.PyTorchTensor): # I don't know a clever way to reuse a GPU tensor from tensorflow in pytorch @@ -127,17 +129,19 @@ class KNearestNeighbors: # pykeops does not support autodiff for kmin yet, but when it does in the future, # we may want a special path. import eagerpy as ep + save_return_index = self.return_index self.return_index = True self.return_distance = False self.params["enable_autodiff"] = False try: - # FIXME: how do we test "X is ref_points" then? 
newX = ep.astensor(X) - if self.params["implementation"] != "keops" or not isinstance(newX, ep.PyTorchTensor): + if self.params["implementation"] != "keops" or ( + not isinstance(newX, ep.PyTorchTensor) and not isinstance(newX, ep.NumPyTensor) + ): newX = newX.numpy() else: - newX = X + newX = newX.raw neighbors = self.transform(newX) finally: self.return_index = save_return_index @@ -159,7 +163,6 @@ class KNearestNeighbors: else: return distances.raw - metric = self.metric k = self.k -- cgit v1.2.3 From e40f81361c6d40e3470f9b3880fceefd837b3da2 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 13 Apr 2020 21:41:12 +0200 Subject: pip install eagerpy --- .github/test-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/test-requirements.txt b/.github/test-requirements.txt index 4f9dcefb..fb1df134 100644 --- a/.github/test-requirements.txt +++ b/.github/test-requirements.txt @@ -10,3 +10,4 @@ tensorflow torch pykeops hnswlib +eagerpy -- cgit v1.2.3 From f0c5aab988ee966510503a30b0591105594ac67d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 14 Apr 2020 15:37:31 +0200 Subject: More testing --- src/python/test/test_dtm.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 8709dd07..db3e5df5 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -47,6 +47,13 @@ def test_dtm_compare_euclidean(): assert r6.detach().numpy() == pytest.approx(r0) r6.sum().backward() assert pts2.grad is not None + pts2 = torch.tensor(pts, requires_grad=True) + assert pts2.grad is None + dtm = DistanceToMeasure(k, implementation="ckdtree", enable_autodiff=True) + r7 = dtm.fit_transform(pts2) + assert r7.detach().numpy() == pytest.approx(r0) + r7.sum().backward() + assert pts2.grad is not None def test_dtm_precomputed(): -- cgit v1.2.3 From b908205e85bbe29c8d18ad1f38e783a1327434d7 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 14 Apr 2020 17:00:27 +0200 Subject: 
EagerPy in cmake --- src/cmake/modules/GUDHI_third_party_libraries.cmake | 1 + src/python/CMakeLists.txt | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cmake/modules/GUDHI_third_party_libraries.cmake b/src/cmake/modules/GUDHI_third_party_libraries.cmake index a931b3a1..0abe66b7 100644 --- a/src/cmake/modules/GUDHI_third_party_libraries.cmake +++ b/src/cmake/modules/GUDHI_third_party_libraries.cmake @@ -181,6 +181,7 @@ if( PYTHONINTERP_FOUND ) find_python_module("pybind11") find_python_module("torch") find_python_module("pykeops") + find_python_module("eagerpy") find_python_module_no_version("hnswlib") endif() diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index d7a6a4db..99e8b57c 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -88,6 +88,9 @@ if(PYTHONINTERP_FOUND) if(PYKEOPS_FOUND) add_gudhi_debug_info("PyKeOps version ${PYKEOPS_VERSION}") endif() + if(EAGERPY_FOUND) + add_gudhi_debug_info("EagerPy version ${EAGERPY_VERSION}") + endif() set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_RESULT_OF_USE_DECLTYPE', ") set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DBOOST_ALL_NO_LIB', ") @@ -410,7 +413,7 @@ if(PYTHONINTERP_FOUND) add_gudhi_py_test(test_time_delay) # DTM - if(SCIPY_FOUND AND SKLEARN_FOUND AND TORCH_FOUND AND HNSWLIB_FOUND AND PYKEOPS_FOUND) + if(SCIPY_FOUND AND SKLEARN_FOUND AND TORCH_FOUND AND HNSWLIB_FOUND AND PYKEOPS_FOUND AND EAGERPY_FOUND) add_gudhi_py_test(test_knn) add_gudhi_py_test(test_dtm) endif() -- cgit v1.2.3 From 6d02ca0e077cc9750275abdfc024429cec0ba5a5 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 14 Apr 2020 17:10:58 +0200 Subject: Install Ubuntu's python3-grpcio since the one from PyPI seems broken at the moment. 
--- Dockerfile_for_circleci_image | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile_for_circleci_image b/Dockerfile_for_circleci_image index 20754e2a..c2e8a8f5 100644 --- a/Dockerfile_for_circleci_image +++ b/Dockerfile_for_circleci_image @@ -43,6 +43,7 @@ RUN apt-get install -y make \ python3 \ python3-pip \ python3-tk \ + python3-grpcio \ libfreetype6-dev \ pkg-config \ curl -- cgit v1.2.3 From 9518287cfa2a62948ede2e7d17d5c9f29092e0f4 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 14 Apr 2020 18:27:19 +0200 Subject: Doc improvements --- src/python/gudhi/point_cloud/dtm.py | 12 ++++++++++-- src/python/gudhi/point_cloud/knn.py | 11 ++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 38368f29..58dec536 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -20,7 +20,9 @@ class DistanceToMeasure: Args: k (int): number of neighbors (possibly including the point itself). q (float): order used to compute the distance to measure. Defaults to 2. - kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. + kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that + metric="neighbors" means that :func:`transform` expects an array with the distances + to the k nearest neighbors. """ self.k = k self.q = q @@ -44,7 +46,13 @@ class DistanceToMeasure: def transform(self, X): """ Args: - X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", or distances to the k nearest neighbors if metric is "neighbors" (if the array has more than k columns, the remaining ones are ignored). 
+ X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", + or distances to the k nearest neighbors if metric is "neighbors" (if the array has more + than k columns, the remaining ones are ignored). + + Returns: + numpy.array: a 1-d array with, for each point of X, its distance to the measure defined + by the argument of :func:`fit`. """ if self.params["metric"] == "neighbors": distances = X[:, : self.k] diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 8b3cdb46..d7cf0b2a 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -38,9 +38,9 @@ class KNearestNeighbors: sort_results (bool): if True, then distances and indices of each point are sorted on return, so that the first column contains the closest points. Otherwise, neighbors are returned in an arbitrary order. Defaults to True. - enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.array or similar, this instructs - the function to compute distances in a way that works with automatic differentiation. - This is experimental and not supported for all implementations. + enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.ndarray or tensorflow.Tensor, this + instructs the function to compute distances in a way that works with automatic differentiation. + This is experimental and not supported for all metrics. Defaults to False. kwargs: additional parameters are forwarded to the backends. """ self.k = k @@ -124,6 +124,11 @@ class KNearestNeighbors: """ Args: X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed". + + Returns: + numpy.array: if return_index, an array of shape (len(X), k) with the indices (in the argument + of :func:`fit`) of the k nearest neighbors to the points of X. If return_distance, an array of the + same shape with the distances to those neighbors. If both, a tuple with the two arrays, in this order. 
""" if self.params.get("enable_autodiff", False): # pykeops does not support autodiff for kmin yet, but when it does in the future, -- cgit v1.2.3 From acb9d5b9d1317d3d8168bc3ac46860d078abba84 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 14 Apr 2020 20:30:29 +0200 Subject: Check that the gradient is not NaN This can easily happen with pytorch, and there is special code to avoid it. --- src/python/test/test_dtm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index db3e5df5..de74c42b 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -46,14 +46,14 @@ def test_dtm_compare_euclidean(): r6 = dtm.fit_transform(pts2) assert r6.detach().numpy() == pytest.approx(r0) r6.sum().backward() - assert pts2.grad is not None + assert pts2.grad is not None and not torch.isnan(pts2.grad).any() pts2 = torch.tensor(pts, requires_grad=True) assert pts2.grad is None dtm = DistanceToMeasure(k, implementation="ckdtree", enable_autodiff=True) r7 = dtm.fit_transform(pts2) assert r7.detach().numpy() == pytest.approx(r0) r7.sum().backward() - assert pts2.grad is not None + assert pts2.grad is not None and not torch.isnan(pts2.grad).any() def test_dtm_precomputed(): -- cgit v1.2.3 From d302e90dcf4b284e6dc8b3ab21e8a67fb9cf5179 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 16 Apr 2020 15:40:45 +0200 Subject: Update the concept of the simplicial complex We use the key now. It wouldn't be hard to use an unordered_map, but since we usually have an unused field key... 
--- src/Alpha_complex/concept/SimplicialComplexForAlpha.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/Alpha_complex/concept/SimplicialComplexForAlpha.h b/src/Alpha_complex/concept/SimplicialComplexForAlpha.h index 1c6c3b0c..c20c3201 100644 --- a/src/Alpha_complex/concept/SimplicialComplexForAlpha.h +++ b/src/Alpha_complex/concept/SimplicialComplexForAlpha.h @@ -72,6 +72,24 @@ struct SimplicialComplexForAlpha { /** \brief Return type of an insertion of a simplex */ typedef unspecified Insertion_result_type; + + /** \name Map interface + * Conceptually a `std::unordered_map`. + * @{ */ + /** \brief Data stored for each simplex. + * + * Must be an integer type. */ + typedef unspecified Simplex_key; + /** \brief Returns a constant dummy number that is either negative, + * or at least as large as the number of simplices. Suggested value: -1. */ + Simplex_key null_key (); + /** \brief Returns the number stored for a simplex by `assign_key()`. + * + * If `assign_key()` has not been called, it must return `null_key()`. */ + Simplex_key key ( Simplex_handle sh ); + /** \brief Store a number for a simplex, which can later be retrieved with `key()`. */ + void assign_key(Simplex_handle sh, Simplex_key n); + /** @} */ }; } // namespace alpha_complex -- cgit v1.2.3 From 039382cbd951c8c94ddfd43b5ae228666a5cabed Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 16 Apr 2020 17:28:58 +0200 Subject: Fix doc of Simplex_tree about keys --- src/Simplex_tree/include/gudhi/Simplex_tree.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h index 430d1ac4..591a9e37 100644 --- a/src/Simplex_tree/include/gudhi/Simplex_tree.h +++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h @@ -463,7 +463,7 @@ class Simplex_tree { public: /** \brief Returns the key associated to a simplex. * - * The filtration must be initialized. 
+ * If no key has been assigned, returns `null_key()`. * \pre SimplexTreeOptions::store_key */ static Simplex_key key(Simplex_handle sh) { @@ -473,7 +473,6 @@ class Simplex_tree { /** \brief Returns the simplex that has index idx in the filtration. * * The filtration must be initialized. - * \pre SimplexTreeOptions::store_key */ Simplex_handle simplex(Simplex_key idx) const { return filtration_vect_[idx]; @@ -509,8 +508,7 @@ class Simplex_tree { return Dictionary_it(nullptr); } - /** \brief Returns a key different for all keys associated to the - * simplices of the simplicial complex. */ + /** \brief Returns a fixed number not in the interval [0, `num_simplices()`). */ static Simplex_key null_key() { return -1; } @@ -856,11 +854,9 @@ class Simplex_tree { public: /** \brief Initializes the filtrations, i.e. sort the - * simplices according to their order in the filtration and initializes all Simplex_keys. + * simplices according to their order in the filtration. * - * After calling this method, filtration_simplex_range() becomes valid, and each simplex is - * assigned a Simplex_key corresponding to its order in the filtration (from 0 to m-1 for a - * simplicial complex with m simplices). + * After calling this method, filtration_simplex_range() becomes valid. * * Will be automatically called when calling filtration_simplex_range() * if the filtration has never been initialized yet. 
*/ -- cgit v1.2.3 From 17aaa979e4cdfe5faed9b2750d452171de4b67e1 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 17 Apr 2020 22:13:29 +0200 Subject: Simplify distance-to-diagonal in Wasserstein --- src/python/gudhi/wasserstein/wasserstein.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 35315939..5df66cf9 100644 --- a/src/python/gudhi/wasserstein/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -15,16 +15,19 @@ try: except ImportError: print("POT (Python Optimal Transport) package is not installed. Try to run $ conda install -c conda-forge pot ; or $ pip install POT") -def _proj_on_diag(X): +def _dist_to_diag(X, internal_p): ''' :param X: (n x 2) array encoding the points of a persistent diagram. - :returns: (n x 2) array encoding the (respective orthogonal) projections of the points onto the diagonal + :param internal_p: Ground metric (i.e. norm L^p). + :returns: (n) array encoding the (respective orthogonal) distances of the points to the diagonal + + .. note:: + Assumes that the points are above the diagonal. ''' - Z = (X[:,0] + X[:,1]) / 2. - return np.array([Z , Z]).T + return (X[:, 1] - X[:, 0]) * 2 ** (1.0 / internal_p - 1) -def _build_dist_matrix(X, Y, order=2., internal_p=2.): +def _build_dist_matrix(X, Y, order, internal_p): ''' :param X: (n x 2) numpy.array encoding the (points of the) first diagram. :param Y: (m x 2) numpy.array encoding the second diagram. @@ -36,16 +39,12 @@ def _build_dist_matrix(X, Y, order=2., internal_p=2.): and its orthogonal projection onto the diagonal. note also that C[n, m] = 0 (it costs nothing to move from the diagonal to the diagonal). 
''' - Xdiag = _proj_on_diag(X) - Ydiag = _proj_on_diag(Y) + Cxd = _dist_to_diag(X, internal_p)**order + Cdy = _dist_to_diag(Y, internal_p)**order if np.isinf(internal_p): C = sc.cdist(X,Y, metric='chebyshev')**order - Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order - Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order else: C = sc.cdist(X,Y, metric='minkowski', p=internal_p)**order - Cxd = np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order - Cdy = np.linalg.norm(Y - Ydiag, ord=internal_p, axis=1)**order Cf = np.hstack((C, Cxd[:,None])) Cdy = np.append(Cdy, 0) @@ -61,8 +60,7 @@ def _perstot(X, order, internal_p): :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). ''' - Xdiag = _proj_on_diag(X) - return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order) + return np.linalg.norm(_dist_to_diag(X, internal_p), ord=order) def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): -- cgit v1.2.3 From f93c403b81b4ccb98bfad8e4ef30cdf0e7333f6c Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 18 Apr 2020 23:52:12 +0200 Subject: enable_autodiff for POT wasserstein_distance --- src/python/gudhi/wasserstein/wasserstein.py | 64 +++++++++++++++++++++++----- src/python/test/test_wasserstein_distance.py | 14 ++++-- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 5df66cf9..9660b99b 100644 --- a/src/python/gudhi/wasserstein/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -53,17 +53,30 @@ def _build_dist_matrix(X, Y, order, internal_p): return Cf -def _perstot(X, order, internal_p): +def _perstot_autodiff(X, order, internal_p): + ''' + Version of _perstot that works on eagerpy tensors. 
+ ''' + return _dist_to_diag(X, internal_p).norms.lp(order) + +def _perstot(X, order, internal_p, enable_autodiff): ''' :param X: (n x 2) numpy.array (points of a given diagram). :param order: exponent for Wasserstein. Default value is 2. :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). + :param enable_autodiff: If X is torch.tensor, tensorflow.Tensor or jax.numpy.ndarray, make the computation + transparent to automatic differentiation. + :type enable_autodiff: bool :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram). ''' - return np.linalg.norm(_dist_to_diag(X, internal_p), ord=order) + if enable_autodiff: + import eagerpy as ep + return _perstot_autodiff(ep.astensor(X), order, internal_p).raw + else: + return np.linalg.norm(_dist_to_diag(X, internal_p), ord=order) -def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): +def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_autodiff=False): ''' :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). @@ -74,6 +87,9 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): :param order: exponent for Wasserstein; Default value is 2. :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). + :param enable_autodiff: If X and Y are torch.tensor, tensorflow.Tensor or jax.numpy.ndarray, make the computation + transparent to automatic differentiation. + :type enable_autodiff: bool :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with respect to the internal_p-norm as ground metric. If matching is set to True, also returns the optimal matching between X and Y. 
@@ -82,23 +98,30 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): m = len(Y) # handle empty diagrams - if X.size == 0: - if Y.size == 0: + if n == 0: + if m == 0: if not matching: + # What if enable_autodiff? return 0. else: return 0., np.array([]) else: if not matching: - return _perstot(Y, order, internal_p) + return _perstot(Y, order, internal_p, enable_autodiff) else: - return _perstot(Y, order, internal_p), np.array([[-1, j] for j in range(m)]) - elif Y.size == 0: + return _perstot(Y, order, internal_p, enable_autodiff), np.array([[-1, j] for j in range(m)]) + elif m == 0: if not matching: - return _perstot(X, order, internal_p) + return _perstot(X, order, internal_p, enable_autodiff) else: - return _perstot(X, order, internal_p), np.array([[i, -1] for i in range(n)]) - + return _perstot(X, order, internal_p, enable_autodiff), np.array([[i, -1] for i in range(n)]) + + if enable_autodiff: + import eagerpy as ep + X_orig = ep.astensor(X) + Y_orig = ep.astensor(Y) + X = X_orig.numpy() + Y = Y_orig.numpy() M = _build_dist_matrix(X, Y, order=order, internal_p=internal_p) a = np.ones(n+1) # weight vector of the input diagram. Uniform here. 
a[-1] = m @@ -106,6 +129,7 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): b[-1] = n if matching: + assert not enable_autodiff, "matching and enable_autodiff are currently incompatible" P = ot.emd(a=a,b=b,M=M, numItermax=2000000) ot_cost = np.sum(np.multiply(P,M)) P[-1, -1] = 0 # Remove matching corresponding to the diagonal @@ -115,6 +139,24 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.): match[:,1][match[:,1] >= m] = -1 return ot_cost ** (1./order) , match + if enable_autodiff: + P = ot.emd(a=a,b=b,M=M, numItermax=2000000) + pairs = np.argwhere(P[:-1, :-1]) + diag2 = np.nonzero(P[-1, :-1]) + diag1 = np.nonzero(P[:-1, -1]) + dists = [] + # empty arrays are not handled properly by the helpers, so we avoid calling them + if len(pairs): + dists.append((Y_orig[pairs[:, 1]] - X_orig[pairs[:, 0]]).norms.lp(internal_p, axis=-1).norms.lp(order)) + if len(diag1): + dists.append(_perstot_autodiff(X_orig[diag1], order, internal_p)) + if len(diag2): + dists.append(_perstot_autodiff(Y_orig[diag2], order, internal_p)) + dists = [ dist.reshape(1) for dist in dists ] + return ep.concatenate(dists).norms.lp(order) + # Should just compute the L^order norm manually? + # We can also concatenate the 3 vectors to compute just one norm. + # Comptuation of the otcost using the ot.emd2 library. # Note: it is the Wasserstein distance to the power q. # The default numItermax=100000 is not sufficient for some examples with 5000 points, what is a good value? 
diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py index 7e0d0f5f..5bec5bd3 100755 --- a/src/python/test/test_wasserstein_distance.py +++ b/src/python/test/test_wasserstein_distance.py @@ -73,14 +73,20 @@ def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_mat -def hera_wrap(delta): +def hera_wrap(**extra): def fun(*kargs,**kwargs): - return hera(*kargs,**kwargs,delta=delta) + return hera(*kargs,**kwargs,**extra) + return fun + +def pot_wrap(**extra): + def fun(*kargs,**kwargs): + return pot(*kargs,**kwargs,**extra) return fun def test_wasserstein_distance_pot(): _basic_wasserstein(pot, 1e-15, test_infinity=False, test_matching=True) + _basic_wasserstein(pot_wrap(enable_autodiff=True), 1e-15, test_infinity=False, test_matching=False) def test_wasserstein_distance_hera(): - _basic_wasserstein(hera_wrap(1e-12), 1e-12, test_matching=False) - _basic_wasserstein(hera_wrap(.1), .1, test_matching=False) + _basic_wasserstein(hera_wrap(delta=1e-12), 1e-12, test_matching=False) + _basic_wasserstein(hera_wrap(delta=.1), .1, test_matching=False) -- cgit v1.2.3 From b2a9ba18ce33778abdd9f5032af4bfff04e8bbd2 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 19 Apr 2020 09:06:08 +0200 Subject: Unwrap the result --- src/python/gudhi/wasserstein/wasserstein.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 9660b99b..f0c82962 100644 --- a/src/python/gudhi/wasserstein/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -71,6 +71,7 @@ def _perstot(X, order, internal_p, enable_autodiff): ''' if enable_autodiff: import eagerpy as ep + return _perstot_autodiff(ep.astensor(X), order, internal_p).raw else: return np.linalg.norm(_dist_to_diag(X, internal_p), ord=order) @@ -118,6 +119,7 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_a if 
enable_autodiff: import eagerpy as ep + X_orig = ep.astensor(X) Y_orig = ep.astensor(Y) X = X_orig.numpy() @@ -140,10 +142,10 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_a return ot_cost ** (1./order) , match if enable_autodiff: - P = ot.emd(a=a,b=b,M=M, numItermax=2000000) + P = ot.emd(a=a, b=b, M=M, numItermax=2000000) pairs = np.argwhere(P[:-1, :-1]) - diag2 = np.nonzero(P[-1, :-1]) diag1 = np.nonzero(P[:-1, -1]) + diag2 = np.nonzero(P[-1, :-1]) dists = [] # empty arrays are not handled properly by the helpers, so we avoid calling them if len(pairs): @@ -152,8 +154,8 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_a dists.append(_perstot_autodiff(X_orig[diag1], order, internal_p)) if len(diag2): dists.append(_perstot_autodiff(Y_orig[diag2], order, internal_p)) - dists = [ dist.reshape(1) for dist in dists ] - return ep.concatenate(dists).norms.lp(order) + dists = [dist.reshape(1) for dist in dists] + return ep.concatenate(dists).norms.lp(order).raw # Should just compute the L^order norm manually? # We can also concatenate the 3 vectors to compute just one norm. 
-- cgit v1.2.3 From 1086b8cad7c1ea2a02742dfc44aef036a674f5d3 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 19 Apr 2020 12:17:42 +0200 Subject: Test gradient --- src/python/test/test_wasserstein_distance.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py index 5bec5bd3..c6d6b346 100755 --- a/src/python/test/test_wasserstein_distance.py +++ b/src/python/test/test_wasserstein_distance.py @@ -90,3 +90,16 @@ def test_wasserstein_distance_pot(): def test_wasserstein_distance_hera(): _basic_wasserstein(hera_wrap(delta=1e-12), 1e-12, test_matching=False) _basic_wasserstein(hera_wrap(delta=.1), .1, test_matching=False) + +def test_wasserstein_distance_grad(): + import torch + + diag1 = torch.tensor([[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]], requires_grad=True) + diag2 = torch.tensor([[2.8, 4.45], [9.5, 14.1]], requires_grad=True) + diag3 = torch.tensor([[2.8, 4.45], [9.5, 14.1]], requires_grad=True) + assert diag1.grad is None and diag2.grad is None and diag3.grad is None + dist1 = pot(diag1, diag2, internal_p=2, order=2, enable_autodiff=True) + dist2 = pot(diag3, torch.tensor([]), internal_p=2, order=2, enable_autodiff=True) + dist1.backward() + dist2.backward() + assert not torch.isnan(diag1.grad).any() and not torch.isnan(diag2.grad).any() and not torch.isnan(diag3.grad).any() -- cgit v1.2.3 From 8d9611206603f4f7506fe77a0273c73c9d67716b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 19 Apr 2020 12:30:35 +0200 Subject: Drop redundant test torch.isnan(None) raises an exception anyway --- src/python/test/test_dtm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index de74c42b..859189fa 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -46,14 +46,14 @@ def test_dtm_compare_euclidean(): r6 = dtm.fit_transform(pts2) assert r6.detach().numpy() == 
pytest.approx(r0) r6.sum().backward() - assert pts2.grad is not None and not torch.isnan(pts2.grad).any() + assert not torch.isnan(pts2.grad).any() pts2 = torch.tensor(pts, requires_grad=True) assert pts2.grad is None dtm = DistanceToMeasure(k, implementation="ckdtree", enable_autodiff=True) r7 = dtm.fit_transform(pts2) assert r7.detach().numpy() == pytest.approx(r0) r7.sum().backward() - assert pts2.grad is not None and not torch.isnan(pts2.grad).any() + assert not torch.isnan(pts2.grad).any() def test_dtm_precomputed(): -- cgit v1.2.3 From 1fc55e54ed2f24969a691914edee642f97142fa9 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 19 Apr 2020 13:43:23 +0200 Subject: Test comparison with persistence_pairs() --- src/python/test/test_simplex_generators.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/python/test/test_simplex_generators.py b/src/python/test/test_simplex_generators.py index e3bdc094..8a9b4844 100755 --- a/src/python/test/test_simplex_generators.py +++ b/src/python/test/test_simplex_generators.py @@ -24,6 +24,13 @@ def test_flag_generators(): assert np.array_equal(g[2], [0, 4]) assert len(g[3]) == 1 assert np.array_equal(g[3][0], [[7, 6]]) + # Compare trivial cases (where the simplex is the generator) with persistence_pairs. + # This still makes assumptions on the order of vertices in a simplex and could be more robust. 
+ pairs = st.persistence_pairs() + assert {tuple(i) for i in g[0]} == {(i[0][0],) + tuple(i[1]) for i in pairs if len(i[0]) == 1 and len(i[1]) != 0} + assert {(i[0], i[1]) for i in g[1][0]} == {tuple(i[0]) for i in pairs if len(i[0]) == 2 and len(i[1]) != 0} + assert set(g[2]) == {i[0][0] for i in pairs if len(i[0]) == 1 and len(i[1]) == 0} + assert {(i[0], i[1]) for i in g[3][0]} == {tuple(i[0]) for i in pairs if len(i[0]) == 2 and len(i[1]) == 0} def test_lower_star_generators(): -- cgit v1.2.3 From 1c1a99074049e4ff04fa28e7d6e1b6fc2067397a Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 20 Apr 2020 10:38:41 +0200 Subject: Add __license__ --- src/python/gudhi/point_cloud/dtm.py | 4 ++++ src/python/gudhi/point_cloud/knn.py | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 58dec536..13e16d24 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -9,6 +9,10 @@ from .knn import KNearestNeighbors +__author__ = "Marc Glisse" +__copyright__ = "Copyright (C) 2020 Inria" +__license__ = "MIT" + class DistanceToMeasure: """ diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index d7cf0b2a..4017e498 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -11,6 +11,10 @@ import numpy # TODO: https://github.com/facebookresearch/faiss +__author__ = "Marc Glisse" +__copyright__ = "Copyright (C) 2020 Inria" +__license__ = "MIT" + class KNearestNeighbors: """ @@ -156,7 +160,9 @@ class KNearestNeighbors: assert self.metric == "minkowski" p = self.params["p"] Y = ep.astensor(self.ref_points) - neighbor_pts = Y[neighbors,] + neighbor_pts = Y[ + neighbors, + ] diff = neighbor_pts - X[:, None, :] if isinstance(diff, ep.PyTorchTensor): # https://github.com/jonasrauber/eagerpy/issues/6 -- cgit v1.2.3 From 3a9105e0d3bea5cc64610b7c0c3fb15f0e00bb9d Mon Sep 17 00:00:00 
2001 From: Marc Glisse Date: Mon, 20 Apr 2020 11:37:44 +0200 Subject: Reintroduce _proj_on_diag, with a unit test --- src/python/gudhi/wasserstein/wasserstein.py | 11 +++++++++++ src/python/test/test_wasserstein_distance.py | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 5df66cf9..efc851a0 100644 --- a/src/python/gudhi/wasserstein/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -15,6 +15,17 @@ try: except ImportError: print("POT (Python Optimal Transport) package is not installed. Try to run $ conda install -c conda-forge pot ; or $ pip install POT") + +# Currently unused, but Théo says it is likely to be used again. +def _proj_on_diag(X): + ''' + :param X: (n x 2) array encoding the points of a persistent diagram. + :returns: (n x 2) array encoding the (respective orthogonal) projections of the points onto the diagonal + ''' + Z = (X[:,0] + X[:,1]) / 2. + return np.array([Z , Z]).T + + def _dist_to_diag(X, internal_p): ''' :param X: (n x 2) array encoding the points of a persistent diagram. 
diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py index 7e0d0f5f..1a4acc1d 100755 --- a/src/python/test/test_wasserstein_distance.py +++ b/src/python/test/test_wasserstein_distance.py @@ -8,6 +8,7 @@ - YYYY/MM Author: Description of the modification """ +from gudhi.wasserstein.wasserstein import _proj_on_diag from gudhi.wasserstein import wasserstein_distance as pot from gudhi.hera import wasserstein_distance as hera import numpy as np @@ -17,6 +18,12 @@ __author__ = "Theo Lacombe" __copyright__ = "Copyright (C) 2019 Inria" __license__ = "MIT" +def test_proj_on_diag(): + dgm = np.array([[1., 1.], [1., 2.], [3., 5.]]) + assert np.array_equal(_proj_on_diag(dgm), [[1., 1.], [1.5, 1.5], [4., 4.]]) + empty = np.empty((0, 2)) + assert np.array_equal(_proj_on_diag(empty), empty) + def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_matching=True): diag1 = np.array([[2.7, 3.7], [9.6, 14.0], [34.2, 34.974]]) diag2 = np.array([[2.8, 4.45], [9.5, 14.1]]) -- cgit v1.2.3 From 9ef7ba65367ab2ff92bf66b1b8166c5990530b76 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 20 Apr 2020 12:16:15 +0200 Subject: Explicitly pass sort_results=True on some tests --- src/python/test/test_knn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/test/test_knn.py b/src/python/test/test_knn.py index 415c9d48..a87ec212 100755 --- a/src/python/test/test_knn.py +++ b/src/python/test/test_knn.py @@ -54,12 +54,12 @@ def test_knn_explicit(): knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False) r = knn.fit_transform(dist) assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) - knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True, sort_results=True) r = knn.fit_transform(dist) assert np.array_equal(r[0], [[0, 1], [1, 0], [2, 0]]) 
assert np.array_equal(r[1], [[0, 3], [0, 1], [0, 1]]) # Second time in parallel - knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False, n_jobs=2) + knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=False, n_jobs=2, sort_results=True) r = knn.fit_transform(dist) assert np.array_equal(r, [[0, 1], [1, 0], [2, 0]]) knn = KNearestNeighbors(2, metric="precomputed", return_index=True, return_distance=True, n_jobs=2) -- cgit v1.2.3 From bac284bf7f65c40f03ec8e47316d4f0fd0059c91 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 20 Apr 2020 19:12:35 +0200 Subject: Check that dependencies are present before testing --- src/python/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 10dcd161..5ab63e5d 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -401,7 +401,9 @@ if(PYTHONINTERP_FOUND) # Wasserstein if(OT_FOUND AND PYBIND11_FOUND) - add_gudhi_py_test(test_wasserstein_distance) + if(TORCH_FOUND AND EAGERPY_FOUND) + add_gudhi_py_test(test_wasserstein_distance) + endif() add_gudhi_py_test(test_wasserstein_barycenter) endif() -- cgit v1.2.3 From 4ad650bc3184f57e1dda91f6b0a6358830f0562f Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 20 Apr 2020 19:42:34 +0200 Subject: Drop one comment --- src/python/gudhi/wasserstein/wasserstein.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 5b61d176..42c8dc2d 100644 --- a/src/python/gudhi/wasserstein/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -167,7 +167,6 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_a dists.append(_perstot_autodiff(Y_orig[diag2], order, internal_p)) dists = [dist.reshape(1) for dist in dists] return ep.concatenate(dists).norms.lp(order).raw - # Should just compute the 
L^order norm manually? # We can also concatenate the 3 vectors to compute just one norm. # Comptuation of the otcost using the ot.emd2 library. -- cgit v1.2.3 From 70fb88a668c2cad837cbdea4863a136a1efc71c3 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 20 Apr 2020 20:39:40 +0200 Subject: Random CircleCI tweak --- .circleci/config.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4f86cb12..40ddc08e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -45,7 +45,6 @@ jobs: python: docker: - image: gudhi/ci_for_gudhi:latest - parallelism: 4 steps: - checkout - run: @@ -62,12 +61,12 @@ jobs: cd build; cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_EXAMPLE=OFF -DWITH_GUDHI_UTILITIES=OFF -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 ..; cd python; - python3 setup.py build_ext -j 4 --inplace; + python3 setup.py build_ext -j 2 --inplace; make sphinx; cp -R sphinx /tmp/sphinx; python3 setup.py install; python3 setup.py clean --all; - ctest -j 4 --output-on-failure; + ctest -j 2 --output-on-failure; - store_artifacts: path: /tmp/sphinx -- cgit v1.2.3 From 3e713cee177e10536ae8fc231e56fa04769a35ee Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 20 Apr 2020 22:06:38 +0200 Subject: Fix #279 --- src/python/CMakeLists.txt | 129 +++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 64 deletions(-) diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 10dcd161..055d5b23 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -242,6 +242,71 @@ if(PYTHONINTERP_FOUND) install(CODE "execute_process(COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/setup.py install)") + # Documentation generation is available through sphinx - requires all modules + # Make it first as sphinx test is by far the longest test which is nice when testing in parallel + if(SPHINX_PATH) + if(MATPLOTLIB_FOUND) + if(NUMPY_FOUND) + 
if(SCIPY_FOUND) + if(SKLEARN_FOUND) + if(OT_FOUND) + if(PYBIND11_FOUND) + if(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/") + # User warning - Sphinx is a static pages generator, and configured to work fine with user_version + # Images and biblio warnings because not found on developper version + if (GUDHI_PYTHON_PATH STREQUAL "src/python") + set (GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : Sphinx is configured for user version, you run it on developper version. Images and biblio will miss") + endif() + # sphinx target requires gudhi.so, because conf.py reads gudhi version from it + add_custom_target(sphinx + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc + COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" + ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so" + COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM) + + add_test(NAME sphinx_py_test + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" + ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest) + + # Set missing or not modules + set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES") + else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) + else(PYBIND11_FOUND) + message("++ Python documentation module will not be compiled because pybind11 was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL 
"GUDHI_MISSING_MODULES") + endif(PYBIND11_FOUND) + else(OT_FOUND) + message("++ Python documentation module will not be compiled because POT was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(OT_FOUND) + else(SKLEARN_FOUND) + message("++ Python documentation module will not be compiled because scikit-learn was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SKLEARN_FOUND) + else(SCIPY_FOUND) + message("++ Python documentation module will not be compiled because scipy was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SCIPY_FOUND) + else(NUMPY_FOUND) + message("++ Python documentation module will not be compiled because numpy was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(NUMPY_FOUND) + else(MATPLOTLIB_FOUND) + message("++ Python documentation module will not be compiled because matplotlib was not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(MATPLOTLIB_FOUND) + else(SPHINX_PATH) + message("++ Python documentation module will not be compiled because sphinx and sphinxcontrib-bibtex were not found") + set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") + endif(SPHINX_PATH) + + # Test examples if (NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) # Bottleneck and Alpha @@ -419,70 +484,6 @@ if(PYTHONINTERP_FOUND) add_gudhi_py_test(test_dtm) endif() - # Documentation generation is available through sphinx - requires all modules - if(SPHINX_PATH) - if(MATPLOTLIB_FOUND) - if(NUMPY_FOUND) - if(SCIPY_FOUND) - if(SKLEARN_FOUND) - if(OT_FOUND) - if(PYBIND11_FOUND) - if(NOT 
CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - set (GUDHI_SPHINX_MESSAGE "Generating API documentation with Sphinx in ${CMAKE_CURRENT_BINARY_DIR}/sphinx/") - # User warning - Sphinx is a static pages generator, and configured to work fine with user_version - # Images and biblio warnings because not found on developper version - if (GUDHI_PYTHON_PATH STREQUAL "src/python") - set (GUDHI_SPHINX_MESSAGE "${GUDHI_SPHINX_MESSAGE} \n WARNING : Sphinx is configured for user version, you run it on developper version. Images and biblio will miss") - endif() - # sphinx target requires gudhi.so, because conf.py reads gudhi version from it - add_custom_target(sphinx - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doc - COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" - ${SPHINX_PATH} -b html ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/sphinx - DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/gudhi.so" - COMMENT "${GUDHI_SPHINX_MESSAGE}" VERBATIM) - - add_test(NAME sphinx_py_test - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" - ${SPHINX_PATH} -b doctest ${CMAKE_CURRENT_SOURCE_DIR}/doc ${CMAKE_CURRENT_BINARY_DIR}/doctest) - - # Set missing or not modules - set(GUDHI_MODULES ${GUDHI_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MODULES") - else(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - message("++ Python documentation module will not be compiled because it requires a Eigen3 and CGAL version >= 4.11.0") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - else(PYBIND11_FOUND) - message("++ Python documentation module will not be compiled because pybind11 was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(PYBIND11_FOUND) - else(OT_FOUND) - message("++ 
Python documentation module will not be compiled because POT was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(OT_FOUND) - else(SKLEARN_FOUND) - message("++ Python documentation module will not be compiled because scikit-learn was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(SKLEARN_FOUND) - else(SCIPY_FOUND) - message("++ Python documentation module will not be compiled because scipy was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(SCIPY_FOUND) - else(NUMPY_FOUND) - message("++ Python documentation module will not be compiled because numpy was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(NUMPY_FOUND) - else(MATPLOTLIB_FOUND) - message("++ Python documentation module will not be compiled because matplotlib was not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(MATPLOTLIB_FOUND) - else(SPHINX_PATH) - message("++ Python documentation module will not be compiled because sphinx and sphinxcontrib-bibtex were not found") - set(GUDHI_MISSING_MODULES ${GUDHI_MISSING_MODULES} "python-documentation" CACHE INTERNAL "GUDHI_MISSING_MODULES") - endif(SPHINX_PATH) - - # Set missing or not modules set(GUDHI_MODULES ${GUDHI_MODULES} "python" CACHE INTERNAL "GUDHI_MODULES") else(CYTHON_FOUND) -- cgit v1.2.3 From aa90b98bee73ab2aaf39ef91f39f5a750168e5d4 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 22 Apr 2020 13:04:15 +0200 Subject: Document several optional dependencies of knn --- src/python/doc/installation.rst | 28 ++++++++++++++++++++++++++++ src/python/gudhi/point_cloud/knn.py | 3 ++- 2 files changed, 30 insertions(+), 1 
deletion(-) diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst index 48425d5e..09a843d5 100644 --- a/src/python/doc/installation.rst +++ b/src/python/doc/installation.rst @@ -211,6 +211,14 @@ The following examples requires CGAL version ≥ 4.11.0: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +EagerPy +======= + +Some Python functions can handle automatic differentiation (possibly only when +a flag `enable_autodiff=True` is used). In order to reduce code duplication, we +use `EagerPy `_ which wraps arrays from +PyTorch, TensorFlow and JAX in a common interface. + Eigen ===== @@ -229,6 +237,13 @@ The following examples require `Eigen `_ version * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +Hnswlib +======= + +:class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package +`Hnswlib `_ as a backend if explicitly +requested, to speed-up queries. 
+ Matplotlib ========== @@ -251,6 +266,13 @@ The following examples require the `Matplotlib `_: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +PyKeOps +======= + +:class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package +`PyKeOps `_ as a backend if +explicitly requested, to speed-up queries using a GPU. + Python Optimal Transport ======================== @@ -258,6 +280,12 @@ The :doc:`Wasserstein distance ` module requires `POT `_, a library that provides several solvers for optimization problems related to Optimal Transport. +PyTorch +======= + +`PyTorch `_ is currently only used as a dependency of +`PyKeOps`_, and in some tests. + Scikit-learn ============ diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 4017e498..07553d6d 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -44,7 +44,8 @@ class KNearestNeighbors: Otherwise, neighbors are returned in an arbitrary order. Defaults to True. enable_autodiff (bool): if the input is a torch.tensor, jax.numpy.ndarray or tensorflow.Tensor, this instructs the function to compute distances in a way that works with automatic differentiation. - This is experimental and not supported for all metrics. Defaults to False. + This is experimental, not supported for all metrics, and requires the package EagerPy. + Defaults to False. kwargs: additional parameters are forwarded to the backends. 
""" self.k = k -- cgit v1.2.3 From da2a7a68f8f57495080af37cf981f64228d165a2 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 22 Apr 2020 14:06:02 +0200 Subject: Rename local variables --- src/python/gudhi/wasserstein/wasserstein.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 42c8dc2d..3d1caeb3 100644 --- a/src/python/gudhi/wasserstein/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -154,17 +154,17 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_a if enable_autodiff: P = ot.emd(a=a, b=b, M=M, numItermax=2000000) - pairs = np.argwhere(P[:-1, :-1]) - diag1 = np.nonzero(P[:-1, -1]) - diag2 = np.nonzero(P[-1, :-1]) + pairs_X_Y = np.argwhere(P[:-1, :-1]) + pairs_X_diag = np.nonzero(P[:-1, -1]) + pairs_Y_diag = np.nonzero(P[-1, :-1]) dists = [] # empty arrays are not handled properly by the helpers, so we avoid calling them - if len(pairs): - dists.append((Y_orig[pairs[:, 1]] - X_orig[pairs[:, 0]]).norms.lp(internal_p, axis=-1).norms.lp(order)) - if len(diag1): - dists.append(_perstot_autodiff(X_orig[diag1], order, internal_p)) - if len(diag2): - dists.append(_perstot_autodiff(Y_orig[diag2], order, internal_p)) + if len(pairs_X_Y): + dists.append((Y_orig[pairs_X_Y[:, 1]] - X_orig[pairs_X_Y[:, 0]]).norms.lp(internal_p, axis=-1).norms.lp(order)) + if len(pairs_X_diag): + dists.append(_perstot_autodiff(X_orig[pairs_X_diag], order, internal_p)) + if len(pairs_Y_diag): + dists.append(_perstot_autodiff(Y_orig[pairs_Y_diag], order, internal_p)) dists = [dist.reshape(1) for dist in dists] return ep.concatenate(dists).norms.lp(order).raw # We can also concatenate the 3 vectors to compute just one norm. 
-- cgit v1.2.3 From 51f7b5bb15f351d08af4c26bd1ffdfe979199976 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 22 Apr 2020 16:29:26 +0200 Subject: Test value of computed gradient --- src/python/test/test_wasserstein_distance.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py index 6bfcb2ee..90d26809 100755 --- a/src/python/test/test_wasserstein_distance.py +++ b/src/python/test/test_wasserstein_distance.py @@ -105,8 +105,19 @@ def test_wasserstein_distance_grad(): diag2 = torch.tensor([[2.8, 4.45], [9.5, 14.1]], requires_grad=True) diag3 = torch.tensor([[2.8, 4.45], [9.5, 14.1]], requires_grad=True) assert diag1.grad is None and diag2.grad is None and diag3.grad is None - dist1 = pot(diag1, diag2, internal_p=2, order=2, enable_autodiff=True) - dist2 = pot(diag3, torch.tensor([]), internal_p=2, order=2, enable_autodiff=True) - dist1.backward() - dist2.backward() + dist12 = pot(diag1, diag2, internal_p=2, order=2, enable_autodiff=True) + dist30 = pot(diag3, torch.tensor([]), internal_p=2, order=2, enable_autodiff=True) + dist12.backward() + dist30.backward() assert not torch.isnan(diag1.grad).any() and not torch.isnan(diag2.grad).any() and not torch.isnan(diag3.grad).any() + diag4 = torch.tensor([[0., 10.]], requires_grad=True) + diag5 = torch.tensor([[1., 11.], [3., 4.]], requires_grad=True) + dist45 = pot(diag4, diag5, internal_p=1, order=1, enable_autodiff=True) + assert dist45 == 3. 
+ dist45.backward() + assert np.array_equal(diag4.grad, [[-1., -1.]]) + assert np.array_equal(diag5.grad, [[1., 1.], [-1., 1.]]) + diag6 = torch.tensor([[5., 10.]], requires_grad=True) + pot(diag6, diag6, internal_p=2, order=2, enable_autodiff=True).backward() + # https://github.com/jonasrauber/eagerpy/issues/6 + # assert np.array_equal(diag6.grad, [[0., 0.]]) -- cgit v1.2.3 From ba17759cf922d246a0a74ac5cf99f67d48a7d8c3 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 22 Apr 2020 16:52:27 +0200 Subject: Clarify the doc of enable_autodiff --- src/python/gudhi/wasserstein/wasserstein.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 3d1caeb3..0d164eda 100644 --- a/src/python/gudhi/wasserstein/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -100,7 +100,10 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_a :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). :param enable_autodiff: If X and Y are torch.tensor, tensorflow.Tensor or jax.numpy.ndarray, make the computation - transparent to automatic differentiation. + transparent to automatic differentiation. This requires the package EagerPy. + + .. note:: This considers the function defined on the coordinates of the off-diagonal points of X and Y + and lets the various frameworks compute its gradient. It never pulls new points from the diagonal. :type enable_autodiff: bool :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with respect to the internal_p-norm as ground metric. 
-- cgit v1.2.3 From a643583a4740fc40cf1e06e6cc1b4d17ca14000f Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 22 Apr 2020 17:39:52 +0200 Subject: Document incompatibility of matching=True and enable_autodiff --- src/python/gudhi/wasserstein/wasserstein.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/python/gudhi/wasserstein/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py index 0d164eda..89ecab1c 100644 --- a/src/python/gudhi/wasserstein/wasserstein.py +++ b/src/python/gudhi/wasserstein/wasserstein.py @@ -100,7 +100,8 @@ def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2., enable_a :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). :param enable_autodiff: If X and Y are torch.tensor, tensorflow.Tensor or jax.numpy.ndarray, make the computation - transparent to automatic differentiation. This requires the package EagerPy. + transparent to automatic differentiation. This requires the package EagerPy and is currently incompatible + with `matching=True`. .. note:: This considers the function defined on the coordinates of the off-diagonal points of X and Y and lets the various frameworks compute its gradient. It never pulls new points from the diagonal. -- cgit v1.2.3 From c5db8c1aec523c0cdf72c75b29e4ba94b51487b8 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 22 Apr 2020 19:46:29 +0200 Subject: Reduce the probability of failure of test_dtm It is expected that hnsw sometimes misses one neighbor, which has an impact on the DTM, especially if the number of neighbors considered is low. 
--- src/python/test/test_dtm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 859189fa..bff4c267 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -16,7 +16,7 @@ import torch def test_dtm_compare_euclidean(): pts = numpy.random.rand(1000, 4) - k = 3 + k = 6 dtm = DistanceToMeasure(k, implementation="ckdtree") r0 = dtm.fit_transform(pts) dtm = DistanceToMeasure(k, implementation="sklearn") @@ -27,7 +27,7 @@ def test_dtm_compare_euclidean(): assert r2 == pytest.approx(r0) dtm = DistanceToMeasure(k, implementation="hnsw") r3 = dtm.fit_transform(pts) - assert r3 == pytest.approx(r0) + assert r3 == pytest.approx(r0, rel=0.1) from scipy.spatial.distance import cdist d = cdist(pts, pts) -- cgit v1.2.3 From 0f7fe01852dcf827da35460592bd3a17ca0ab08e Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 23 Apr 2020 13:30:32 +0200 Subject: Fix pasto in the doc --- src/python/gudhi/simplex_tree.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 7728ebfc..93f5b332 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -560,7 +560,7 @@ cdef class SimplexTree: """This function writes the persistence intervals of the simplicial complex in a user given file name. - :param persistence_file: The specific dimension. + :param persistence_file: Name of the file. :type persistence_file: string. 
:note: intervals_in_dim function requires -- cgit v1.2.3 From 65f6ca41a9cd6574a0ca8fa9b781c787064fe4ed Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 23 Apr 2020 14:40:44 +0200 Subject: Add missing DOI --- biblio/bibliography.bib | 2 ++ 1 file changed, 2 insertions(+) diff --git a/biblio/bibliography.bib b/biblio/bibliography.bib index b017a07e..07623a31 100644 --- a/biblio/bibliography.bib +++ b/biblio/bibliography.bib @@ -30,6 +30,7 @@ journal = {Foundations of Computational Mathematics}, number = {6}, pages = {1333--1396}, publisher = {Springer-Verlag}, +doi = {10.1007/s10208-017-9370-z}, title = {{Structure and stability of the one-dimensional Mapper}}, volume = {18}, year = {2017} @@ -47,6 +48,7 @@ journal = {Foundations of Computational Mathematics}, number = {1}, pages = {79--103}, publisher = {Springer-Verlag}, +doi = {10.1007/s10208-008-9027-z}, title = {{Extending persistence using Poincar{\'{e}} and Lefschetz duality}}, volume = {9}, year = {2009} -- cgit v1.2.3 From 658a754397287e8de216ae91d3c9a3c492e4db2d Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Fri, 24 Apr 2020 09:00:39 +0200 Subject: Fix bibliography for sphinx --- src/python/doc/alpha_complex_user.rst | 11 ++--------- src/python/doc/bottleneck_distance_user.rst | 6 ------ src/python/doc/cubical_complex_user.rst | 7 ------- src/python/doc/index.rst | 7 ------- src/python/doc/nerve_gic_complex_ref.rst | 7 ------- src/python/doc/nerve_gic_complex_user.rst | 7 ------- src/python/doc/persistent_cohomology_user.rst | 7 ------- src/python/doc/rips_complex_user.rst | 7 ------- src/python/doc/simplex_tree_user.rst | 7 ------- src/python/doc/tangential_complex_user.rst | 8 -------- src/python/doc/wasserstein_distance_user.rst | 7 ------- src/python/doc/witness_complex_user.rst | 7 ------- src/python/doc/zbibliography.rst | 10 ++++++++++ 13 files changed, 12 insertions(+), 86 deletions(-) create mode 100644 src/python/doc/zbibliography.rst diff --git a/src/python/doc/alpha_complex_user.rst 
b/src/python/doc/alpha_complex_user.rst index 265a82d2..c65e62c8 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -10,9 +10,8 @@ Definition .. include:: alpha_complex_sum.inc `AlphaComplex` is constructing a :doc:`SimplexTree ` using -`Delaunay Triangulation `_ -:cite:`cgal:hdj-t-19b` from `CGAL `_ (the Computational Geometry Algorithms Library -:cite:`cgal:eb-19b`). +`Delaunay Triangulation `_ +from `CGAL `_ (the Computational Geometry Algorithms Library). Remarks ^^^^^^^ @@ -203,9 +202,3 @@ the program output is: [4, 5, 6] -> 22.74 [3, 6] -> 30.25 -CGAL citations --------------- - -.. bibliography:: ../../biblio/how_to_cite_cgal.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst index 206fcb63..89da89d3 100644 --- a/src/python/doc/bottleneck_distance_user.rst +++ b/src/python/doc/bottleneck_distance_user.rst @@ -66,9 +66,3 @@ The output is: Bottleneck distance approximation = 0.81 Bottleneck distance value = 0.75 -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst index e8c94bf6..e4733653 100644 --- a/src/python/doc/cubical_complex_user.rst +++ b/src/python/doc/cubical_complex_user.rst @@ -158,10 +158,3 @@ Examples. --------- End user programs are available in python/example/ folder. - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index c153cdfc..13e51047 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -86,10 +86,3 @@ Point cloud utilities ********************* .. include:: point_cloud_sum.inc - -Bibliography -************ - -.. 
bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/nerve_gic_complex_ref.rst b/src/python/doc/nerve_gic_complex_ref.rst index 6a81b7af..abde2e8c 100644 --- a/src/python/doc/nerve_gic_complex_ref.rst +++ b/src/python/doc/nerve_gic_complex_ref.rst @@ -12,10 +12,3 @@ Cover complexes reference manual :show-inheritance: .. automethod:: gudhi.CoverComplex.__init__ - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/nerve_gic_complex_user.rst b/src/python/doc/nerve_gic_complex_user.rst index f709ce91..9101f45d 100644 --- a/src/python/doc/nerve_gic_complex_user.rst +++ b/src/python/doc/nerve_gic_complex_user.rst @@ -313,10 +313,3 @@ the program outputs again SC.dot which gives the following visualization after u :alt: Visualization with neato Visualization with neato - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst index 506fa3a7..4d743aac 100644 --- a/src/python/doc/persistent_cohomology_user.rst +++ b/src/python/doc/persistent_cohomology_user.rst @@ -111,10 +111,3 @@ We provide several example files: run these examples with -h for details on thei * :download:`rips_complex_diagram_persistence_from_distance_matrix_file_example.py <../example/rips_complex_diagram_persistence_from_distance_matrix_file_example.py>` * :download:`random_cubical_complex_persistence_example.py <../example/random_cubical_complex_persistence_example.py>` * :download:`tangential_complex_plain_homology_from_off_file_example.py <../example/tangential_complex_plain_homology_from_off_file_example.py>` - -Bibliography ------------- - -.. 
bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index c4bbcfb6..8efb12e6 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -347,10 +347,3 @@ until dimension 1 - one skeleton graph in other words), the output is: points in the persistence diagram will be under the diagonal, and bottleneck distance and persistence graphical tool will not work properly, this is a known issue. - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/simplex_tree_user.rst b/src/python/doc/simplex_tree_user.rst index 1b272c35..3df7617f 100644 --- a/src/python/doc/simplex_tree_user.rst +++ b/src/python/doc/simplex_tree_user.rst @@ -66,10 +66,3 @@ The output is: ([1, 2], 4.0) ([1], 0.0) ([2], 4.0) - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/tangential_complex_user.rst b/src/python/doc/tangential_complex_user.rst index cf8199cc..3d45473b 100644 --- a/src/python/doc/tangential_complex_user.rst +++ b/src/python/doc/tangential_complex_user.rst @@ -194,11 +194,3 @@ The output is: Tangential contains 4 vertices. Inconsistencies has been fixed. - - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index c24da74d..c443bab5 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -164,10 +164,3 @@ The output is: [[0.27916667 0.55416667] [0.7375 0.7625 ] [0.2375 0.2625 ]] - -Bibliography ------------- - -.. 
bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/witness_complex_user.rst b/src/python/doc/witness_complex_user.rst index 799f5444..08dcd288 100644 --- a/src/python/doc/witness_complex_user.rst +++ b/src/python/doc/witness_complex_user.rst @@ -126,10 +126,3 @@ Example2: Computing persistence using strong relaxed witness complex Here is an example of constructing a strong witness complex filtration and computing persistence on it: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` - -Bibliography ------------- - -.. bibliography:: ../../biblio/bibliography.bib - :filter: docname in docnames - :style: unsrt diff --git a/src/python/doc/zbibliography.rst b/src/python/doc/zbibliography.rst new file mode 100644 index 00000000..4c377b46 --- /dev/null +++ b/src/python/doc/zbibliography.rst @@ -0,0 +1,10 @@ +:orphan: + +.. To get rid of WARNING: document isn't included in any toctree + +Bibliography +------------ + +.. 
bibliography:: ../../biblio/bibliography.bib + :style: unsrt + -- cgit v1.2.3 From 66337063d2ee3770275268c264548e99db3ec7f0 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Fri, 24 Apr 2020 19:11:05 +0200 Subject: Code review: plain instead of unsrt for biblio - concatenate biblio files - undo cgal citation removal --- src/cmake/modules/GUDHI_user_version_target.cmake | 6 +++++- src/python/doc/alpha_complex_user.rst | 3 ++- src/python/doc/zbibliography.rst | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/cmake/modules/GUDHI_user_version_target.cmake b/src/cmake/modules/GUDHI_user_version_target.cmake index 257d1939..9cf648e3 100644 --- a/src/cmake/modules/GUDHI_user_version_target.cmake +++ b/src/cmake/modules/GUDHI_user_version_target.cmake @@ -26,8 +26,12 @@ add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E # Generate bib files for Doxygen - cf. root CMakeLists.txt for explanation string(TIMESTAMP GUDHI_VERSION_YEAR "%Y") configure_file(${CMAKE_SOURCE_DIR}/biblio/how_to_cite_gudhi.bib.in "${CMAKE_CURRENT_BINARY_DIR}/biblio/how_to_cite_gudhi.bib" @ONLY) -file(COPY "${CMAKE_SOURCE_DIR}/biblio/how_to_cite_cgal.bib" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/biblio/") file(COPY "${CMAKE_SOURCE_DIR}/biblio/bibliography.bib" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/biblio/") + +# append cgal citation inside bibliography - sphinx cannot deal with more than one bib file +file(READ "${CMAKE_SOURCE_DIR}/biblio/how_to_cite_cgal.bib" CGAL_CITATION_CONTENT) +file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/biblio/bibliography.bib" "${CGAL_CITATION_CONTENT}") + # Copy biblio directory for user version add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_BINARY_DIR}/biblio ${GUDHI_USER_VERSION_DIR}/biblio) diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index c65e62c8..a3b35c10 100644 --- a/src/python/doc/alpha_complex_user.rst +++ 
b/src/python/doc/alpha_complex_user.rst @@ -11,7 +11,8 @@ Definition `AlphaComplex` is constructing a :doc:`SimplexTree ` using `Delaunay Triangulation `_ -from `CGAL `_ (the Computational Geometry Algorithms Library). +:cite:`cgal:hdj-t-19b` from `CGAL `_ (the Computational Geometry Algorithms Library +:cite:`cgal:eb-19b`). Remarks ^^^^^^^ diff --git a/src/python/doc/zbibliography.rst b/src/python/doc/zbibliography.rst index 4c377b46..e23fcf25 100644 --- a/src/python/doc/zbibliography.rst +++ b/src/python/doc/zbibliography.rst @@ -6,5 +6,5 @@ Bibliography ------------ .. bibliography:: ../../biblio/bibliography.bib - :style: unsrt + :style: plain -- cgit v1.2.3 From 9e9511152a0495d123091d04af264e187fc6ab21 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Sat, 25 Apr 2020 11:02:14 +0200 Subject: Fix #259 --- src/python/gudhi/persistence_graphical_tools.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/python/gudhi/persistence_graphical_tools.py b/src/python/gudhi/persistence_graphical_tools.py index cc3db467..03fc9066 100644 --- a/src/python/gudhi/persistence_graphical_tools.py +++ b/src/python/gudhi/persistence_graphical_tools.py @@ -109,9 +109,6 @@ def plot_persistence_barcode( plt.rc('text', usetex=True) plt.rc('font', family='serif') - - persistence = _array_handler(persistence) - if persistence_file != "": if path.isfile(persistence_file): # Reset persistence @@ -126,6 +123,8 @@ def plot_persistence_barcode( print("file " + persistence_file + " not found.") return None + persistence = _array_handler(persistence) + if max_barcodes != 1000: print("Deprecated parameter. 
It has been replaced by max_intervals") max_intervals = max_barcodes @@ -255,8 +254,6 @@ def plot_persistence_diagram( plt.rc('text', usetex=True) plt.rc('font', family='serif') - persistence = _array_handler(persistence) - if persistence_file != "": if path.isfile(persistence_file): # Reset persistence @@ -271,6 +268,8 @@ def plot_persistence_diagram( print("file " + persistence_file + " not found.") return None + persistence = _array_handler(persistence) + if max_plots != 1000: print("Deprecated parameter. It has been replaced by max_intervals") max_intervals = max_plots @@ -425,8 +424,6 @@ def plot_persistence_density( plt.rc('text', usetex=True) plt.rc('font', family='serif') - persistence = _array_handler(persistence) - if persistence_file != "": if dimension is None: # All dimension case @@ -440,6 +437,7 @@ def plot_persistence_density( return None if len(persistence) > 0: + persistence = _array_handler(persistence) persistence_dim = np.array( [ (dim_interval[1][0], dim_interval[1][1]) -- cgit v1.2.3 From ae80ba10d9bf333a418b255e72c0be2a3c7e73ae Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Sun, 26 Apr 2020 09:16:31 +0200 Subject: Fix alpha complex user sphinx warnings as sphinx was confusing bullet lists and bold font syntax --- src/python/doc/alpha_complex_user.rst | 39 +++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index a3b35c10..60a2f94e 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -89,25 +89,28 @@ In order to build the alpha complex, first, a Simplex tree is built from the cel Filtration value computation algorithm ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - **for** i : dimension :math:`\rightarrow` 0 **do** - **for all** :math:`\sigma` of dimension i - **if** filtration(:math:`\sigma`) is NaN **then** - filtration(:math:`\sigma`) = :math:`\alpha^2(\sigma)` - **end 
if** +.. code-block:: bash + + for i : dimension → 0 do + for all σ of dimension i + if filtration(σ) is NaN then + filtration(σ)=α2(σ) + end if + for all τ face of σ do // propagate alpha filtration value + if filtration(τ) is not NaN then + filtration(τ) = min( filtration(τ), filtration(σ) ) + else + if τ is not Gabriel for σ then + filtration(τ) = filtration(σ) + end if + end if + end for + end for + end for + + make_filtration_non_decreasing() + prune_above_filtration() - *//propagate alpha filtration value* - - **for all** :math:`\tau` face of :math:`\sigma` - **if** filtration(:math:`\tau`) is not NaN **then** - filtration(:math:`\tau`) = filtration(:math:`\sigma`) - **end if** - **end for** - **end for** - **end for** - - make_filtration_non_decreasing() - - prune_above_filtration() Dimension 2 ^^^^^^^^^^^ -- cgit v1.2.3 From f47b9607519b5c8c89bbe40341cf5bcc1382f5ef Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Sun, 26 Apr 2020 10:08:29 +0200 Subject: Fix barycenter sphinx warnings --- src/python/doc/alpha_complex_user.rst | 2 +- src/python/gudhi/wasserstein/barycenter.py | 53 +++++++++++++----------------- 2 files changed, 24 insertions(+), 31 deletions(-) diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index 60a2f94e..02d85389 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -94,7 +94,7 @@ Filtration value computation algorithm for i : dimension → 0 do for all σ of dimension i if filtration(σ) is NaN then - filtration(σ)=α2(σ) + filtration(σ)=α²(σ) end if for all τ face of σ do // propagate alpha filtration value if filtration(τ) is not NaN then diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py index de7aea81..1cf8edb3 100644 --- a/src/python/gudhi/wasserstein/barycenter.py +++ b/src/python/gudhi/wasserstein/barycenter.py @@ -18,8 +18,7 @@ from gudhi.wasserstein import wasserstein_distance def _mean(x, m): 
''' :param x: a list of 2D-points, off diagonal, x_0... x_{k-1} - :param m: total amount of points taken into account, - that is we have (m-k) copies of diagonal + :param m: total amount of points taken into account, that is we have (m-k) copies of diagonal :returns: the weighted mean of x with (m-k) copies of the diagonal ''' k = len(x) @@ -33,37 +32,31 @@ def _mean(x, m): def lagrangian_barycenter(pdiagset, init=None, verbose=False): ''' - :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)` - (`n` can variate), encoding a set of - persistence diagrams with only finite coordinates. + :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)` (`n` can vary), encoding a set of persistence + diagrams with only finite coordinates. :param init: The initial value for barycenter estimate. - If ``None``, init is made on a random diagram from the dataset. - Otherwise, it can be an ``int`` - (then initialization is made on ``pdiagset[init]``) - or a `(n x 2)` ``numpy.array`` enconding - a persistence diagram with `n` points. + If ``None``, init is made on a random diagram from the dataset. + Otherwise, it can be an ``int`` (then initialization is made on ``pdiagset[init]``) + or a `(n x 2)` ``numpy.array`` encoding a persistence diagram with `n` points. :type init: ``int``, or (n x 2) ``np.array`` - :param verbose: if ``True``, returns additional information about the - barycenter. + :param verbose: if ``True``, returns additional information about the barycenter. :type verbose: boolean - :returns: If not verbose (default), a ``numpy.array`` encoding - the barycenter estimate of pdiagset - (local minimum of the energy function). - If ``pdiagset`` is empty, returns ``None``. - If verbose, returns a couple ``(Y, log)`` - where ``Y`` is the barycenter estimate, - and ``log`` is a ``dict`` that contains additional informations: - - - `"groupings"`, a list of list of pairs ``(i,j)``.
- Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates - that ``pdiagset[k][i]`` is matched to ``Y[j]`` - if ``i = -1`` or ``j = -1``, it means they - represent the diagonal. - - - `"energy"`, ``float`` representing the Frechet energy value obtained. - It is the mean of squared distances of observations to the output. - - - `"nb_iter"`, ``int`` number of iterations performed before convergence of the algorithm. + :returns: If not verbose (default), a ``numpy.array`` encoding the barycenter estimate of pdiagset + (local minimum of the energy function). + If ``pdiagset`` is empty, returns ``None``. + If verbose, returns a couple ``(Y, log)`` where ``Y`` is the barycenter estimate, + and ``log`` is a ``dict`` that contains additional information: + + - `"groupings"`, a list of lists of pairs ``(i,j)``. + + Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates that ``pdiagset[k][i]`` is matched to ``Y[j]`` + if ``i = -1`` or ``j = -1``, it means they represent the diagonal. + + - `"energy"`, ``float`` representing the Frechet energy value obtained. + + It is the mean of squared distances of observations to the output. + + - `"nb_iter"`, ``int`` number of iterations performed before convergence of the algorithm.
''' X = pdiagset # to shorten notations, not a copy m = len(X) # number of diagrams we are averaging -- cgit v1.2.3 From 88043e6b9da458eee7bdb0b9793f94a4e7d0aaa0 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Sun, 26 Apr 2020 10:24:30 +0200 Subject: vim code block has a better highlighting code --- src/python/doc/alpha_complex_user.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index 02d85389..ec218969 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -89,7 +89,7 @@ In order to build the alpha complex, first, a Simplex tree is built from the cel Filtration value computation algorithm ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. code-block:: bash +.. code-block:: vim for i : dimension → 0 do for all σ of dimension i -- cgit v1.2.3 From 484732c8ad30721ba4fa596bcb8a3835ad3bc431 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 27 Apr 2020 07:06:16 +0200 Subject: lint pseudo code --- src/python/doc/alpha_complex_user.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index ec218969..de706de9 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -94,7 +94,7 @@ Filtration value computation algorithm for i : dimension → 0 do for all σ of dimension i if filtration(σ) is NaN then - filtration(σ)=α²(σ) + filtration(σ) = α²(σ) end if for all τ face of σ do // propagate alpha filtration value if filtration(τ) is not NaN then -- cgit v1.2.3 From 87311ec2d59211320e763bc9bc531858b489ff7e Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Tue, 28 Apr 2020 13:28:10 -0400 Subject: added call methods + other fixes --- .../diagram_vectorizations_distances_kernels.py | 98 +++++++--------------- src/python/gudhi/representations/kernel_methods.py | 88 +++++++++++++++---- 
src/python/gudhi/representations/metrics.py | 97 +++++++++++++++++---- src/python/gudhi/representations/preprocessing.py | 60 +++++++++++++ src/python/gudhi/representations/vector_methods.py | 84 +++++++++++++++++++ 5 files changed, 326 insertions(+), 101 deletions(-) diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py index de22d9e7..ab7d8a16 100755 --- a/src/python/example/diagram_vectorizations_distances_kernels.py +++ b/src/python/example/diagram_vectorizations_distances_kernels.py @@ -9,26 +9,23 @@ from gudhi.representations import DiagramSelector, Clamping, Landscape, Silhouet TopologicalVector, DiagramScaler, BirthPersistenceTransform,\ PersistenceImage, PersistenceWeightedGaussianKernel, Entropy, \ PersistenceScaleSpaceKernel, SlicedWassersteinDistance,\ - SlicedWassersteinKernel, BottleneckDistance, PersistenceFisherKernel + SlicedWassersteinKernel, BottleneckDistance, PersistenceFisherKernel, WassersteinDistance -D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.], [0., np.inf], [5., np.inf]]) -diags = [D] +D1 = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.], [0., np.inf], [5., np.inf]]) -diags = DiagramSelector(use=True, point_type="finite").fit_transform(diags) -diags = DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diags) -diags = DiagramScaler(use=True, scalers=[([1], Clamping(maximum=.9))]).fit_transform(diags) +proc1, proc2, proc3 = DiagramSelector(use=True, point_type="finite"), DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]), DiagramScaler(use=True, scalers=[([1], Clamping(maximum=.9))]) +D1 = proc3(proc2(proc1(D1))) -D = diags[0] -plt.scatter(D[:,0],D[:,1]) +plt.scatter(D1[:,0], D1[:,1]) plt.plot([0.,1.],[0.,1.]) plt.title("Test Persistence Diagram for vector methods") plt.show() LS = Landscape(resolution=1000) -L = LS.fit_transform(diags) -plt.plot(L[0][:1000]) -plt.plot(L[0][1000:2000]) -plt.plot(L[0][2000:3000]) +L = LS(D1) 
+plt.plot(L[:1000]) +plt.plot(L[1000:2000]) +plt.plot(L[2000:3000]) plt.title("Landscape") plt.show() @@ -36,50 +33,39 @@ def pow(n): return lambda x: np.power(x[1]-x[0],n) SH = Silhouette(resolution=1000, weight=pow(2)) -sh = SH.fit_transform(diags) -plt.plot(sh[0]) +plt.plot(SH(D1)) plt.title("Silhouette") plt.show() BC = BettiCurve(resolution=1000) -bc = BC.fit_transform(diags) -plt.plot(bc[0]) +plt.plot(BC(D1)) plt.title("Betti Curve") plt.show() CP = ComplexPolynomial(threshold=-1, polynomial_type="T") -cp = CP.fit_transform(diags) -print("Complex polynomial is " + str(cp[0,:])) +print("Complex polynomial is " + str(CP(D1))) TV = TopologicalVector(threshold=-1) -tv = TV.fit_transform(diags) -print("Topological vector is " + str(tv[0,:])) +print("Topological vector is " + str(TV(D1))) PI = PersistenceImage(bandwidth=.1, weight=lambda x: x[1], im_range=[0,1,0,1], resolution=[100,100]) -pi = PI.fit_transform(diags) -plt.imshow(np.flip(np.reshape(pi[0], [100,100]), 0)) +plt.imshow(np.flip(np.reshape(PI(D1), [100,100]), 0)) plt.title("Persistence Image") plt.show() ET = Entropy(mode="scalar") -et = ET.fit_transform(diags) -print("Entropy statistic is " + str(et[0,:])) +print("Entropy statistic is " + str(ET(D1))) ET = Entropy(mode="vector", normalized=False) -et = ET.fit_transform(diags) -plt.plot(et[0]) +plt.plot(ET(D1)) plt.title("Entropy function") plt.show() -D = np.array([[1.,5.],[3.,6.],[2.,7.]]) -diags2 = [D] +D2 = np.array([[1.,5.],[3.,6.],[2.,7.]]) +D2 = proc3(proc2(proc1(D2))) -diags2 = DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diags2) - -D = diags[0] -plt.scatter(D[:,0],D[:,1]) -D = diags2[0] -plt.scatter(D[:,0],D[:,1]) +plt.scatter(D1[:,0], D1[:,1]) +plt.scatter(D2[:,0], D2[:,1]) plt.plot([0.,1.],[0.,1.]) plt.title("Test Persistence Diagrams for kernel methods") plt.show() @@ -88,56 +74,34 @@ def arctan(C,p): return lambda x: C*np.arctan(np.power(x[1], p)) PWG = PersistenceWeightedGaussianKernel(bandwidth=1., 
kernel_approx=None, weight=arctan(1.,1.)) -X = PWG.fit(diags) -Y = PWG.transform(diags2) -print("PWG kernel is " + str(Y[0][0])) +print("PWG kernel is " + str(PWG(D1, D2))) PWG = PersistenceWeightedGaussianKernel(kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2])), weight=arctan(1.,1.)) -X = PWG.fit(diags) -Y = PWG.transform(diags2) -print("Approximate PWG kernel is " + str(Y[0][0])) +print("Approximate PWG kernel is " + str(PWG(D1, D2))) PSS = PersistenceScaleSpaceKernel(bandwidth=1.) -X = PSS.fit(diags) -Y = PSS.transform(diags2) -print("PSS kernel is " + str(Y[0][0])) +print("PSS kernel is " + str(PSS(D1, D2))) PSS = PersistenceScaleSpaceKernel(kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2]))) -X = PSS.fit(diags) -Y = PSS.transform(diags2) -print("Approximate PSS kernel is " + str(Y[0][0])) +print("Approximate PSS kernel is " + str(PSS(D1, D2))) sW = SlicedWassersteinDistance(num_directions=100) -X = sW.fit(diags) -Y = sW.transform(diags2) -print("SW distance is " + str(Y[0][0])) +print("SW distance is " + str(sW(D1, D2))) SW = SlicedWassersteinKernel(num_directions=100, bandwidth=1.) -X = SW.fit(diags) -Y = SW.transform(diags2) -print("SW kernel is " + str(Y[0][0])) +print("SW kernel is " + str(SW(D1, D2))) W = WassersteinDistance(order=2, internal_p=2, mode="pot") -X = W.fit(diags) -Y = W.transform(diags2) -print("Wasserstein distance (POT) is " + str(Y[0][0])) +print("Wasserstein distance (POT) is " + str(W(D1, D2))) W = WassersteinDistance(order=2, internal_p=2, mode="hera", delta=0.0001) -X = W.fit(diags) -Y = W.transform(diags2) -print("Wasserstein distance (hera) is " + str(Y[0][0])) +print("Wasserstein distance (hera) is " + str(W(D1, D2))) W = BottleneckDistance(epsilon=.001) -X = W.fit(diags) -Y = W.transform(diags2) -print("Bottleneck distance is " + str(Y[0][0])) +print("Bottleneck distance is " + str(W(D1, D2))) PF = PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1.) 
-X = PF.fit(diags) -Y = PF.transform(diags2) -print("PF kernel is " + str(Y[0][0])) +print("PF kernel is " + str(PF(D1, D2))) PF = PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1., kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2]))) -X = PF.fit(diags) -Y = PF.transform(diags2) -print("Approximate PF kernel is " + str(Y[0][0])) +print("Approximate PF kernel is " + str(PF(D1, D2))) diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py index 50186d63..edd1382a 100644 --- a/src/python/gudhi/representations/kernel_methods.py +++ b/src/python/gudhi/representations/kernel_methods.py @@ -10,14 +10,14 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances, pairwise_kernels -from .metrics import SlicedWassersteinDistance, PersistenceFisherDistance, sklearn_wrapper, pairwise_persistence_diagram_distances, sliced_wasserstein_distance, persistence_fisher_distance +from .metrics import SlicedWassersteinDistance, PersistenceFisherDistance, _sklearn_wrapper, pairwise_persistence_diagram_distances, _sliced_wasserstein_distance, _persistence_fisher_distance from .preprocessing import Padding ############################################# # Kernel methods ############################ ############################################# -def persistence_weighted_gaussian_kernel(D1, D2, weight=lambda x: 1, kernel_approx=None, bandwidth=1.): +def _persistence_weighted_gaussian_kernel(D1, D2, weight=lambda x: 1, kernel_approx=None, bandwidth=1.): """ This is a function for computing the persistence weighted Gaussian kernel value from two persistence diagrams. The persistence weighted Gaussian kernel is computed by convolving the persistence diagram points with weighted Gaussian kernels. See http://proceedings.mlr.press/v48/kusano16.html for more details. 
@@ -25,7 +25,7 @@ def persistence_weighted_gaussian_kernel(D1, D2, weight=lambda x: 1, kernel_appr D1: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). D2: (m x 2) numpy.array encoding the second diagram. bandwidth (double): bandwidth of the Gaussian kernel with which persistence diagrams will be convolved - weight: weight function for the persistence diagram points. This function must be defined on 2D points, ie lists or numpy arrays of the form [p_x,p_y]. + weight: weight function for the persistence diagram points (default constant function, ie lambda x: 1). This function must be defined on 2D points, ie lists or numpy arrays of the form [p_x,p_y]. kernel_approx: kernel approximation class used to speed up computation. Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). Returns: @@ -42,7 +42,7 @@ def persistence_weighted_gaussian_kernel(D1, D2, weight=lambda x: 1, kernel_appr E = (1./(np.sqrt(2*np.pi)*bandwidth)) * np.exp(-np.square(pairwise_distances(D1,D2))/(2*bandwidth*bandwidth)) return np.sum(np.multiply(W, E)) -def persistence_scale_space_kernel(D1, D2, kernel_approx=None, bandwidth=1.): +def _persistence_scale_space_kernel(D1, D2, kernel_approx=None, bandwidth=1.): """ This is a function for computing the persistence scale space kernel value from two persistence diagrams. The persistence scale space kernel is computed by adding the symmetric to the diagonal of each point in each persistence diagram, with negative weight, and then convolving the points with a Gaussian kernel. See https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Reininghaus_A_Stable_Multi-Scale_2015_CVPR_paper.pdf for more details. 
@@ -58,32 +58,32 @@ def persistence_scale_space_kernel(D1, D2, kernel_approx=None, bandwidth=1.): DD1 = np.concatenate([D1, D1[:,[1,0]]], axis=0) DD2 = np.concatenate([D2, D2[:,[1,0]]], axis=0) weight_pss = lambda x: 1 if x[1] >= x[0] else -1 - return 0.5 * persistence_weighted_gaussian_kernel(DD1, DD2, weight=weight_pss, kernel_approx=kernel_approx, bandwidth=bandwidth) + return 0.5 * _persistence_weighted_gaussian_kernel(DD1, DD2, weight=weight_pss, kernel_approx=kernel_approx, bandwidth=bandwidth) -def pairwise_persistence_diagram_kernels(X, Y=None, metric="sliced_wasserstein", **kwargs): +def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", **kwargs): """ This function computes the kernel matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2). Parameters: X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams. Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise kernel values are computed from the first list only. - metric: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. + kernel: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. Returns: numpy array of shape (nxm): kernel matrix. 
""" XX = np.reshape(np.arange(len(X)), [-1,1]) YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) - if metric == "sliced_wasserstein": + if kernel == "sliced_wasserstein": return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="sliced_wasserstein", num_directions=kwargs["num_directions"]) / kwargs["bandwidth"]) - elif metric == "persistence_fisher": + elif kernel == "persistence_fisher": return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="persistence_fisher", kernel_approx=kwargs["kernel_approx"], bandwidth=kwargs["bandwidth"]) / kwargs["bandwidth_fisher"]) - elif metric == "persistence_scale_space": - return pairwise_kernels(XX, YY, metric=sklearn_wrapper(persistence_scale_space_kernel, X, Y, **kwargs)) - elif metric == "persistence_weighted_gaussian": - return pairwise_kernels(XX, YY, metric=sklearn_wrapper(persistence_weighted_gaussian_kernel, X, Y, **kwargs)) + elif kernel == "persistence_scale_space": + return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(_persistence_scale_space_kernel, X, Y, **kwargs)) + elif kernel == "persistence_weighted_gaussian": + return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(_persistence_weighted_gaussian_kernel, X, Y, **kwargs)) else: - return pairwise_kernels(XX, YY, metric=sklearn_wrapper(metric, **kwargs)) + return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(metric, **kwargs)) class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): """ @@ -121,7 +121,20 @@ class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise sliced Wasserstein kernel values. 
""" - return pairwise_persistence_diagram_kernels(X, self.diagrams_, metric="sliced_wasserstein", bandwidth=self.bandwidth, num_directions=self.num_directions) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="sliced_wasserstein", bandwidth=self.bandwidth, num_directions=self.num_directions) + + def __call__(self, diag1, diag2): + """ + Apply SlicedWassersteinKernel on a single pair of persistence diagrams and outputs the result. + + Parameters: + diag1 (n x 2 numpy array): first input persistence diagram. + diag2 (n x 2 numpy array): second input persistence diagram. + + Returns: + float: sliced Wasserstein kernel value. + """ + return np.exp(-_sliced_wasserstein_distance(diag1, diag2, num_directions=self.num_directions)) / self.bandwidth class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin): """ @@ -160,7 +173,20 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence weighted Gaussian kernel values. """ - return pairwise_persistence_diagram_kernels(X, self.diagrams_, metric="persistence_weighted_gaussian", bandwidth=self.bandwidth, weight=self.weight, kernel_approx=self.kernel_approx) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_weighted_gaussian", bandwidth=self.bandwidth, weight=self.weight, kernel_approx=self.kernel_approx) + + def __call__(self, diag1, diag2): + """ + Apply PersistenceWeightedGaussianKernel on a single pair of persistence diagrams and outputs the result. + + Parameters: + diag1 (n x 2 numpy array): first input persistence diagram. + diag2 (n x 2 numpy array): second input persistence diagram. + + Returns: + float: persistence weighted Gaussian kernel value. 
+ """ + return _persistence_weighted_gaussian_kernel(diag1, diag2, weight=self.weight, kernel_approx=self.kernel_approx, bandwidth=self.bandwidth) class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin): """ @@ -197,7 +223,20 @@ class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence scale space kernel values. """ - return pairwise_persistence_diagram_kernels(X, self.diagrams_, metric="persistence_scale_space", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_scale_space", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) + + def __call__(self, diag1, diag2): + """ + Apply PersistenceScaleSpaceKernel on a single pair of persistence diagrams and outputs the result. + + Parameters: + diag1 (n x 2 numpy array): first input persistence diagram. + diag2 (n x 2 numpy array): second input persistence diagram. + + Returns: + float: persistence scale space kernel value. + """ + return _persistence_scale_space_kernel(diag1, diag2, bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) class PersistenceFisherKernel(BaseEstimator, TransformerMixin): """ @@ -236,5 +275,18 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence Fisher kernel values. 
""" - return pairwise_persistence_diagram_kernels(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, bandwidth_fisher=self.bandwidth_fisher, kernel_approx=self.kernel_approx) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_fisher", bandwidth=self.bandwidth, bandwidth_fisher=self.bandwidth_fisher, kernel_approx=self.kernel_approx) + + def __call__(self, diag1, diag2): + """ + Apply PersistenceFisherKernel on a single pair of persistence diagrams and outputs the result. + + Parameters: + diag1 (n x 2 numpy array): first input persistence diagram. + diag2 (n x 2 numpy array): second input persistence diagram. + + Returns: + float: persistence Fisher kernel value. + """ + return np.exp(-_persistence_fisher_distance(diag1, diag2, bandwidth=self.bandwidth, kernel_approx=self.kernel_approx)) / self.bandwidth_fisher diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 59440b1a..a4bf19a6 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -17,7 +17,7 @@ from .preprocessing import Padding # Metrics ################################### ############################################# -def sliced_wasserstein_distance(D1, D2, num_directions): +def _sliced_wasserstein_distance(D1, D2, num_directions): """ This is a function for computing the sliced Wasserstein distance from two persistence diagrams. The Sliced Wasserstein distance is computed by projecting the persistence diagrams onto lines, comparing the projections with the 1-norm, and finally averaging over the lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details. 
@@ -42,7 +42,7 @@ def sliced_wasserstein_distance(D1, D2, num_directions): L1 = np.sum(np.abs(A-B), axis=0) return np.mean(L1) -def compute_persistence_diagram_projections(X, num_directions): +def _compute_persistence_diagram_projections(X, num_directions): """ This is a function for projecting the points of a list of persistence diagrams (as well as their diagonal projections) onto a fixed number of lines sampled uniformly on [-pi/2, pi/2]. This function can be used as a preprocessing step in order to speed up the running time for computing all pairwise sliced Wasserstein distances / kernel values on a list of persistence diagrams. @@ -51,14 +51,14 @@ def compute_persistence_diagram_projections(X, num_directions): num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation. Returns: - XX (list of n numpy arrays of shape (2*numx2)): list of projected persistence diagrams. + list of n numpy arrays of shape (2*numx2): list of projected persistence diagrams. """ thetas = np.linspace(-np.pi/2, np.pi/2, num=num_directions+1)[np.newaxis,:-1] lines = np.concatenate([np.cos(thetas), np.sin(thetas)], axis=0) XX = [np.vstack([np.matmul(D, lines), np.matmul(np.matmul(D, .5 * np.ones((2,2))), lines)]) for D in X] return XX -def sliced_wasserstein_distance_on_projections(D1, D2): +def _sliced_wasserstein_distance_on_projections(D1, D2): """ This is a function for computing the sliced Wasserstein distance between two persistence diagrams that have already been projected onto some lines. It simply amounts to comparing the sorted projections with the 1-norm, and averaging over the lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details. 
@@ -76,7 +76,7 @@ def sliced_wasserstein_distance_on_projections(D1, D2): L1 = np.sum(np.abs(A-B), axis=0) return np.mean(L1) -def persistence_fisher_distance(D1, D2, kernel_approx=None, bandwidth=1.): +def _persistence_fisher_distance(D1, D2, kernel_approx=None, bandwidth=1.): """ This is a function for computing the persistence Fisher distance from two persistence diagrams. The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. @@ -118,7 +118,7 @@ def persistence_fisher_distance(D1, D2, kernel_approx=None, bandwidth=1.): vectorj = vectorj/vectorj_sum return np.arccos( min(np.dot(np.sqrt(vectori), np.sqrt(vectorj)), 1.) ) -def sklearn_wrapper(metric, X, Y, **kwargs): +def _sklearn_wrapper(metric, X, Y, **kwargs): """ This function is a wrapper for any metric between two persistence diagrams that takes two numpy arrays of shapes (nx2) and (mx2) as arguments. """ @@ -133,7 +133,7 @@ def sklearn_wrapper(metric, X, Y, **kwargs): PAIRWISE_DISTANCE_FUNCTIONS = { "wasserstein": hera_wasserstein_distance, "hera_wasserstein": hera_wasserstein_distance, - "persistence_fisher": persistence_fisher_distance, + "persistence_fisher": _persistence_fisher_distance, } def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwargs): @@ -143,7 +143,7 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwa Parameters: X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams. Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise distances are computed from the first list only. - metric: distance to use. 
It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. + metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a symmetric function taking two numpy arrays of shape (nx2) and (mx2) as inputs. Returns: numpy array of shape (nxm): distance matrix @@ -153,25 +153,25 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwa if metric == "bottleneck": try: from .. import bottleneck_distance - return pairwise_distances(XX, YY, metric=sklearn_wrapper(bottleneck_distance, X, Y, **kwargs)) + return pairwise_distances(XX, YY, metric=_sklearn_wrapper(bottleneck_distance, X, Y, **kwargs)) except ImportError: print("Gudhi built without CGAL") raise elif metric == "pot_wasserstein": try: from gudhi.wasserstein import wasserstein_distance as pot_wasserstein_distance - return pairwise_distances(XX, YY, metric=sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs)) + return pairwise_distances(XX, YY, metric=_sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs)) except ImportError: print("POT (Python Optimal Transport) is not installed. 
Please install POT or use metric='wasserstein' or metric='hera_wasserstein'") raise elif metric == "sliced_wasserstein": - Xproj = compute_persistence_diagram_projections(X, **kwargs) - Yproj = None if Y is None else compute_persistence_diagram_projections(Y, **kwargs) - return pairwise_distances(XX, YY, metric=sklearn_wrapper(sliced_wasserstein_distance_on_projections, Xproj, Yproj)) + Xproj = _compute_persistence_diagram_projections(X, **kwargs) + Yproj = None if Y is None else _compute_persistence_diagram_projections(Y, **kwargs) + return pairwise_distances(XX, YY, metric=_sklearn_wrapper(_sliced_wasserstein_distance_on_projections, Xproj, Yproj)) elif type(metric) == str: - return pairwise_distances(XX, YY, metric=sklearn_wrapper(PAIRWISE_DISTANCE_FUNCTIONS[metric], X, Y, **kwargs)) + return pairwise_distances(XX, YY, metric=_sklearn_wrapper(PAIRWISE_DISTANCE_FUNCTIONS[metric], X, Y, **kwargs)) else: - return pairwise_distances(XX, YY, metric=sklearn_wrapper(metric, X, Y, **kwargs)) + return pairwise_distances(XX, YY, metric=_sklearn_wrapper(metric, X, Y, **kwargs)) class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): """ @@ -209,6 +209,19 @@ class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): """ return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="sliced_wasserstein", num_directions=self.num_directions) + def __call__(self, diag1, diag2): + """ + Apply SlicedWassersteinDistance on a single pair of persistence diagrams and outputs the result. + + Parameters: + diag1 (n x 2 numpy array): first input persistence diagram. + diag2 (n x 2 numpy array): second input persistence diagram. + + Returns: + float: sliced Wasserstein distance. + """ + return _sliced_wasserstein_distance(diag1, diag2, num_directions=self.num_directions) + class BottleneckDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the bottleneck distance matrix from a list of persistence diagrams. 
@@ -246,6 +259,24 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon) return Xfit + def __call__(self, diag1, diag2): + """ + Apply BottleneckDistance on a single pair of persistence diagrams and outputs the result. + + Parameters: + diag1 (n x 2 numpy array): first input persistence diagram. + diag2 (n x 2 numpy array): second input persistence diagram. + + Returns: + float: bottleneck distance. + """ + try: + from .. import bottleneck_distance + return bottleneck_distance(diag1, diag2, e=self.epsilon) + except ImportError: + print("Gudhi built without CGAL") + raise + class PersistenceFisherDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the persistence Fisher distance matrix from a list of persistence diagrams. The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. @@ -283,6 +314,19 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin): """ return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) + def __call__(self, diag1, diag2): + """ + Apply PersistenceFisherDistance on a single pair of persistence diagrams and outputs the result. + + Parameters: + diag1 (n x 2 numpy array): first input persistence diagram. + diag2 (n x 2 numpy array): second input persistence diagram. + + Returns: + float: persistence Fisher distance. 
+ """ + return _persistence_fisher_distance(diag1, diag2, bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) + class WassersteinDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the Wasserstein distance matrix from a list of persistence diagrams. @@ -325,5 +369,26 @@ class WassersteinDistance(BaseEstimator, TransformerMixin): if self.metric == "hera_wasserstein": Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, delta=self.delta) else: - Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p) + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, matching=False) return Xfit + + def __call__(self, diag1, diag2): + """ + Apply WassersteinDistance on a single pair of persistence diagrams and outputs the result. + + Parameters: + diag1 (n x 2 numpy array): first input persistence diagram. + diag2 (n x 2 numpy array): second input persistence diagram. + + Returns: + float: Wasserstein distance. + """ + if self.metric == "hera_wasserstein": + return hera_wasserstein_distance(diag1, diag2, order=self.order, internal_p=self.internal_p, delta=self.delta) + else: + try: + from gudhi.wasserstein import wasserstein_distance as pot_wasserstein_distance + return pot_wasserstein_distance(diag1, diag2, order=self.order, internal_p=self.internal_p, matching=False) + except ImportError: + print("POT (Python Optimal Transport) is not installed. 
Please install POT or use metric='wasserstein' or metric='hera_wasserstein'") + raise diff --git a/src/python/gudhi/representations/preprocessing.py b/src/python/gudhi/representations/preprocessing.py index a39b00e4..a8545349 100644 --- a/src/python/gudhi/representations/preprocessing.py +++ b/src/python/gudhi/representations/preprocessing.py @@ -54,6 +54,18 @@ class BirthPersistenceTransform(BaseEstimator, TransformerMixin): Xfit.append(new_diag) return Xfit + def __call__(self, diag): + """ + Apply BirthPersistenceTransform on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + n x 2 numpy array: transformed persistence diagram. + """ + return self.fit_transform([diag])[0] + class Clamping(BaseEstimator, TransformerMixin): """ This is a class for clamping values. It can be used as a parameter for the DiagramScaler class, for instance if you want to clamp abscissae or ordinates of persistence diagrams. @@ -142,6 +154,18 @@ class DiagramScaler(BaseEstimator, TransformerMixin): Xfit[i][:,I] = np.squeeze(scaler.transform(np.reshape(Xfit[i][:,I], [-1,1]))) return Xfit + def __call__(self, diag): + """ + Apply DiagramScaler on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + n x 2 numpy array: transformed persistence diagram. + """ + return self.fit_transform([diag])[0] + class Padding(BaseEstimator, TransformerMixin): """ This is a class for padding a list of persistence diagrams with dummy points, so that all persistence diagrams end up with the same number of points. @@ -186,6 +210,18 @@ class Padding(BaseEstimator, TransformerMixin): Xfit = X return Xfit + def __call__(self, diag): + """ + Apply Padding on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + n x 2 numpy array: padded persistence diagram. 
+ """ + return self.fit_transform([diag])[0] + class ProminentPoints(BaseEstimator, TransformerMixin): """ This is a class for removing points that are close or far from the diagonal in persistence diagrams. If persistence diagrams are n x 2 numpy arrays (i.e. persistence diagrams with ordinary features), points are ordered and thresholded by distance-to-diagonal. If persistence diagrams are n x 1 numpy arrays (i.e. persistence diagrams with essential features), points are not ordered and thresholded by first coordinate. @@ -259,6 +295,18 @@ class ProminentPoints(BaseEstimator, TransformerMixin): Xfit = X return Xfit + def __call__(self, diag): + """ + Apply ProminentPoints on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + n x 2 numpy array: thresholded persistence diagram. + """ + return self.fit_transform([diag])[0] + class DiagramSelector(BaseEstimator, TransformerMixin): """ This is a class for extracting finite or essential points in persistence diagrams. @@ -303,3 +351,15 @@ class DiagramSelector(BaseEstimator, TransformerMixin): else: Xfit = X return Xfit + + def __call__(self, diag): + """ + Apply DiagramSelector on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + n x 2 numpy array: extracted persistence diagram. + """ + return self.fit_transform([diag])[0] diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index fe26dbe2..46fee086 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -81,6 +81,18 @@ class PersistenceImage(BaseEstimator, TransformerMixin): return Xfit + def __call__(self, diag): + """ + Apply PersistenceImage on a single persistence diagram and outputs the result. 
+ + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + numpy array with shape (number of pixels = **resolution[0]** x **resolution[1]**):: output persistence image. + """ + return self.fit_transform([diag])[0,:] + class Landscape(BaseEstimator, TransformerMixin): """ This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details. @@ -170,6 +182,18 @@ class Landscape(BaseEstimator, TransformerMixin): return Xfit + def __call__(self, diag): + """ + Apply Landscape on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + numpy array with shape (number of samples = **num_landscapes** x **resolution**): output persistence landscape. + """ + return self.fit_transform([diag])[0,:] + class Silhouette(BaseEstimator, TransformerMixin): """ This is a class for computing persistence silhouettes from a list of persistence diagrams. A persistence silhouette is computed by taking a weighted average of the collection of 1D piecewise-linear functions given by the persistence landscapes, and then by evenly sampling this average on a given range. Finally, the corresponding vector of samples is returned. See https://arxiv.org/abs/1312.0308 for more details. @@ -248,6 +272,18 @@ class Silhouette(BaseEstimator, TransformerMixin): return Xfit + def __call__(self, diag): + """ + Apply Silhouette on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. 
+ + Returns: + numpy array with shape (**resolution**): output persistence silhouette. + """ + return self.fit_transform([diag])[0,:] + class BettiCurve(BaseEstimator, TransformerMixin): """ This is a class for computing Betti curves from a list of persistence diagrams. A Betti curve is a 1D piecewise-constant function obtained from the rank function. It is sampled evenly on a given range and the vector of samples is returned. See https://www.researchgate.net/publication/316604237_Time_Series_Classification_via_Topological_Data_Analysis for more details. @@ -308,6 +344,18 @@ class BettiCurve(BaseEstimator, TransformerMixin): return Xfit + def __call__(self, diag): + """ + Apply BettiCurve on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + numpy array with shape (**resolution**): output Betti curve. + """ + return self.fit_transform([diag])[0,:] + class Entropy(BaseEstimator, TransformerMixin): """ This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired from Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros. @@ -378,6 +426,18 @@ class Entropy(BaseEstimator, TransformerMixin): return Xfit + def __call__(self, diag): + """ + Apply Entropy on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + numpy array with shape (1 if **mode** = "scalar" else **resolution**): output entropy. + """ + return self.fit_transform([diag])[0,:] + class TopologicalVector(BaseEstimator, TransformerMixin): """ This is a class for computing topological vectors from a list of persistence diagrams. 
The topological vector associated to a persistence diagram is the sorted vector of a slight modification of the pairwise distances between the persistence diagram points. See https://diglib.eg.org/handle/10.1111/cgf12692 for more details. @@ -431,6 +491,18 @@ class TopologicalVector(BaseEstimator, TransformerMixin): return Xfit + def __call__(self, diag): + """ + Apply TopologicalVector on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + numpy array with shape (**threshold**): output topological vector. + """ + return self.fit_transform([diag])[0,:] + class ComplexPolynomial(BaseEstimator, TransformerMixin): """ This is a class for computing complex polynomials from a list of persistence diagrams. The persistence diagram points are seen as the roots of some complex polynomial, whose coefficients are returned in a complex vector. See https://link.springer.com/chapter/10.1007%2F978-3-319-23231-7_27 for more details. @@ -490,3 +562,15 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin): coeff = np.array(coeff[::-1])[1:] Xfit[d, :min(thresh, coeff.shape[0])] = coeff[:min(thresh, coeff.shape[0])] return Xfit + + def __call__(self, diag): + """ + Apply ComplexPolynomial on a single persistence diagram and outputs the result. + + Parameters: + diag (n x 2 numpy array): input persistence diagram. + + Returns: + numpy array with shape (**threshold**): output complex vector of coefficients. 
+ """ + return self.fit_transform([diag])[0,:] -- cgit v1.2.3 From c2b6d95f0b01ca913ddc704350cbfe37bcf13c3a Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Tue, 28 Apr 2020 19:28:24 -0400 Subject: update output --- .../include/gudhi/Bitmap_cubical_complex_base.h | 5 ++-- src/python/gudhi/cubical_complex.pyx | 33 ++++++++++++++++++---- src/python/gudhi/periodic_cubical_complex.pyx | 33 ++++++++++++++++++---- .../include/Persistent_cohomology_interface.h | 6 ++-- src/python/test/test_cubical_complex.py | 7 ++++- 5 files changed, 69 insertions(+), 15 deletions(-) diff --git a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h index 6441c129..248ebdb6 100644 --- a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h +++ b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h @@ -110,8 +110,9 @@ class Bitmap_cubical_complex_base { virtual inline std::vector get_coboundary_of_a_cell(std::size_t cell) const; /** - * This function computes the index of one of the top-dimensional cubes (chosen arbitrarily) associated - * to a given simplex handle. Note that the input parameter is not necessarily a cube, it might also + * This function finds a top-dimensional cell that is incident to the input cell and has + * the same filtration value. In case several cells are suitable, an arbitrary one is + * returned. Note that the input parameter is not necessarily a cube, it might also * be an edge or vertex of a cube. On the other hand, the output is always indicating the position of * a cube in the data structure. 
**/ diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index 69d0f0b6..884b0664 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -187,18 +187,41 @@ cdef class CubicalComplex: top-dimensional cells have the same filtration value, we arbitrarily return one of the two when calling the function on one of their common faces. - :returns: The top-dimensional cells/cofaces of the positive and negative cells, together with the corresponding homological dimension. - :rtype: numpy array of integers of shape [number_of_persistence_points, 3], the integers of eah row being: (homological dimension, - index of positive top-dimensional cell, index of negative top-dimensional cell). If the homological feature is essential, i.e., if - the death time is +infinity, then the index of the corresponding negative top-dimensional cell is -1. + :returns: The top-dimensional cells/cofaces of the positive and negative cells, + together with the corresponding homological dimension, in two lists of numpy arrays of integers. + The first list contains the regular persistence pairs, grouped by dimension. + It contains numpy arrays of shape [number_of_persistence_points, 2]. + The indices of the arrays in the list correspond to the homological dimensions, and the + integers of each row in each array correspond to: (index of positive top-dimensional cell, + index of negative top-dimensional cell). + The second list contains the essential features, grouped by dimension. + It contains numpy arrays of shape [number_of_persistence_points, 1]. + The indices of the arrays in the list correspond to the homological dimensions, and the + integers of each row in each array correspond to: (index of positive top-dimensional cell). 
""" cdef vector[vector[int]] persistence_result if self.pcohptr != NULL: + output = [[],[]] persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() + pr = np.array(persistence_result) + + ess_ind = np.argwhere(pr[:,2] == -1)[:,0] + ess = pr[ess_ind] + max_h = max(ess[:,0])+1 + for h in range(max_h): + hidxs = np.argwhere(ess[:,0] == h)[:,0] + output[1].append(ess[hidxs][:,1]) + + reg_ind = np.setdiff1d(np.array(range(len(pr))), ess_ind) + reg = pr[reg_ind] + max_h = max(reg[:,0])+1 + for h in range(max_h): + hidxs = np.argwhere(reg[:,0] == h)[:,0] + output[0].append(reg[hidxs][:,1:]) else: print("cofaces_of_persistence_pairs function requires persistence function" " to be launched first.") - return np.array(persistence_result) + return output def betti_numbers(self): """This function returns the Betti numbers of the complex. diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index 78565cf8..3cf2ff01 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -192,18 +192,41 @@ cdef class PeriodicCubicalComplex: top-dimensional cells have the same filtration value, we arbitrarily return one of the two when calling the function on one of their common faces. - :returns: The top-dimensional cells/cofaces of the positive and negative cells, together with the corresponding homological dimension. - :rtype: numpy array of integers of shape [number_of_persistence_points, 3], the integers of eah row being: (homological dimension, - index of positive top-dimensional cell, index of negative top-dimensional cell). If the homological feature is essential, i.e., if - the death time is +infinity, then the index of the corresponding negative top-dimensional cell is -1. + :returns: The top-dimensional cells/cofaces of the positive and negative cells, + together with the corresponding homological dimension, in two lists of numpy arrays of integers. 
+ The first list contains the regular persistence pairs, grouped by dimension. + It contains numpy arrays of shape [number_of_persistence_points, 2]. + The indices of the arrays in the list correspond to the homological dimensions, and the + integers of each row in each array correspond to: (index of positive top-dimensional cell, + index of negative top-dimensional cell). + The second list contains the essential features, grouped by dimension. + It contains numpy arrays of shape [number_of_persistence_points, 1]. + The indices of the arrays in the list correspond to the homological dimensions, and the + integers of each row in each array correspond to: (index of positive top-dimensional cell). """ cdef vector[vector[int]] persistence_result if self.pcohptr != NULL: + output = [[],[]] persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() + pr = np.array(persistence_result) + + ess_ind = np.argwhere(pr[:,2] == -1)[:,0] + ess = pr[ess_ind] + max_h = max(ess[:,0])+1 + for h in range(max_h): + hidxs = np.argwhere(ess[:,0] == h)[:,0] + output[1].append(ess[hidxs][:,1]) + + reg_ind = np.setdiff1d(np.array(range(len(pr))), ess_ind) + reg = pr[reg_ind] + max_h = max(reg[:,0])+1 + for h in range(max_h): + hidxs = np.argwhere(reg[:,0] == h)[:,0] + output[0].append(reg[hidxs][:,1:]) else: print("cofaces_of_persistence_pairs function requires persistence function" " to be launched first.") - return np.array(persistence_result) + return output def betti_numbers(self): """This function returns the Betti numbers of the complex. 
diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index 59024212..32e6ee9c 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -16,6 +16,7 @@ #include #include // for std::pair #include // for sort +#include namespace Gudhi { @@ -80,8 +81,9 @@ persistent_cohomology::Persistent_cohomology order; std::sort(max_splx.begin(), max_splx.end()); - for (unsigned int i = 0; i < max_splx.size(); i++) order.insert(std::make_pair(max_splx[i], i)); + std::unordered_map order; + //std::sort(max_splx.begin(), max_splx.end()); + for (unsigned int i = 0; i < max_splx.size(); i++) order.emplace(max_splx[i], i); std::vector> persistence_pairs; for (auto pair : pairs) { diff --git a/src/python/test/test_cubical_complex.py b/src/python/test/test_cubical_complex.py index dd7653c2..5c59db8f 100755 --- a/src/python/test/test_cubical_complex.py +++ b/src/python/test/test_cubical_complex.py @@ -151,4 +151,9 @@ def test_connected_sublevel_sets(): def test_cubical_generators(): cub = CubicalComplex(top_dimensional_cells = [[0, 0, 0], [0, 1, 0], [0, 0, 0]]) cub.persistence() - assert np.array_equal(cub.cofaces_of_persistence_pairs(), np.array([[1, 7, 4], [0, 8, -1]])) + g = cub.cofaces_of_persistence_pairs() + assert len(g[0]) == 2 + assert len(g[1]) == 1 + assert np.array_equal(g[0][0], np.empty(shape=[0,2])) + assert np.array_equal(g[0][1], np.array([[7, 4]])) + assert np.array_equal(g[1][0], np.array([8])) -- cgit v1.2.3 From 31080642b9029446efe85dabcf49145d9a7519b6 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 29 Apr 2020 19:34:54 +0200 Subject: Make size() return size_t It probably returns the biggest integer used in the whole module, it doesn't make sense that it uses a smaller type. 
--- src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h index 1eb77c9c..e6a78a6d 100644 --- a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h +++ b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h @@ -197,7 +197,7 @@ class Bitmap_cubical_complex_base { /** * Returns number of all cubes in the data structure. **/ - inline unsigned size() const { return this->data.size(); } + inline std::size_t size() const { return this->data.size(); } /** * Writing to stream operator. By using it we get the values T of cells in order in which they are stored in the -- cgit v1.2.3 From 2b5586fd60848b159fb4fa4481e61bab0e0cd766 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Wed, 29 Apr 2020 18:31:24 -0400 Subject: small modifs --- .../include/gudhi/Bitmap_cubical_complex_base.h | 4 +-- src/python/gudhi/cubical_complex.pyx | 42 +++++++++++----------- .../include/Persistent_cohomology_interface.h | 7 +++- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h index 248ebdb6..eaf8a0b6 100644 --- a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h +++ b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h @@ -116,7 +116,7 @@ class Bitmap_cubical_complex_base { * be an edge or vertex of a cube. On the other hand, the output is always indicating the position of * a cube in the data structure. **/ - inline int get_top_dimensional_coface_of_a_cell(int splx); + inline int get_top_dimensional_coface_of_a_cell(size_t splx); /** * This procedure compute incidence numbers between cubes. 
For a cube \f$A\f$ of @@ -612,7 +612,7 @@ void Bitmap_cubical_complex_base::setup_bitmap_based_on_top_dimensional_cells } template -int Bitmap_cubical_complex_base::get_top_dimensional_coface_of_a_cell(int splx) { +int Bitmap_cubical_complex_base::get_top_dimensional_coface_of_a_cell(size_t splx) { if (this->get_dimension_of_a_cell(splx) == this->dimension()){return splx;} else{ for (auto v : this->get_coboundary_of_a_cell(splx)){ diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index 884b0664..b16a037f 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -199,28 +199,28 @@ cdef class CubicalComplex: The indices of the arrays in the list correspond to the homological dimensions, and the integers of each row in each array correspond to: (index of positive top-dimensional cell). """ + + assert self.pcohptr != NULL, "cofaces_of_persistence_pairs function requires persistence function to be launched first." + cdef vector[vector[int]] persistence_result - if self.pcohptr != NULL: - output = [[],[]] - persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() - pr = np.array(persistence_result) - - ess_ind = np.argwhere(pr[:,2] == -1)[:,0] - ess = pr[ess_ind] - max_h = max(ess[:,0])+1 - for h in range(max_h): - hidxs = np.argwhere(ess[:,0] == h)[:,0] - output[1].append(ess[hidxs][:,1]) - - reg_ind = np.setdiff1d(np.array(range(len(pr))), ess_ind) - reg = pr[reg_ind] - max_h = max(reg[:,0])+1 - for h in range(max_h): - hidxs = np.argwhere(reg[:,0] == h)[:,0] - output[0].append(reg[hidxs][:,1:]) - else: - print("cofaces_of_persistence_pairs function requires persistence function" - " to be launched first.") + output = [[],[]] + persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() + pr = np.array(persistence_result) + + ess_ind = np.argwhere(pr[:,2] == -1)[:,0] + ess = pr[ess_ind] + max_h = max(ess[:,0])+1 + for h in range(max_h): + hidxs = 
np.argwhere(ess[:,0] == h)[:,0] + output[1].append(ess[hidxs][:,1]) + + reg_ind = np.setdiff1d(np.array(range(len(pr))), ess_ind) + reg = pr[reg_ind] + max_h = max(reg[:,0])+1 + for h in range(max_h): + hidxs = np.argwhere(reg[:,0] == h)[:,0] + output[0].append(reg[hidxs][:,1:]) + return output def betti_numbers(self): diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index c4e60a27..cec18546 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -68,11 +68,16 @@ persistent_cohomology::Persistent_cohomology> cofaces_of_cubical_persistence_pairs() { // Warning: this function is meant to be used with CubicalComplex only!! - auto pairs = persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); // Gather all top-dimensional cells and store their simplex handles -- cgit v1.2.3 From b2177e897b575e0c8d17b8ae5ed3259541a06bea Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Wed, 29 Apr 2020 19:16:50 -0400 Subject: small modifs --- src/python/doc/representations.rst | 2 +- src/python/example/diagram_vectorizations_distances_kernels.py | 4 +++- src/python/gudhi/representations/kernel_methods.py | 3 ++- src/python/gudhi/representations/metrics.py | 9 ++++----- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst index 11dcbcf9..041e3247 100644 --- a/src/python/doc/representations.rst +++ b/src/python/doc/representations.rst @@ -10,7 +10,7 @@ Representations manual This module, originally available at https://github.com/MathieuCarriere/sklearn-tda and named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning, by providing implementations of most of the vector representations for persistence diagrams in the literature, in a scikit-learn format. 
More specifically, it provides tools, using the scikit-learn standard interface, to compute distances and kernels on persistence diagrams, and to convert these diagrams into vectors in Euclidean space. -A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time. +A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time. The classes in this module can handle several persistence diagrams at once. In that case, the diagrams are provided as a list of numpy arrays. Note that it is not necessary for the diagrams to have the same number of points, i.e., for the corresponding arrays to have the same number of rows: all classes can handle arrays with different shapes. 
A small example is provided diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py index ab7d8a16..c4a71a7a 100755 --- a/src/python/example/diagram_vectorizations_distances_kernels.py +++ b/src/python/example/diagram_vectorizations_distances_kernels.py @@ -13,7 +13,9 @@ from gudhi.representations import DiagramSelector, Clamping, Landscape, Silhouet D1 = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.], [0., np.inf], [5., np.inf]]) -proc1, proc2, proc3 = DiagramSelector(use=True, point_type="finite"), DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]), DiagramScaler(use=True, scalers=[([1], Clamping(maximum=.9))]) +proc1 = DiagramSelector(use=True, point_type="finite") +proc2 = DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]) +proc3 = DiagramScaler(use=True, scalers=[([1], Clamping(maximum=.9))]) D1 = proc3(proc2(proc1(D1))) plt.scatter(D1[:,0], D1[:,1]) diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py index edd1382a..596f4f07 100644 --- a/src/python/gudhi/representations/kernel_methods.py +++ b/src/python/gudhi/representations/kernel_methods.py @@ -67,7 +67,8 @@ def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", Parameters: X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams. Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise kernel values are computed from the first list only. - kernel: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. + kernel: kernel to use. 
It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. If it is a function, make sure that it is symmetric. + **kwargs: optional keyword parameters. Any further parameters are passed directly to the kernel function. See the docs of the various kernel classes in this module. Returns: numpy array of shape (nxm): kernel matrix. diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index a4bf19a6..ce416fb1 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -32,11 +32,9 @@ def _sliced_wasserstein_distance(D1, D2, num_directions): thetas = np.linspace(-np.pi/2, np.pi/2, num=num_directions+1)[np.newaxis,:-1] lines = np.concatenate([np.cos(thetas), np.sin(thetas)], axis=0) approx1 = np.matmul(D1, lines) - diag_proj1 = (1./2) * np.ones((2,2)) - approx_diag1 = np.matmul(np.matmul(D1, diag_proj1), lines) + approx_diag1 = np.matmul(np.broadcast_to(D1.sum(-1,keepdims=True)/2,(len(D1),2)), lines) approx2 = np.matmul(D2, lines) - diag_proj2 = (1./2) * np.ones((2,2)) - approx_diag2 = np.matmul(np.matmul(D2, diag_proj2), lines) + approx_diag2 = np.matmul(np.broadcast_to(D2.sum(-1,keepdims=True)/2,(len(D2),2)), lines) A = np.sort(np.concatenate([approx1, approx_diag2], axis=0), axis=0) B = np.sort(np.concatenate([approx2, approx_diag1], axis=0), axis=0) L1 = np.sum(np.abs(A-B), axis=0) @@ -143,7 +141,8 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwa Parameters: X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams. Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise distances are computed from the first list only. - metric: distance to use. 
It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a symmetric function taking two numpy arrays of shape (nx2) and (mx2) as inputs. + metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. If it is a function, make sure that it is symmetric and that it outputs 0 if called on the same two arrays. + **kwargs: optional keyword parameters. Any further parameters are passed directly to the distance function. See the docs of the various distance classes in this module. Returns: numpy array of shape (nxm): distance matrix -- cgit v1.2.3 From a51f4f177e29ad5b01e58c9d8dd2560fb9b4fb19 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Thu, 30 Apr 2020 00:52:52 -0400 Subject: int to size_t --- .../include/gudhi/Bitmap_cubical_complex_base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h index eaf8a0b6..e0c567ae 100644 --- a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h +++ b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h @@ -116,7 +116,7 @@ class Bitmap_cubical_complex_base { * be an edge or vertex of a cube. On the other hand, the output is always indicating the position of * a cube in the data structure. 
**/ - inline int get_top_dimensional_coface_of_a_cell(size_t splx); + inline size_t get_top_dimensional_coface_of_a_cell(size_t splx); /** * This procedure compute incidence numbers between cubes. For a cube \f$A\f$ of @@ -612,7 +612,7 @@ void Bitmap_cubical_complex_base::setup_bitmap_based_on_top_dimensional_cells } template -int Bitmap_cubical_complex_base::get_top_dimensional_coface_of_a_cell(size_t splx) { +size_t Bitmap_cubical_complex_base::get_top_dimensional_coface_of_a_cell(size_t splx) { if (this->get_dimension_of_a_cell(splx) == this->dimension()){return splx;} else{ for (auto v : this->get_coboundary_of_a_cell(splx)){ -- cgit v1.2.3 From 8edcb434b45ef07828f8111dedcbed024f469314 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Sun, 3 May 2020 09:04:17 +0200 Subject: Fix TBB Warning: tbb/task_scheduler_init.h is deprecated. --- .../example/rips_persistence_via_boundary_matrix.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/Persistent_cohomology/example/rips_persistence_via_boundary_matrix.cpp b/src/Persistent_cohomology/example/rips_persistence_via_boundary_matrix.cpp index db456f70..8c5742aa 100644 --- a/src/Persistent_cohomology/example/rips_persistence_via_boundary_matrix.cpp +++ b/src/Persistent_cohomology/example/rips_persistence_via_boundary_matrix.cpp @@ -17,10 +17,6 @@ #include -#ifdef GUDHI_USE_TBB -#include -#endif - #include #include @@ -67,11 +63,6 @@ int main(int argc, char * argv[]) { std::clog << "The complex contains " << st.num_simplices() << " simplices \n"; std::clog << " and has dimension " << st.dimension() << " \n"; -#ifdef GUDHI_USE_TBB - // Unnecessary, but clarifies which operations are parallel. - tbb::task_scheduler_init ts; -#endif - // Sort the simplices in the order of the filtration st.initialize_filtration(); int count = 0; @@ -81,10 +72,6 @@ int main(int argc, char * argv[]) { // Convert to a more convenient representation. 
Gudhi::Hasse_complex<> hcpx(st); -#ifdef GUDHI_USE_TBB - ts.terminate(); -#endif - // Free some space. delete &st; -- cgit v1.2.3 From ac7917ab2cbece048e554e32cc653c14440dbcc0 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 3 May 2020 20:43:11 +0200 Subject: Fewer copies and no GIL for hera Now the input arrays are not copied as long as they use a float64 data type, even if they are not contiguous. That's not important here, but I wanted an example of how to do it. More importantly, no need to hold the GIL. I was too lazy to benchmark to see if that changed anything... --- src/python/gudhi/hera.cc | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera.cc index 0d562b4c..50d49c77 100644 --- a/src/python/gudhi/hera.cc +++ b/src/python/gudhi/hera.cc @@ -11,14 +11,24 @@ #include #include -#include +#include +#include #include // Hera -#include +#include namespace py = pybind11; -typedef py::array_t Dgm; +typedef py::array_t Dgm; + +// Get m[i,0] and m[i,1] as a pair +auto pairify(void* p, ssize_t h, ssize_t w) { + return [=](ssize_t i){ + char* birth = (char*)p + i * h; + char* death = birth + w; + return std::make_pair(*(double*)birth, *(double*)death); + }; +} double wasserstein_distance( Dgm d1, Dgm d2, @@ -27,16 +37,18 @@ double wasserstein_distance( { py::buffer_info buf1 = d1.request(); py::buffer_info buf2 = d2.request(); + + py::gil_scoped_release release; + // shape (n,2) or (0) for empty if((buf1.ndim!=2 || buf1.shape[1]!=2) && (buf1.ndim!=1 || buf1.shape[0]!=0)) throw std::runtime_error("Diagram 1 must be an array of size n x 2"); if((buf2.ndim!=2 || buf2.shape[1]!=2) && (buf2.ndim!=1 || buf2.shape[0]!=0)) throw std::runtime_error("Diagram 2 must be an array of size n x 2"); - typedef std::array Point; - auto p1 = (Point*)buf1.ptr; - auto p2 = (Point*)buf2.ptr; - auto diag1 = boost::make_iterator_range(p1, p1+buf1.shape[0]); - auto diag2 = 
boost::make_iterator_range(p2, p2+buf2.shape[0]); + auto cnt1 = boost::counting_range(0, buf1.shape[0]); + auto diag1 = boost::adaptors::transform(cnt1, pairify(buf1.ptr, buf1.strides[0], buf1.strides[1])); + auto cnt2 = boost::counting_range(0, buf2.shape[0]); + auto diag2 = boost::adaptors::transform(cnt2, pairify(buf2.ptr, buf2.strides[0], buf2.strides[1])); hera::AuctionParams params; params.wasserstein_power = wasserstein_power; -- cgit v1.2.3 From d2a9aed9ada419b7715a77322ad17ddf3535d133 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 4 May 2020 19:23:40 +0200 Subject: Try to build with conda as brew fails --- azure-pipelines.yml | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 95b15db2..fccb7d57 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -4,35 +4,36 @@ jobs: displayName: "Build and test" timeoutInMinutes: 0 cancelTimeoutInMinutes: 60 - + pool: + vmImage: macOS-10.14 strategy: matrix: - macOSrelease: - imageName: 'macos-10.14' - CMakeBuildType: Release - customInstallation: 'brew update && brew install graphviz doxygen boost eigen gmp mpfr tbb cgal' + Python36: + python.version: '3.6' + Python37: + python.version: '3.7' + Python38: + python.version: '3.8' - pool: - vmImage: $(imageName) - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.7' - architecture: 'x64' + - bash: echo "##vso[task.prependpath]$CONDA/bin" + displayName: Add conda to PATH + + - bash: conda create --yes --quiet --name gudhi_build_env + displayName: Create Anaconda environment - - script: | - $(customInstallation) + - bash: | + source activate gudhi_build_env + conda install --yes --quiet --name gudhi_build_env python=$PYTHON_VERSION git submodule update --init - python -m pip install --upgrade pip python -m pip install --user -r .github/build-requirements.txt python -m pip install --user -r .github/test-requirements.txt displayName: 'Install 
build dependencies' - - script: | + - bash: | mkdir build cd build cmake -DCMAKE_BUILD_TYPE:STRING=$(CMakeBuildType) -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_UTILITIES=ON -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 .. make make doxygen - ctest -j 8 --output-on-failure -E sphinx # remove sphinx build as it fails + ctest -j 8 --output-on-failure # -E sphinx remove sphinx build as it fails displayName: 'Build, test and documentation generation' -- cgit v1.2.3 From 5d03351f22f2511e3f5159f19f54b21bf2a04d61 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 4 May 2020 19:29:02 +0200 Subject: sudo ? --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index fccb7d57..b50bd91a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -19,7 +19,7 @@ jobs: - bash: echo "##vso[task.prependpath]$CONDA/bin" displayName: Add conda to PATH - - bash: conda create --yes --quiet --name gudhi_build_env + - bash: sudo conda create --yes --quiet --name gudhi_build_env displayName: Create Anaconda environment - bash: | -- cgit v1.2.3 From 7bd1941307033193da4c1cfcef873e69ca7f68f3 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 4 May 2020 19:30:55 +0200 Subject: sudo ? 
--- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b50bd91a..0fea11f6 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -24,7 +24,7 @@ jobs: - bash: | source activate gudhi_build_env - conda install --yes --quiet --name gudhi_build_env python=$PYTHON_VERSION + sudo conda install --yes --quiet --name gudhi_build_env python=$PYTHON_VERSION git submodule update --init python -m pip install --user -r .github/build-requirements.txt python -m pip install --user -r .github/test-requirements.txt -- cgit v1.2.3 From 5ad8f41550d94988214fbf128a179d918635c3cf Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 4 May 2020 20:13:05 +0200 Subject: Add some nogil for cython --- src/python/gudhi/alpha_complex.pyx | 17 +++++--- src/python/gudhi/bottleneck.pyx | 20 ++++++--- src/python/gudhi/rips_complex.pyx | 17 ++++---- src/python/gudhi/simplex_tree.pxd | 89 +++++++++++++++++++------------------- src/python/gudhi/simplex_tree.pyx | 14 ++++-- 5 files changed, 88 insertions(+), 69 deletions(-) diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx index e04dc652..d75e374a 100644 --- a/src/python/gudhi/alpha_complex.pyx +++ b/src/python/gudhi/alpha_complex.pyx @@ -27,11 +27,11 @@ __license__ = "GPL v3" cdef extern from "Alpha_complex_interface.h" namespace "Gudhi": cdef cppclass Alpha_complex_interface "Gudhi::alpha_complex::Alpha_complex_interface": - Alpha_complex_interface(vector[vector[double]] points) except + + Alpha_complex_interface(vector[vector[double]] points) nogil except + # bool from_file is a workaround for cython to find the correct signature - Alpha_complex_interface(string off_file, bool from_file) except + - vector[double] get_point(int vertex) except + - void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square) except + + Alpha_complex_interface(string off_file, bool from_file) nogil except + 
+ vector[double] get_point(int vertex) nogil except + + void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square) nogil except + # AlphaComplex python interface cdef class AlphaComplex: @@ -70,6 +70,7 @@ cdef class AlphaComplex: # The real cython constructor def __cinit__(self, points = None, off_file = ''): + cdef vector[vector[double]] pts if off_file: if os.path.isfile(off_file): self.thisptr = new Alpha_complex_interface( @@ -80,7 +81,9 @@ cdef class AlphaComplex: if points is None: # Empty Alpha construction points=[] - self.thisptr = new Alpha_complex_interface(points) + pts = points + with nogil: + self.thisptr = new Alpha_complex_interface(pts) def __dealloc__(self): @@ -113,6 +116,8 @@ cdef class AlphaComplex: :rtype: SimplexTree """ stree = SimplexTree() + cdef double mas = max_alpha_square cdef intptr_t stree_int_ptr=stree.thisptr - self.thisptr.create_simplex_tree(stree_int_ptr, max_alpha_square) + with nogil: + self.thisptr.create_simplex_tree(stree_int_ptr, mas) return stree diff --git a/src/python/gudhi/bottleneck.pyx b/src/python/gudhi/bottleneck.pyx index af011e88..6a88895e 100644 --- a/src/python/gudhi/bottleneck.pyx +++ b/src/python/gudhi/bottleneck.pyx @@ -17,8 +17,8 @@ __copyright__ = "Copyright (C) 2016 Inria" __license__ = "GPL v3" cdef extern from "Bottleneck_distance_interface.h" namespace "Gudhi::persistence_diagram": - double bottleneck(vector[pair[double, double]], vector[pair[double, double]], double) - double bottleneck(vector[pair[double, double]], vector[pair[double, double]]) + double bottleneck(vector[pair[double, double]], vector[pair[double, double]], double) nogil + double bottleneck(vector[pair[double, double]], vector[pair[double, double]]) nogil def bottleneck_distance(diagram_1, diagram_2, e=None): """This function returns the point corresponding to a given vertex. 
@@ -40,9 +40,17 @@ def bottleneck_distance(diagram_1, diagram_2, e=None): :rtype: float :returns: the bottleneck distance. """ + cdef vector[pair[double, double]] dgm1 = diagram_1 + cdef vector[pair[double, double]] dgm2 = diagram_2 + cdef double eps + cdef double ret if e is None: - # Default value is the smallest double value (not 0, 0 is for exact version) - return bottleneck(diagram_1, diagram_2) + with nogil: + # Default value is the smallest double value (not 0, 0 is for exact version) + ret = bottleneck(dgm1, dgm2) else: - # Can be 0 for exact version - return bottleneck(diagram_1, diagram_2, e) + eps = e + with nogil: + # Can be 0 for exact version + ret = bottleneck(dgm1, dgm2, eps) + return ret diff --git a/src/python/gudhi/rips_complex.pyx b/src/python/gudhi/rips_complex.pyx index deb8057a..72e82c79 100644 --- a/src/python/gudhi/rips_complex.pyx +++ b/src/python/gudhi/rips_complex.pyx @@ -23,12 +23,12 @@ __license__ = "MIT" cdef extern from "Rips_complex_interface.h" namespace "Gudhi": cdef cppclass Rips_complex_interface "Gudhi::rips_complex::Rips_complex_interface": - Rips_complex_interface() - void init_points(vector[vector[double]] values, double threshold) - void init_matrix(vector[vector[double]] values, double threshold) - void init_points_sparse(vector[vector[double]] values, double threshold, double sparse) - void init_matrix_sparse(vector[vector[double]] values, double threshold, double sparse) - void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, int dim_max) except + + Rips_complex_interface() nogil + void init_points(vector[vector[double]] values, double threshold) nogil + void init_matrix(vector[vector[double]] values, double threshold) nogil + void init_points_sparse(vector[vector[double]] values, double threshold, double sparse) nogil + void init_matrix_sparse(vector[vector[double]] values, double threshold, double sparse) nogil + void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, int 
dim_max) nogil except + # RipsComplex python interface cdef class RipsComplex: @@ -97,6 +97,7 @@ cdef class RipsComplex: """ stree = SimplexTree() cdef intptr_t stree_int_ptr=stree.thisptr - self.thisref.create_simplex_tree(stree_int_ptr, - max_dimension) + cdef int maxdim = max_dimension + with nogil: + self.thisref.create_simplex_tree(stree_int_ptr, maxdim) return stree diff --git a/src/python/gudhi/simplex_tree.pxd b/src/python/gudhi/simplex_tree.pxd index 1d4ed926..e748ac40 100644 --- a/src/python/gudhi/simplex_tree.pxd +++ b/src/python/gudhi/simplex_tree.pxd @@ -25,57 +25,56 @@ cdef extern from "Simplex_tree_interface.h" namespace "Gudhi": pass cdef cppclass Simplex_tree_simplices_iterator "Gudhi::Simplex_tree_interface::Complex_simplex_iterator": - Simplex_tree_simplices_iterator() - Simplex_tree_simplex_handle& operator*() - Simplex_tree_simplices_iterator operator++() - bint operator!=(Simplex_tree_simplices_iterator) + Simplex_tree_simplices_iterator() nogil + Simplex_tree_simplex_handle& operator*() nogil + Simplex_tree_simplices_iterator operator++() nogil + bint operator!=(Simplex_tree_simplices_iterator) nogil cdef cppclass Simplex_tree_skeleton_iterator "Gudhi::Simplex_tree_interface::Skeleton_simplex_iterator": - Simplex_tree_skeleton_iterator() - Simplex_tree_simplex_handle& operator*() - Simplex_tree_skeleton_iterator operator++() - bint operator!=(Simplex_tree_skeleton_iterator) + Simplex_tree_skeleton_iterator() nogil + Simplex_tree_simplex_handle& operator*() nogil + Simplex_tree_skeleton_iterator operator++() nogil + bint operator!=(Simplex_tree_skeleton_iterator) nogil cdef cppclass Simplex_tree_interface_full_featured "Gudhi::Simplex_tree_interface": - Simplex_tree() - double simplex_filtration(vector[int] simplex) - void assign_simplex_filtration(vector[int] simplex, double filtration) - void initialize_filtration() - int num_vertices() - int num_simplices() - void set_dimension(int dimension) - int dimension() - int upper_bound_dimension() 
- bool find_simplex(vector[int] simplex) - bool insert(vector[int] simplex, double filtration) - vector[pair[vector[int], double]] get_star(vector[int] simplex) - vector[pair[vector[int], double]] get_cofaces(vector[int] simplex, - int dimension) - void expansion(int max_dim) except + - void remove_maximal_simplex(vector[int] simplex) - bool prune_above_filtration(double filtration) - bool make_filtration_non_decreasing() - void compute_extended_filtration() - vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]] dgm, double min_persistence) + Simplex_tree() nogil + double simplex_filtration(vector[int] simplex) nogil + void assign_simplex_filtration(vector[int] simplex, double filtration) nogil + void initialize_filtration() nogil + int num_vertices() nogil + int num_simplices() nogil + void set_dimension(int dimension) nogil + int dimension() nogil + int upper_bound_dimension() nogil + bool find_simplex(vector[int] simplex) nogil + bool insert(vector[int] simplex, double filtration) nogil + vector[pair[vector[int], double]] get_star(vector[int] simplex) nogil + vector[pair[vector[int], double]] get_cofaces(vector[int] simplex, int dimension) nogil + void expansion(int max_dim) nogil except + + void remove_maximal_simplex(vector[int] simplex) nogil + bool prune_above_filtration(double filtration) nogil + bool make_filtration_non_decreasing() nogil + void compute_extended_filtration() nogil + vector[vector[pair[int, pair[double, double]]]] compute_extended_persistence_subdiagrams(vector[pair[int, pair[double, double]]] dgm, double min_persistence) nogil # Iterators over Simplex tree - pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex) - Simplex_tree_simplices_iterator get_simplices_iterator_begin() - Simplex_tree_simplices_iterator get_simplices_iterator_end() - vector[Simplex_tree_simplex_handle].const_iterator get_filtration_iterator_begin() - 
vector[Simplex_tree_simplex_handle].const_iterator get_filtration_iterator_end() - Simplex_tree_skeleton_iterator get_skeleton_iterator_begin(int dimension) - Simplex_tree_skeleton_iterator get_skeleton_iterator_end(int dimension) + pair[vector[int], double] get_simplex_and_filtration(Simplex_tree_simplex_handle f_simplex) nogil + Simplex_tree_simplices_iterator get_simplices_iterator_begin() nogil + Simplex_tree_simplices_iterator get_simplices_iterator_end() nogil + vector[Simplex_tree_simplex_handle].const_iterator get_filtration_iterator_begin() nogil + vector[Simplex_tree_simplex_handle].const_iterator get_filtration_iterator_end() nogil + Simplex_tree_skeleton_iterator get_skeleton_iterator_begin(int dimension) nogil + Simplex_tree_skeleton_iterator get_skeleton_iterator_end(int dimension) nogil cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Simplex_tree_persistence_interface "Gudhi::Persistent_cohomology_interface>": - Simplex_tree_persistence_interface(Simplex_tree_interface_full_featured * st, bool persistence_dim_max) - void compute_persistence(int homology_coeff_field, double min_persistence) - vector[pair[int, pair[double, double]]] get_persistence() - vector[int] betti_numbers() - vector[int] persistent_betti_numbers(double from_value, double to_value) - vector[pair[double,double]] intervals_in_dimension(int dimension) - void write_output_diagram(string diagram_file_name) except + - vector[pair[vector[int], vector[int]]] persistence_pairs() - pair[vector[vector[int]], vector[vector[int]]] lower_star_generators() - pair[vector[vector[int]], vector[vector[int]]] flag_generators() + Simplex_tree_persistence_interface(Simplex_tree_interface_full_featured * st, bool persistence_dim_max) nogil + void compute_persistence(int homology_coeff_field, double min_persistence) nogil + vector[pair[int, pair[double, double]]] get_persistence() nogil + vector[int] betti_numbers() nogil + vector[int] persistent_betti_numbers(double 
from_value, double to_value) nogil + vector[pair[double,double]] intervals_in_dimension(int dimension) nogil + void write_output_diagram(string diagram_file_name) nogil except + + vector[pair[vector[int], vector[int]]] persistence_pairs() nogil + pair[vector[vector[int]], vector[vector[int]]] lower_star_generators() nogil + pair[vector[vector[int]], vector[vector[int]]] flag_generators() nogil diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 55115cca..e8e4943c 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -33,7 +33,7 @@ cdef class SimplexTree: cdef public intptr_t thisptr # Get the pointer casted as it should be - cdef Simplex_tree_interface_full_featured* get_ptr(self): + cdef Simplex_tree_interface_full_featured* get_ptr(self) nogil: return (self.thisptr) cdef Simplex_tree_persistence_interface * pcohptr @@ -343,7 +343,9 @@ cdef class SimplexTree: :param max_dim: The maximal dimension. :type max_dim: int. """ - self.get_ptr().expansion(max_dim) + cdef int maxdim = max_dim + with nogil: + self.get_ptr().expansion(maxdim) def make_filtration_non_decreasing(self): """This function ensures that each simplex has a higher filtration @@ -449,8 +451,12 @@ cdef class SimplexTree: """ if self.pcohptr != NULL: del self.pcohptr - self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), persistence_dim_max) - self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + cdef bool pdm = persistence_dim_max + cdef int coef = homology_coeff_field + cdef double minp = min_persistence + with nogil: + self.pcohptr = new Simplex_tree_persistence_interface(self.get_ptr(), pdm) + self.pcohptr.compute_persistence(coef, minp) def betti_numbers(self): """This function returns the Betti numbers of the simplicial complex. 
-- cgit v1.2.3 From 62139c92181b7f405ce0e36ef6b46777cee85b34 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 4 May 2020 22:22:26 +0200 Subject: Add conda build requirements --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0fea11f6..97c84136 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -25,6 +25,7 @@ jobs: - bash: | source activate gudhi_build_env sudo conda install --yes --quiet --name gudhi_build_env python=$PYTHON_VERSION + sudo conda install --yes -c conda-forge doxygen eigen boost-cpp=1.70.0 cgal-cpp>=5.0 git submodule update --init python -m pip install --user -r .github/build-requirements.txt python -m pip install --user -r .github/test-requirements.txt -- cgit v1.2.3 From b880228fb423aeb3d662416fbb477d3ced100e08 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 4 May 2020 22:31:13 +0200 Subject: Need to activate conda env to build --- azure-pipelines.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 97c84136..2fcff411 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -10,10 +10,10 @@ jobs: matrix: Python36: python.version: '3.6' - Python37: - python.version: '3.7' - Python38: - python.version: '3.8' + #Python37: + # python.version: '3.7' + #Python38: + # python.version: '3.8' steps: - bash: echo "##vso[task.prependpath]$CONDA/bin" @@ -26,11 +26,12 @@ jobs: source activate gudhi_build_env sudo conda install --yes --quiet --name gudhi_build_env python=$PYTHON_VERSION sudo conda install --yes -c conda-forge doxygen eigen boost-cpp=1.70.0 cgal-cpp>=5.0 - git submodule update --init python -m pip install --user -r .github/build-requirements.txt python -m pip install --user -r .github/test-requirements.txt displayName: 'Install build dependencies' - bash: | + source activate gudhi_build_env + git submodule update --init mkdir build cd build cmake 
-DCMAKE_BUILD_TYPE:STRING=$(CMakeBuildType) -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_UTILITIES=ON -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 .. -- cgit v1.2.3 From 71d958891cc638b26541ca5cf6c569b43332d2b6 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 4 May 2020 22:39:10 +0200 Subject: conda update and release cmake version --- azure-pipelines.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2fcff411..b3b0ea7f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -10,6 +10,7 @@ jobs: matrix: Python36: python.version: '3.6' + CMakeBuildType: Release #Python37: # python.version: '3.7' #Python38: @@ -25,6 +26,7 @@ jobs: - bash: | source activate gudhi_build_env sudo conda install --yes --quiet --name gudhi_build_env python=$PYTHON_VERSION + sudo conda update --yes --quiet -n base -c defaults conda sudo conda install --yes -c conda-forge doxygen eigen boost-cpp=1.70.0 cgal-cpp>=5.0 python -m pip install --user -r .github/build-requirements.txt python -m pip install --user -r .github/test-requirements.txt -- cgit v1.2.3 From 03b8322e9ded09cc879867008d32baa3a91a45e5 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Tue, 5 May 2020 07:05:45 +0200 Subject: brew install cgal & Cie instead of conda install because of link issue --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b3b0ea7f..3ab2f112 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -11,6 +11,7 @@ jobs: Python36: python.version: '3.6' CMakeBuildType: Release + customInstallation: 'brew update && brew install graphviz doxygen boost eigen gmp mpfr tbb cgal' #Python37: # python.version: '3.7' #Python38: @@ -26,10 +27,9 @@ jobs: - bash: | source activate gudhi_build_env sudo conda install --yes --quiet --name gudhi_build_env python=$PYTHON_VERSION - sudo conda update --yes --quiet -n base -c defaults conda - sudo conda install --yes 
-c conda-forge doxygen eigen boost-cpp=1.70.0 cgal-cpp>=5.0 python -m pip install --user -r .github/build-requirements.txt python -m pip install --user -r .github/test-requirements.txt + $(customInstallation) displayName: 'Install build dependencies' - bash: | source activate gudhi_build_env -- cgit v1.2.3 From 8da9158e9a2ffb128eb1b5b05d4e8574ff70d771 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Tue, 5 May 2020 07:48:16 +0200 Subject: Remove sphinx test and matrix --- azure-pipelines.yml | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 3ab2f112..7b5334a7 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,16 +6,10 @@ jobs: cancelTimeoutInMinutes: 60 pool: vmImage: macOS-10.14 - strategy: - matrix: - Python36: - python.version: '3.6' - CMakeBuildType: Release - customInstallation: 'brew update && brew install graphviz doxygen boost eigen gmp mpfr tbb cgal' - #Python37: - # python.version: '3.7' - #Python38: - # python.version: '3.8' + variables: + pythonVersion: '3.6' + cmakeBuildType: Release + customInstallation: 'brew update && brew install graphviz doxygen boost eigen gmp mpfr tbb cgal' steps: - bash: echo "##vso[task.prependpath]$CONDA/bin" @@ -26,7 +20,7 @@ jobs: - bash: | source activate gudhi_build_env - sudo conda install --yes --quiet --name gudhi_build_env python=$PYTHON_VERSION + sudo conda install --yes --quiet --name gudhi_build_env python=$(pythonVersion) python -m pip install --user -r .github/build-requirements.txt python -m pip install --user -r .github/test-requirements.txt $(customInstallation) @@ -36,8 +30,8 @@ jobs: git submodule update --init mkdir build cd build - cmake -DCMAKE_BUILD_TYPE:STRING=$(CMakeBuildType) -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_UTILITIES=ON -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 .. 
- make + cmake -DCMAKE_BUILD_TYPE:STRING=$(cmakeBuildType) -DWITH_GUDHI_TEST=ON -DWITH_GUDHI_UTILITIES=ON -DWITH_GUDHI_PYTHON=ON -DPython_ADDITIONAL_VERSIONS=3 .. + make -j 4 make doxygen - ctest -j 8 --output-on-failure # -E sphinx remove sphinx build as it fails + ctest -j 4 --output-on-failure -E sphinx # remove sphinx build as it fails displayName: 'Build, test and documentation generation' -- cgit v1.2.3 From 99549c20e9173b536ac816ab683bc13025f182a2 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 5 May 2020 11:07:53 +0200 Subject: fix use of threads and n_jobs in Parallel --- src/python/gudhi/point_cloud/knn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 07553d6d..34e80b5d 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -200,8 +200,8 @@ class KNearestNeighbors: from joblib import Parallel, delayed, effective_n_jobs from sklearn.utils import gen_even_slices - slices = gen_even_slices(len(X), effective_n_jobs(-1)) - parallel = Parallel(backend="threading", n_jobs=-1) + slices = gen_even_slices(len(X), effective_n_jobs(n_jobs)) + parallel = Parallel(prefer="threads", n_jobs=n_jobs) if self.params.get("sort_results", True): def func(M): @@ -242,8 +242,8 @@ class KNearestNeighbors: else: func = lambda M: numpy.partition(M, k - 1)[:, 0:k] - slices = gen_even_slices(len(X), effective_n_jobs(-1)) - parallel = Parallel(backend="threading", n_jobs=-1) + slices = gen_even_slices(len(X), effective_n_jobs(n_jobs)) + parallel = Parallel(prefer="threads", n_jobs=n_jobs) distances = numpy.concatenate(parallel(delayed(func)(X[s]) for s in slices)) return distances return None -- cgit v1.2.3 From dac92c5ae9da6aa21fdcd261737e08d6898dbbdc Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 6 May 2020 12:54:21 +0200 Subject: Avoid reading outside of allocated region The result was unused, but better be safe. 
--- src/python/gudhi/hera.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera.cc index 50d49c77..63bbb075 100644 --- a/src/python/gudhi/hera.cc +++ b/src/python/gudhi/hera.cc @@ -45,10 +45,12 @@ double wasserstein_distance( throw std::runtime_error("Diagram 1 must be an array of size n x 2"); if((buf2.ndim!=2 || buf2.shape[1]!=2) && (buf2.ndim!=1 || buf2.shape[0]!=0)) throw std::runtime_error("Diagram 2 must be an array of size n x 2"); + ssize_t stride11 = buf1.ndim == 2 ? buf1.strides[1] : 0; + ssize_t stride21 = buf2.ndim == 2 ? buf2.strides[1] : 0; auto cnt1 = boost::counting_range(0, buf1.shape[0]); - auto diag1 = boost::adaptors::transform(cnt1, pairify(buf1.ptr, buf1.strides[0], buf1.strides[1])); + auto diag1 = boost::adaptors::transform(cnt1, pairify(buf1.ptr, buf1.strides[0], stride11)); auto cnt2 = boost::counting_range(0, buf2.shape[0]); - auto diag2 = boost::adaptors::transform(cnt2, pairify(buf2.ptr, buf2.strides[0], buf2.strides[1])); + auto diag2 = boost::adaptors::transform(cnt2, pairify(buf2.ptr, buf2.strides[0], stride21)); hera::AuctionParams params; params.wasserstein_power = wasserstein_power; -- cgit v1.2.3 From 5c5e2c3075235079fda94fc6a159cc5275f85a0c Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 6 May 2020 14:13:14 +0200 Subject: Refactor the numpy -> C++ range conversion If we want to reuse it for bottleneck... 
--- src/python/gudhi/hera.cc | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera.cc index 63bbb075..5aec1806 100644 --- a/src/python/gudhi/hera.cc +++ b/src/python/gudhi/hera.cc @@ -22,7 +22,7 @@ namespace py = pybind11; typedef py::array_t Dgm; // Get m[i,0] and m[i,1] as a pair -auto pairify(void* p, ssize_t h, ssize_t w) { +static auto pairify(void* p, ssize_t h, ssize_t w) { return [=](ssize_t i){ char* birth = (char*)p + i * h; char* death = birth + w; @@ -30,28 +30,29 @@ auto pairify(void* p, ssize_t h, ssize_t w) { }; } +inline auto numpy_to_range_of_pairs(py::array_t dgm) { + py::buffer_info buf = dgm.request(); + // shape (n,2) or (0) for empty + if((buf.ndim!=2 || buf.shape[1]!=2) && (buf.ndim!=1 || buf.shape[0]!=0)) + throw std::runtime_error("Diagram must be an array of size n x 2"); + // In the case of shape (0), avoid reading non-existing strides[1] even if we won't use it. + ssize_t stride1 = buf.ndim == 2 ? buf.strides[1] : 0; + auto cnt = boost::counting_range(0, buf.shape[0]); + return boost::adaptors::transform(cnt, pairify(buf.ptr, buf.strides[0], stride1)); + // Be careful that the returned range cannot contain references to dead temporaries. +} + double wasserstein_distance( Dgm d1, Dgm d2, double wasserstein_power, double internal_p, double delta) { - py::buffer_info buf1 = d1.request(); - py::buffer_info buf2 = d2.request(); + // I *think* the call to request() has to be before releasing the GIL. 
+ auto diag1 = numpy_to_range_of_pairs(d1); + auto diag2 = numpy_to_range_of_pairs(d2); py::gil_scoped_release release; - // shape (n,2) or (0) for empty - if((buf1.ndim!=2 || buf1.shape[1]!=2) && (buf1.ndim!=1 || buf1.shape[0]!=0)) - throw std::runtime_error("Diagram 1 must be an array of size n x 2"); - if((buf2.ndim!=2 || buf2.shape[1]!=2) && (buf2.ndim!=1 || buf2.shape[0]!=0)) - throw std::runtime_error("Diagram 2 must be an array of size n x 2"); - ssize_t stride11 = buf1.ndim == 2 ? buf1.strides[1] : 0; - ssize_t stride21 = buf2.ndim == 2 ? buf2.strides[1] : 0; - auto cnt1 = boost::counting_range(0, buf1.shape[0]); - auto diag1 = boost::adaptors::transform(cnt1, pairify(buf1.ptr, buf1.strides[0], stride11)); - auto cnt2 = boost::counting_range(0, buf2.shape[0]); - auto diag2 = boost::adaptors::transform(cnt2, pairify(buf2.ptr, buf2.strides[0], stride21)); - hera::AuctionParams params; params.wasserstein_power = wasserstein_power; // hera encodes infinity as -1... -- cgit v1.2.3 From 47e5ac79af3a354358515c0213b28848f878fde6 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 6 May 2020 22:59:36 +0200 Subject: Reimplement the bottleneck python wrapper with pybind11 --- src/python/CMakeLists.txt | 33 ++++++++++--------- src/python/gudhi/bottleneck.cc | 51 +++++++++++++++++++++++++++++ src/python/gudhi/bottleneck.pyx | 48 --------------------------- src/python/gudhi/hera.cc | 32 +----------------- src/python/include/pybind11_diagram_utils.h | 39 ++++++++++++++++++++++ src/python/setup.py.in | 19 +++++++++-- 6 files changed, 125 insertions(+), 97 deletions(-) create mode 100644 src/python/gudhi/bottleneck.cc delete mode 100644 src/python/gudhi/bottleneck.pyx create mode 100644 src/python/include/pybind11_diagram_utils.h diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index d712e189..976a8b52 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -34,6 +34,7 @@ endfunction( add_gudhi_debug_info ) if(PYTHONINTERP_FOUND) 
if(PYBIND11_FOUND) add_gudhi_debug_info("Pybind11 version ${PYBIND11_VERSION}") + set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'bottleneck', ") set(GUDHI_PYTHON_MODULES_EXTRA "${GUDHI_PYTHON_MODULES_EXTRA}'hera', ") endif() if(CYTHON_FOUND) @@ -46,7 +47,6 @@ if(PYTHONINTERP_FOUND) set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'reader_utils', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'witness_complex', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'strong_witness_complex', ") - set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'bottleneck', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'nerve_gic', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'subsampling', ") set(GUDHI_PYTHON_MODULES "${GUDHI_PYTHON_MODULES}'tangential_complex', ") @@ -120,24 +120,25 @@ if(PYTHONINTERP_FOUND) set(GUDHI_PYTHON_EXTRA_COMPILE_ARGS "${GUDHI_PYTHON_EXTRA_COMPILE_ARGS}'-DCGAL_EIGEN3_ENABLED', ") endif (EIGEN3_FOUND) - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'off_reader', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'simplex_tree', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'rips_complex', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'cubical_complex', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'periodic_cubical_complex', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'reader_utils', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'witness_complex', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'strong_witness_complex', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'off_reader', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'simplex_tree', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'rips_complex', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'cubical_complex', ") + 
set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'periodic_cubical_complex', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'reader_utils', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'witness_complex', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'strong_witness_complex', ") + set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera', ") if (NOT CGAL_VERSION VERSION_LESS 4.11.0) - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'bottleneck', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'nerve_gic', ") + set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'bottleneck', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'nerve_gic', ") endif () if (NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'alpha_complex', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'subsampling', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'tangential_complex', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'euclidean_witness_complex', ") - set(GUDHI_PYTHON_MODULES_TO_COMPILE "${GUDHI_PYTHON_MODULES_TO_COMPILE}'euclidean_strong_witness_complex', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'alpha_complex', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'subsampling', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'tangential_complex', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'euclidean_witness_complex', ") + set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'euclidean_strong_witness_complex', ") endif () if(CGAL_FOUND) diff --git a/src/python/gudhi/bottleneck.cc b/src/python/gudhi/bottleneck.cc new file mode 100644 index 00000000..577e5e0b --- /dev/null +++ b/src/python/gudhi/bottleneck.cc @@ -0,0 +1,51 @@ +/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. 
+ * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + * Author(s): Marc Glisse + * + * Copyright (C) 2020 Inria + * + * Modification(s): + * - YYYY/MM Author: Description of the modification + */ + +#include + +#include + +double bottleneck(Dgm d1, Dgm d2, double epsilon) +{ + // I *think* the call to request() has to be before releasing the GIL. + auto diag1 = numpy_to_range_of_pairs(d1); + auto diag2 = numpy_to_range_of_pairs(d2); + + py::gil_scoped_release release; + + return Gudhi::persistence_diagram::bottleneck_distance(diag1, diag2, epsilon); +} + +PYBIND11_MODULE(bottleneck, m) { + m.attr("__license__") = "GPL v3"; + m.def("bottleneck_distance", &bottleneck, + py::arg("diagram_1"), py::arg("diagram_2"), + py::arg("e") = (std::numeric_limits::min)(), + R"pbdoc( + This function returns the point corresponding to a given vertex. + + :param diagram_1: The first diagram. + :type diagram_1: vector[pair[double, double]] + :param diagram_2: The second diagram. + :type diagram_2: vector[pair[double, double]] + :param e: If `e` is 0, this uses an expensive algorithm to compute the + exact distance. + If `e` is not 0, it asks for an additive `e`-approximation, and + currently also allows a small multiplicative error (the last 2 or 3 + bits of the mantissa may be wrong). This version of the algorithm takes + advantage of the limited precision of `double` and is usually a lot + faster to compute, whatever the value of `e`. + + Thus, by default, `e` is the smallest positive double. + :type e: float + :rtype: float + :returns: the bottleneck distance. + )pbdoc"); +} diff --git a/src/python/gudhi/bottleneck.pyx b/src/python/gudhi/bottleneck.pyx deleted file mode 100644 index af011e88..00000000 --- a/src/python/gudhi/bottleneck.pyx +++ /dev/null @@ -1,48 +0,0 @@ -# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. 
-# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. -# Author(s): Vincent Rouvreau -# -# Copyright (C) 2016 Inria -# -# Modification(s): -# - YYYY/MM Author: Description of the modification - -from cython cimport numeric -from libcpp.vector cimport vector -from libcpp.utility cimport pair -import os - -__author__ = "Vincent Rouvreau" -__copyright__ = "Copyright (C) 2016 Inria" -__license__ = "GPL v3" - -cdef extern from "Bottleneck_distance_interface.h" namespace "Gudhi::persistence_diagram": - double bottleneck(vector[pair[double, double]], vector[pair[double, double]], double) - double bottleneck(vector[pair[double, double]], vector[pair[double, double]]) - -def bottleneck_distance(diagram_1, diagram_2, e=None): - """This function returns the point corresponding to a given vertex. - - :param diagram_1: The first diagram. - :type diagram_1: vector[pair[double, double]] - :param diagram_2: The second diagram. - :type diagram_2: vector[pair[double, double]] - :param e: If `e` is 0, this uses an expensive algorithm to compute the - exact distance. - If `e` is not 0, it asks for an additive `e`-approximation, and - currently also allows a small multiplicative error (the last 2 or 3 - bits of the mantissa may be wrong). This version of the algorithm takes - advantage of the limited precision of `double` and is usually a lot - faster to compute, whatever the value of `e`. - - Thus, by default, `e` is the smallest positive double. - :type e: float - :rtype: float - :returns: the bottleneck distance. 
- """ - if e is None: - # Default value is the smallest double value (not 0, 0 is for exact version) - return bottleneck(diagram_1, diagram_2) - else: - # Can be 0 for exact version - return bottleneck(diagram_1, diagram_2, e) diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera.cc index 5aec1806..ea80a9a8 100644 --- a/src/python/gudhi/hera.cc +++ b/src/python/gudhi/hera.cc @@ -8,39 +8,9 @@ * - YYYY/MM Author: Description of the modification */ -#include -#include - -#include -#include - #include // Hera -#include - -namespace py = pybind11; -typedef py::array_t Dgm; - -// Get m[i,0] and m[i,1] as a pair -static auto pairify(void* p, ssize_t h, ssize_t w) { - return [=](ssize_t i){ - char* birth = (char*)p + i * h; - char* death = birth + w; - return std::make_pair(*(double*)birth, *(double*)death); - }; -} - -inline auto numpy_to_range_of_pairs(py::array_t dgm) { - py::buffer_info buf = dgm.request(); - // shape (n,2) or (0) for empty - if((buf.ndim!=2 || buf.shape[1]!=2) && (buf.ndim!=1 || buf.shape[0]!=0)) - throw std::runtime_error("Diagram must be an array of size n x 2"); - // In the case of shape (0), avoid reading non-existing strides[1] even if we won't use it. - ssize_t stride1 = buf.ndim == 2 ? buf.strides[1] : 0; - auto cnt = boost::counting_range(0, buf.shape[0]); - return boost::adaptors::transform(cnt, pairify(buf.ptr, buf.strides[0], stride1)); - // Be careful that the returned range cannot contain references to dead temporaries. -} +#include double wasserstein_distance( Dgm d1, Dgm d2, diff --git a/src/python/include/pybind11_diagram_utils.h b/src/python/include/pybind11_diagram_utils.h new file mode 100644 index 00000000..d9627258 --- /dev/null +++ b/src/python/include/pybind11_diagram_utils.h @@ -0,0 +1,39 @@ +/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. 
+ * Author(s): Marc Glisse + * + * Copyright (C) 2020 Inria + * + * Modification(s): + * - YYYY/MM Author: Description of the modification + */ + +#include +#include + +#include +#include + +namespace py = pybind11; +typedef py::array_t Dgm; + +// Get m[i,0] and m[i,1] as a pair +static auto pairify(void* p, ssize_t h, ssize_t w) { + return [=](ssize_t i){ + char* birth = (char*)p + i * h; + char* death = birth + w; + return std::make_pair(*(double*)birth, *(double*)death); + }; +} + +inline auto numpy_to_range_of_pairs(py::array_t dgm) { + py::buffer_info buf = dgm.request(); + // shape (n,2) or (0) for empty + if((buf.ndim!=2 || buf.shape[1]!=2) && (buf.ndim!=1 || buf.shape[0]!=0)) + throw std::runtime_error("Diagram must be an array of size n x 2"); + // In the case of shape (0), avoid reading non-existing strides[1] even if we won't use it. + ssize_t stride1 = buf.ndim == 2 ? buf.strides[1] : 0; + auto cnt = boost::counting_range(0, buf.shape[0]); + return boost::adaptors::transform(cnt, pairify(buf.ptr, buf.strides[0], stride1)); + // Be careful that the returned range cannot contain references to dead temporaries. +} diff --git a/src/python/setup.py.in b/src/python/setup.py.in index f968bd59..852da910 100644 --- a/src/python/setup.py.in +++ b/src/python/setup.py.in @@ -18,7 +18,8 @@ __author__ = "Vincent Rouvreau" __copyright__ = "Copyright (C) 2016 Inria" __license__ = "MIT" -modules = [@GUDHI_PYTHON_MODULES_TO_COMPILE@] +cython_modules = [@GUDHI_CYTHON_MODULES@] +pybind11_modules = [@GUDHI_PYBIND11_MODULES@] source_dir='@CMAKE_CURRENT_SOURCE_DIR@/gudhi/' extra_compile_args=[@GUDHI_PYTHON_EXTRA_COMPILE_ARGS@] @@ -30,7 +31,7 @@ runtime_library_dirs=[@GUDHI_PYTHON_RUNTIME_LIBRARY_DIRS@] # Create ext_modules list from module list ext_modules = [] -for module in modules: +for module in cython_modules: ext_modules.append(Extension( 'gudhi.' 
+ module, sources = [source_dir + module + '.pyx',], @@ -55,6 +56,20 @@ ext_modules.append(Extension( extra_compile_args=extra_compile_args + [@GUDHI_PYBIND11_EXTRA_COMPILE_ARGS@], )) +if "bottleneck" in pybind11_modules: + ext_modules.append(Extension( + 'gudhi.bottleneck', + sources = [source_dir + 'bottleneck.cc'], + language = 'c++', + include_dirs = include_dirs + + [pybind11.get_include(False), pybind11.get_include(True)], + extra_compile_args=extra_compile_args + [@GUDHI_PYBIND11_EXTRA_COMPILE_ARGS@], + extra_link_args=extra_link_args, + libraries=libraries, + library_dirs=library_dirs, + runtime_library_dirs=runtime_library_dirs, + )) + setup( name = 'gudhi', packages=find_packages(), # find_namespace_packages(include=["gudhi*"]) -- cgit v1.2.3 From d61bfd349274456f8d7e0ccd64839a2d84eea0a0 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 7 May 2020 08:40:55 +0200 Subject: doc --- src/python/gudhi/bottleneck.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/python/gudhi/bottleneck.cc b/src/python/gudhi/bottleneck.cc index 577e5e0b..732cb9a8 100644 --- a/src/python/gudhi/bottleneck.cc +++ b/src/python/gudhi/bottleneck.cc @@ -32,9 +32,9 @@ PYBIND11_MODULE(bottleneck, m) { This function returns the point corresponding to a given vertex. :param diagram_1: The first diagram. - :type diagram_1: vector[pair[double, double]] + :type diagram_1: numpy array of shape (m,2) :param diagram_2: The second diagram. - :type diagram_2: vector[pair[double, double]] + :type diagram_2: numpy array of shape (n,2) :param e: If `e` is 0, this uses an expensive algorithm to compute the exact distance. If `e` is not 0, it asks for an additive `e`-approximation, and @@ -42,7 +42,6 @@ PYBIND11_MODULE(bottleneck, m) { bits of the mantissa may be wrong). This version of the algorithm takes advantage of the limited precision of `double` and is usually a lot faster to compute, whatever the value of `e`. 
- Thus, by default, `e` is the smallest positive double. :type e: float :rtype: float -- cgit v1.2.3 From acc76eb90b8cfe3f8cbb8d30f101c7f879ab61c4 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 7 May 2020 20:10:46 +0200 Subject: Warn for initialize_filtration --- src/python/gudhi/simplex_tree.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/python/gudhi/simplex_tree.pyx b/src/python/gudhi/simplex_tree.pyx index 55115cca..b23885b4 100644 --- a/src/python/gudhi/simplex_tree.pyx +++ b/src/python/gudhi/simplex_tree.pyx @@ -101,6 +101,8 @@ cdef class SimplexTree: .. deprecated:: 3.2.0 """ + import warnings + warnings.warn("Since Gudhi 3.2, calling SimplexTree.initialize_filtration is unnecessary.", DeprecationWarning) self.get_ptr().initialize_filtration() def num_vertices(self): -- cgit v1.2.3 From 778c0af7dea0c103db85986fe2e2eb5fddd7588f Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 8 May 2020 10:14:50 +0200 Subject: Loop on pybind11 modules --- src/python/setup.py.in | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/python/setup.py.in b/src/python/setup.py.in index 852da910..b9f4e3f0 100644 --- a/src/python/setup.py.in +++ b/src/python/setup.py.in @@ -46,23 +46,15 @@ for module in cython_modules: ext_modules = cythonize(ext_modules) -ext_modules.append(Extension( - 'gudhi.hera', - sources = [source_dir + 'hera.cc'], - language = 'c++', - include_dirs = include_dirs + - ['@HERA_WASSERSTEIN_INCLUDE_DIR@', - pybind11.get_include(False), pybind11.get_include(True)], - extra_compile_args=extra_compile_args + [@GUDHI_PYBIND11_EXTRA_COMPILE_ARGS@], - )) - -if "bottleneck" in pybind11_modules: +for module in pybind11_modules: + my_include_dirs = include_dirs + [pybind11.get_include(False), pybind11.get_include(True)] + if module == 'hera': + my_include_dirs = ['@HERA_WASSERSTEIN_INCLUDE_DIR@'] + my_include_dirs ext_modules.append(Extension( - 'gudhi.bottleneck', - sources = [source_dir + 'bottleneck.cc'], 
+ 'gudhi.' + module, + sources = [source_dir + module + '.cc'], language = 'c++', - include_dirs = include_dirs + - [pybind11.get_include(False), pybind11.get_include(True)], + include_dirs = my_include_dirs, extra_compile_args=extra_compile_args + [@GUDHI_PYBIND11_EXTRA_COMPILE_ARGS@], extra_link_args=extra_link_args, libraries=libraries, -- cgit v1.2.3 From 5040c75893cb864f5e780b6644b8097f7beeb3a6 Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Mon, 11 May 2020 10:45:02 +0900 Subject: document and comments added, weights modified --- src/python/doc/rips_complex_ref.rst | 51 +++++++++++++++++++++++++++++++ src/python/gudhi/weighted_rips_complex.py | 18 ++++++----- src/python/test/test_weighted_rips.py | 2 +- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/src/python/doc/rips_complex_ref.rst b/src/python/doc/rips_complex_ref.rst index 22b5616c..8fc7e1b0 100644 --- a/src/python/doc/rips_complex_ref.rst +++ b/src/python/doc/rips_complex_ref.rst @@ -12,3 +12,54 @@ Rips complex reference manual :show-inheritance: .. automethod:: gudhi.RipsComplex.__init__ + +====================================== +Weighted Rips complex reference manual +====================================== + +.. autoclass:: gudhi.WeightedRipsComplex + :members: + :undoc-members: + :show-inheritance: + + .. automethod:: gudhi.WeightedRipsComplex.__init__ + +Basic examples +-------------- + +The following example computes the weighted Rips filtration associated with a distance matrix and weights on vertices. + +.. testcode:: + + from gudhi.weighted_rips_complex import WeightedRipsComplex + dist = [[], [1]] + weights = [1, 100] + w_rips = WeightedRipsComplex(distance_matrix=dist, weights=weights) + st = w_rips.create_simplex_tree(max_dimension=2) + print(st.get_filtration()) + +The output is: + +.. testoutput:: + + [([0], 2.0), ([1], 200.0), ([0, 1], 200.0)] + +Combining with DistanceToMeasure, one can compute the DTM-filtration of a point set, as in `this notebook `_. + +..
testcode:: + + import numpy as np + from scipy.spatial.distance import cdist + from gudhi.point_cloud.dtm import DistanceToMeasure + from gudhi.weighted_rips_complex import WeightedRipsComplex + pts = np.array([[2.0, 2.0], [0.0, 1.0], [3.0, 4.0]]) + dist = cdist(pts,pts) + dtm = DistanceToMeasure(2, q=2, metric="precomputed") + r = dtm.fit_transform(dist) + w_rips = WeightedRipsComplex(distance_matrix=dist, weights=r) + st = w_rips.create_simplex_tree(max_dimension=2) + print(st.persistence()) + +.. testoutput:: + + [(0, (3.1622776601683795, inf)), (0, (3.1622776601683795, 5.39834563766817)), (0, (3.1622776601683795, 5.39834563766817))] diff --git a/src/python/gudhi/weighted_rips_complex.py b/src/python/gudhi/weighted_rips_complex.py index 83fa82c5..7401c428 100644 --- a/src/python/gudhi/weighted_rips_complex.py +++ b/src/python/gudhi/weighted_rips_complex.py @@ -11,23 +11,26 @@ from gudhi import SimplexTree class WeightedRipsComplex: """ - Class to generate a weighted Rips complex from a distance matrix and weights on vertices. + Class to generate a weighted Rips complex from a distance matrix and weights on vertices, + in the way described in the paper 'DTM-based filtrations' https://arxiv.org/abs/1811.04757. + Note that the filtration value of a vertex is twice its weight, for consistency with + RipsComplex; this differs from the definition in the paper. """ def __init__(self, distance_matrix, - weights="diagonal", + weights=None, max_filtration=float('inf')): """ Args: - distance_matrix (list of list of float): distance matrix (full square or lower triangular). - weights (list of float): (one half of) weight for each vertex. + distance_matrix (Sequence[Sequence[float]]): distance matrix (full square or lower triangular). + weights (Sequence[float]): (one half of) weight for each vertex. max_filtration (float): specifies the maximal filtration value to be considered.
""" self.distance_matrix = distance_matrix - if weights == "diagonal": - self.weights = [distance_matrix[i][i] for i in range(len(distance_matrix))] - else: + if weights is not None: self.weights = weights + else: + self.weights = [0] * len(distance_matrix) self.max_filtration = max_filtration def create_simplex_tree(self, max_dimension): @@ -47,6 +50,7 @@ class WeightedRipsComplex: for i in range(num_pts): for j in range(i): value = max(2*F[i], 2*F[j], dist[i][j] + F[i] + F[j]) + # max is needed when F is not 1-Lipschitz if value <= self.max_filtration: st.insert([i,j], filtration=value) diff --git a/src/python/test/test_weighted_rips.py b/src/python/test/test_weighted_rips.py index d3721115..59ec022a 100644 --- a/src/python/test/test_weighted_rips.py +++ b/src/python/test/test_weighted_rips.py @@ -51,7 +51,7 @@ def test_compatibility_with_filtered_rips(): assert st.num_vertices() == 4 def test_dtm_rips_complex(): - pts = np.array([[2.0, 2], [0, 1], [3, 4]]) + pts = np.array([[2.0, 2.0], [0.0, 1.0], [3.0, 4.0]]) dist = cdist(pts,pts) dtm = DistanceToMeasure(2, q=2, metric="precomputed") r = dtm.fit_transform(dist) -- cgit v1.2.3 From 0ed4c3bba47d1375acb49596db2c863c38e9a090 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 11 May 2020 08:39:11 +0200 Subject: Fix #299 --- src/python/doc/alpha_complex_sum.inc | 28 ++++---- src/python/doc/cubical_complex_user.rst | 4 +- src/python/doc/fileformats.rst | 2 - src/python/doc/installation.rst | 84 +++++++++++++--------- src/python/doc/nerve_gic_complex_user.rst | 2 +- src/python/doc/persistence_graphical_tools_sum.inc | 22 +++--- .../doc/persistence_graphical_tools_user.rst | 9 +-- src/python/doc/point_cloud.rst | 2 + src/python/doc/point_cloud_sum.inc | 21 +++--- src/python/doc/representations_sum.inc | 22 +++--- src/python/doc/wasserstein_distance_user.rst | 15 +++- src/python/gudhi/persistence_graphical_tools.py | 18 ++--- src/python/gudhi/point_cloud/knn.py | 4 ++ src/python/gudhi/point_cloud/timedelay.py | 5 
+- src/python/gudhi/representations/metrics.py | 4 +- 15 files changed, 135 insertions(+), 107 deletions(-) diff --git a/src/python/doc/alpha_complex_sum.inc b/src/python/doc/alpha_complex_sum.inc index 9e6414d0..74331333 100644 --- a/src/python/doc/alpha_complex_sum.inc +++ b/src/python/doc/alpha_complex_sum.inc @@ -1,17 +1,17 @@ .. table:: :widths: 30 40 30 - +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | .. figure:: | Alpha complex is a simplicial complex constructed from the finite | :Author: Vincent Rouvreau | - | ../../doc/Alpha_complex/alpha_complex_representation.png | cells of a Delaunay Triangulation. | | - | :alt: Alpha complex representation | | :Since: GUDHI 2.0.0 | - | :figclass: align-center | The filtration value of each simplex is computed as the **square** of | | - | | the circumradius of the simplex if the circumsphere is empty (the | :License: MIT (`GPL v3 `_) | - | | simplex is then said to be Gabriel), and as the minimum of the | | - | | filtration values of the codimension 1 cofaces that make it not | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 | - | | Gabriel otherwise. | | - | | | | - | | For performances reasons, it is advised to use CGAL ≥ 5.0.0. 
| | - +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | * :doc:`alpha_complex_user` | * :doc:`alpha_complex_ref` | - +----------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +----------------------------------------------------------------+-------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ + | .. figure:: | Alpha complex is a simplicial complex constructed from the finite | :Author: Vincent Rouvreau | + | ../../doc/Alpha_complex/alpha_complex_representation.png | cells of a Delaunay Triangulation. | | + | :alt: Alpha complex representation | | :Since: GUDHI 2.0.0 | + | :figclass: align-center | The filtration value of each simplex is computed as the **square** of | | + | | the circumradius of the simplex if the circumsphere is empty (the | :License: MIT (`GPL v3 `_) | + | | simplex is then said to be Gabriel), and as the minimum of the | | + | | filtration values of the codimension 1 cofaces that make it not | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 | + | | Gabriel otherwise. | | + | | | | + | | For performances reasons, it is advised to use CGAL :math:`\geq` 5.0.0. 
| | + +----------------------------------------------------------------+-------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ + | * :doc:`alpha_complex_user` | * :doc:`alpha_complex_ref` | + +----------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst index e4733653..e6e61d75 100644 --- a/src/python/doc/cubical_complex_user.rst +++ b/src/python/doc/cubical_complex_user.rst @@ -91,7 +91,7 @@ Currently one input from a text file is used. It uses a format inspired from the we allow any filtration values. As a consequence one cannot use ``-1``'s to indicate missing cubes. If you have missing cubes in your complex, please set their filtration to :math:`+\infty` (aka. ``inf`` in the file). -The file format is described in details in :ref:`Perseus file format` file format section. +The file format is described in details in `Perseus file format `__ section. .. testcode:: @@ -120,7 +120,7 @@ conditions are imposed in all directions, then complex :math:`\mathcal{K}` becam various constructors from the file Bitmap_cubical_complex_periodic_boundary_conditions_base.h to construct cubical complex with periodic boundary conditions. -One can also use Perseus style input files (see :doc:`Perseus `) for the specific periodic case: +One can also use Perseus style input files (see `Perseus file format `__) for the specific periodic case: .. 
testcode:: diff --git a/src/python/doc/fileformats.rst b/src/python/doc/fileformats.rst index 345dfdba..ae1b00f3 100644 --- a/src/python/doc/fileformats.rst +++ b/src/python/doc/fileformats.rst @@ -80,8 +80,6 @@ Here is a simple sample file in the 3D case:: 1. 1. 1. -.. _Perseus file format: - Perseus ******* diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst index 09a843d5..d72e91b5 100644 --- a/src/python/doc/installation.rst +++ b/src/python/doc/installation.rst @@ -12,8 +12,8 @@ The easiest way to install the Python version of GUDHI is using Compiling ********* -The library uses c++14 and requires `Boost `_ ≥ 1.56.0, -`CMake `_ ≥ 3.1 to generate makefiles, +The library uses c++14 and requires `Boost `_ :math:`\geq` 1.56.0, +`CMake `_ :math:`\geq` 3.1 to generate makefiles, `NumPy `_, `Cython `_ and `pybind11 `_ to compile the GUDHI Python module. @@ -21,7 +21,7 @@ It is a multi-platform library and compiles on Linux, Mac OSX and Visual Studio 2017. On `Windows `_ , only Python -≥ 3.5 are available because of the required Visual Studio version. +:math:`\geq` 3.5 are available because of the required Visual Studio version. On other systems, if you have several Python/python installed, the version 2.X will be used by default, but you can force it by adding @@ -30,7 +30,8 @@ will be used by default, but you can force it by adding GUDHI Python module compilation =============================== -To build the GUDHI Python module, run the following commands in a terminal: +After making sure that the `Compilation dependencies`_ are properly installed, +one can build the GUDHI Python module, by running the following commands in a terminal: .. code-block:: bash @@ -188,8 +189,14 @@ Run the following commands in a terminal: Optional third-party library **************************** +Compilation dependencies +======================== + +These third party dependencies are detected by `CMake `_. 
+They have to be installed before performing the `GUDHI Python module compilation`_. + CGAL -==== +---- Some GUDHI modules (cf. :doc:`modules list `), and few examples require `CGAL `_, a C++ library that provides easy @@ -200,7 +207,7 @@ The procedure to install this library according to your operating system is detailed `here `_. -The following examples requires CGAL version ≥ 4.11.0: +The following examples requires CGAL version :math:`\geq` 4.11.0: .. only:: builder_html @@ -211,23 +218,15 @@ The following examples requires CGAL version ≥ 4.11.0: * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` -EagerPy -======= - -Some Python functions can handle automatic differentiation (possibly only when -a flag `enable_autodiff=True` is used). In order to reduce code duplication, we -use `EagerPy `_ which wraps arrays from -PyTorch, TensorFlow and JAX in a common interface. - Eigen -===== +----- Some GUDHI modules (cf. :doc:`modules list `), and few examples require `Eigen `_, a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms. -The following examples require `Eigen `_ version ≥ 3.1.0: +The following examples require `Eigen `_ version :math:`\geq` 3.1.0: .. 
only:: builder_html @@ -237,15 +236,39 @@ The following examples require `Eigen `_ version * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>` * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` +Threading Building Blocks +------------------------- + +`Intel® TBB `_ lets you easily write +parallel C++ programs that take full advantage of multicore performance, that +are portable and composable, and that have future-proof scalability. + +Having Intel® TBB installed is recommended to parallelize and accelerate some +GUDHI computations. + +Run time dependencies +===================== + +These third party dependencies are detected by Python `import` mechanism at run time. +They can be installed when required. + +EagerPy +------- + +Some Python functions can handle automatic differentiation (possibly only when +a flag `enable_autodiff=True` is used). In order to reduce code duplication, we +use `EagerPy `_ which wraps arrays from +PyTorch, TensorFlow and JAX in a common interface. + Hnswlib -======= +------- :class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package `Hnswlib `_ as a backend if explicitly requested, to speed-up queries. Matplotlib -========== +---------- The :doc:`persistence graphical tools ` module requires `Matplotlib `_, a Python 2D plotting @@ -267,49 +290,46 @@ The following examples require the `Matplotlib `_: * :download:`euclidean_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_witness_complex_diagram_persistence_from_off_file_example.py>` PyKeOps -======= +------- :class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package `PyKeOps `_ as a backend if explicitly requested, to speed-up queries using a GPU. 
Python Optimal Transport -======================== +------------------------ The :doc:`Wasserstein distance ` module requires `POT `_, a library that provides several solvers for optimization problems related to Optimal Transport. PyTorch -======= +------- `PyTorch `_ is currently only used as a dependency of `PyKeOps`_, and in some tests. Scikit-learn -============ +------------ The :doc:`persistence representations ` module require `scikit-learn `_, a Python-based ecosystem of open-source software for machine learning. +:class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package +`scikit-learn `_ as a backend if explicitly +requested. + SciPy -===== +----- The :doc:`persistence graphical tools ` and :doc:`Wasserstein distance ` modules require `SciPy `_, a Python-based ecosystem of open-source software for mathematics, science, and engineering. -Threading Building Blocks -========================= - -`Intel® TBB `_ lets you easily write -parallel C++ programs that take full advantage of multicore performance, that -are portable and composable, and that have future-proof scalability. - -Having Intel® TBB installed is recommended to parallelize and accelerate some -GUDHI computations. +:class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package +`SciPy `_ as a backend if explicitly requested. Bug reports and contributions ***************************** diff --git a/src/python/doc/nerve_gic_complex_user.rst b/src/python/doc/nerve_gic_complex_user.rst index 9101f45d..d5c5438d 100644 --- a/src/python/doc/nerve_gic_complex_user.rst +++ b/src/python/doc/nerve_gic_complex_user.rst @@ -13,7 +13,7 @@ Visualizations of the simplicial complexes can be done with either neato (from `graphviz `_), `geomview `_, `KeplerMapper `_. -Input point clouds are assumed to be OFF files (cf. :doc:`fileformats`). +Input point clouds are assumed to be OFF files (cf. `OFF file format `__). 
Covers ------ diff --git a/src/python/doc/persistence_graphical_tools_sum.inc b/src/python/doc/persistence_graphical_tools_sum.inc index b68d3d7e..0f41b420 100644 --- a/src/python/doc/persistence_graphical_tools_sum.inc +++ b/src/python/doc/persistence_graphical_tools_sum.inc @@ -1,14 +1,14 @@ .. table:: :widths: 30 40 30 - +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ - | .. figure:: | These graphical tools comes on top of persistence results and allows | :Author: Vincent Rouvreau, Theo Lacombe | - | img/graphical_tools_representation.png | the user to display easily persistence barcode, diagram or density. | | - | | | :Since: GUDHI 2.0.0 | - | | Note that these functions return the matplotlib axis, allowing | | - | | for further modifications (title, aspect, etc.) | :License: MIT | - | | | | - | | | :Requires: matplotlib, numpy and scipy | - +-----------------------------------------------------------------+-----------------------------------------------------------------------+-----------------------------------------------+ - | * :doc:`persistence_graphical_tools_user` | * :doc:`persistence_graphical_tools_ref` | - +-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------+ + +-----------------------------------------------------------------+-----------------------------------------------------------------------+----------------------------------------------------------+ + | .. figure:: | These graphical tools comes on top of persistence results and allows | :Author: Vincent Rouvreau, Theo Lacombe | + | img/graphical_tools_representation.png | the user to display easily persistence barcode, diagram or density. 
| | + | | | :Since: GUDHI 2.0.0 | + | | Note that these functions return the matplotlib axis, allowing | | + | | for further modifications (title, aspect, etc.) | :License: MIT | + | | | | + | | | :Requires: `Matplotlib `__ | + +-----------------------------------------------------------------+-----------------------------------------------------------------------+----------------------------------------------------------+ + | * :doc:`persistence_graphical_tools_user` | * :doc:`persistence_graphical_tools_ref` | + +-----------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/persistence_graphical_tools_user.rst b/src/python/doc/persistence_graphical_tools_user.rst index 91e52703..fce628b1 100644 --- a/src/python/doc/persistence_graphical_tools_user.rst +++ b/src/python/doc/persistence_graphical_tools_user.rst @@ -12,9 +12,6 @@ Definition Show persistence as a barcode ----------------------------- -.. note:: - this function requires matplotlib and numpy to be available - This function can display the persistence result as a barcode: .. plot:: @@ -36,9 +33,6 @@ This function can display the persistence result as a barcode: Show persistence as a diagram ----------------------------- -.. note:: - this function requires matplotlib and numpy to be available - This function can display the persistence result as a diagram: .. plot:: @@ -73,8 +67,7 @@ of shape (N x 2) encoding a persistence diagram (in a given dimension). Persistence density ------------------- -.. 
note:: - this function requires matplotlib, numpy and scipy to be available +:Requires: `SciPy `__ If you want more information on a specific dimension, for instance: diff --git a/src/python/doc/point_cloud.rst b/src/python/doc/point_cloud.rst index 192f70db..523a9dfa 100644 --- a/src/python/doc/point_cloud.rst +++ b/src/python/doc/point_cloud.rst @@ -16,6 +16,8 @@ File Readers Subsampling ----------- +:Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 + .. automodule:: gudhi.subsampling :members: :special-members: diff --git a/src/python/doc/point_cloud_sum.inc b/src/python/doc/point_cloud_sum.inc index d4761aba..4315cea6 100644 --- a/src/python/doc/point_cloud_sum.inc +++ b/src/python/doc/point_cloud_sum.inc @@ -1,15 +1,12 @@ .. table:: :widths: 30 40 30 - +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, | :Authors: Vincent Rouvreau, Marc Glisse, Masatoshi Takenouchi | - | | :math:`(y_1, y_2, \ldots, y_d)` | find neighbors, embed time series in higher dimension, etc. | | - | | | :Since: GUDHI 2.0.0 | - | | | | - | | | :License: MIT (`GPL v3 `_, BSD-3-Clause, Apache-2.0) | - | | Parts of this package require CGAL. 
| | - | | | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 | - | | | | - +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | * :doc:`point_cloud` | - +----------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +-----------------------------------+---------------------------------------------------------------+-------------------------------------------------------------------+ + | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, | :Authors: Vincent Rouvreau, Marc Glisse, Masatoshi Takenouchi | + | | :math:`(y_1, y_2, \ldots, y_d)` | find neighbors, embed time series in higher dimension, etc. | | + | | | :Since: GUDHI 2.0.0 | + | | | | + | | | :License: MIT (`GPL v3 `_, BSD-3-Clause, Apache-2.0) | + +-----------------------------------+---------------------------------------------------------------+-------------------------------------------------------------------+ + | * :doc:`point_cloud` | + +-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/representations_sum.inc b/src/python/doc/representations_sum.inc index eac89b9d..cdad4716 100644 --- a/src/python/doc/representations_sum.inc +++ b/src/python/doc/representations_sum.inc @@ -1,14 +1,14 @@ .. 
table:: :widths: 30 40 30 - +------------------------------------------------------------------+----------------------------------------------------------------+-----------------------------------------------+ - | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière | - | img/sklearn-tda.png | diagrams, compatible with scikit-learn. | | - | | | :Since: GUDHI 3.1.0 | - | | | | - | | | :License: MIT | - | | | | - | | | :Requires: scikit-learn | - +------------------------------------------------------------------+----------------------------------------------------------------+-----------------------------------------------+ - | * :doc:`representations` | - +------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ + +------------------------------------------------------------------+----------------------------------------------------------------+--------------------------------------------------------------+ + | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière | + | img/sklearn-tda.png | diagrams, compatible with scikit-learn. 
| | + | | | :Since: GUDHI 3.1.0 | + | | | | + | | | :License: MIT | + | | | | + | | | :Requires: `Scikit-learn `__ | + +------------------------------------------------------------------+----------------------------------------------------------------+--------------------------------------------------------------+ + | * :doc:`representations` | + +------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index c443bab5..2d2e2ae7 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -17,12 +17,21 @@ are measured in norm p, for :math:`1 \leq p \leq \infty`. Distance Functions ------------------ -This first implementation uses the Python Optimal Transport library and is based -on ideas from "Large Scale Computation of Means and Cluster for Persistence + +Optimal Transport +***************** + +:Requires: `Python Optimal Transport `__ (POT) :math:`\geq` 0.5.1 + +This first implementation uses the `Python Optimal Transport `__ +library and is based on ideas from "Large Scale Computation of Means and Cluster for Persistence Diagrams via Optimal Transport" :cite:`10.5555/3327546.3327645`. .. autofunction:: gudhi.wasserstein.wasserstein_distance +Hera +**** + This other implementation comes from `Hera `_ (BSD-3-Clause) which is based on "Geometry Helps to Compare Persistence Diagrams" @@ -94,6 +103,8 @@ The output is: Barycenters ----------- +:Requires: `Python Optimal Transport `__ (POT) :math:`\geq` 0.5.1 + A Frechet mean (or barycenter) is a generalization of the arithmetic mean in a non linear space such as the one of persistence diagrams. 
Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is diff --git a/src/python/gudhi/persistence_graphical_tools.py b/src/python/gudhi/persistence_graphical_tools.py index cc3db467..e36af304 100644 --- a/src/python/gudhi/persistence_graphical_tools.py +++ b/src/python/gudhi/persistence_graphical_tools.py @@ -72,11 +72,11 @@ def plot_persistence_barcode( """This function plots the persistence bar code from persistence values list , a np.array of shape (N x 2) (representing a diagram in a single homology dimension), - or from a :doc:`persistence file `. + or from a `persistence diagram `__ file. :param persistence: Persistence intervals values list. Can be grouped by dimension or not. :type persistence: an array of (dimension, array of (birth, death)) or an array of (birth, death). - :param persistence_file: A :doc:`persistence file ` style name + :param persistence_file: A `persistence diagram `__ file style name (reset persistence if both are set). :type persistence_file: string :param alpha: barcode transparency value (0.0 transparent through 1.0 @@ -214,11 +214,11 @@ def plot_persistence_diagram( ): """This function plots the persistence diagram from persistence values list, a np.array of shape (N x 2) representing a diagram in a single - homology dimension, or from a :doc:`persistence file `. + homology dimension, or from a `persistence diagram `__ file. :param persistence: Persistence intervals values list. Can be grouped by dimension or not. :type persistence: an array of (dimension, array of (birth, death)) or an array of (birth, death). - :param persistence_file: A :doc:`persistence file ` style name + :param persistence_file: A `persistence diagram `__ file style name (reset persistence if both are set). 
:type persistence_file: string :param alpha: plot transparency value (0.0 transparent through 1.0 @@ -369,17 +369,19 @@ def plot_persistence_density( """This function plots the persistence density from persistence values list, np.array of shape (N x 2) representing a diagram in a single homology dimension, - or from a :doc:`persistence file `. Be - aware that this function does not distinguish the dimension, it is + or from a `persistence diagram `__ file. + Be aware that this function does not distinguish the dimension, it is up to you to select the required one. This function also does not handle degenerate data set (scipy correlation matrix inversion can fail). + :Requires: `SciPy `__ + :param persistence: Persistence intervals values list. Can be grouped by dimension or not. :type persistence: an array of (dimension, array of (birth, death)) or an array of (birth, death). - :param persistence_file: A :doc:`persistence file ` - style name (reset persistence if both are set). + :param persistence_file: A `persistence diagram `__ + file style name (reset persistence if both are set). :type persistence_file: string :param nbins: Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents (default is 300) diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 34e80b5d..19363097 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -19,6 +19,10 @@ __license__ = "MIT" class KNearestNeighbors: """ Class wrapping several implementations for computing the k nearest neighbors in a point set. + + :Requires: `PyKeOps `__, `SciPy `__, + `Scikit-learn `__, and/or `Hnswlib `__ + depending on the selected `implementation`. 
""" def __init__(self, k, return_index=True, return_distance=False, metric="euclidean", **kwargs): diff --git a/src/python/gudhi/point_cloud/timedelay.py b/src/python/gudhi/point_cloud/timedelay.py index f01df442..5292e752 100644 --- a/src/python/gudhi/point_cloud/timedelay.py +++ b/src/python/gudhi/point_cloud/timedelay.py @@ -10,9 +10,8 @@ import numpy as np class TimeDelayEmbedding: - """Point cloud transformation class. - Embeds time-series data in the R^d according to [Takens' Embedding Theorem] - (https://en.wikipedia.org/wiki/Takens%27s_theorem) and obtains the + """Point cloud transformation class. Embeds time-series data in the R^d according to + `Takens' Embedding Theorem `_ and obtains the coordinates of each point. Parameters diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index ce416fb1..0a6dd680 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -223,7 +223,9 @@ class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): class BottleneckDistance(BaseEstimator, TransformerMixin): """ - This is a class for computing the bottleneck distance matrix from a list of persistence diagrams. + This is a class for computing the bottleneck distance matrix from a list of persistence diagrams. 
+ + :Requires: `CGAL `__ :math:`\geq` 4.11.0 """ def __init__(self, epsilon=None): """ -- cgit v1.2.3 From 627772e4c5bc7038b0814182dbb918b08356c892 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com> Date: Mon, 11 May 2020 08:42:40 +0200 Subject: Fixed by @tlacombe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Théo Lacombe --- src/python/gudhi/wasserstein/barycenter.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py index 1cf8edb3..7eeeae7a 100644 --- a/src/python/gudhi/wasserstein/barycenter.py +++ b/src/python/gudhi/wasserstein/barycenter.py @@ -52,9 +52,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates that ``pdiagset[k][i]`` is matched to ``Y[j]`` if ``i = -1`` or ``j = -1``, it means they represent the diagonal. - - `"energy"`, ``float`` representing the Frechet energy value obtained. - - It is the mean of squared distances of observations to the output. + - `"energy"`, ``float`` representing the Frechet energy value obtained. It is the mean of squared distances of observations to the output. - `"nb_iter"`, ``int`` number of iterations performed before convergence of the algorithm. 
''' @@ -149,4 +147,3 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): return Y, log else: return Y - -- cgit v1.2.3 From 779e4c4e8225e279ef8322988d4d06a6c2e06529 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com> Date: Mon, 11 May 2020 08:43:06 +0200 Subject: Fixed by @tlacombe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Théo Lacombe --- src/python/gudhi/wasserstein/barycenter.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py index 7eeeae7a..d67bcde7 100644 --- a/src/python/gudhi/wasserstein/barycenter.py +++ b/src/python/gudhi/wasserstein/barycenter.py @@ -47,10 +47,7 @@ def lagrangian_barycenter(pdiagset, init=None, verbose=False): If verbose, returns a couple ``(Y, log)`` where ``Y`` is the barycenter estimate, and ``log`` is a ``dict`` that contains additional informations: - - `"groupings"`, a list of list of pairs ``(i,j)``. - - Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates that ``pdiagset[k][i]`` is matched to ``Y[j]`` - if ``i = -1`` or ``j = -1``, it means they represent the diagonal. + - `"groupings"`, a list of list of pairs ``(i,j)``. Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates that ``pdiagset[k][i]`` is matched to ``Y[j]`` if ``i = -1`` or ``j = -1``, it means they represent the diagonal. - `"energy"`, ``float`` representing the Frechet energy value obtained. It is the mean of squared distances of observations to the output. 
-- cgit v1.2.3 From 7e85b0451c686f043b61cde2e5f78674cf8de248 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 11 May 2020 09:31:49 +0200 Subject: Double underscore is not the correct syntax --- src/python/doc/alpha_complex_sum.inc | 28 +++++++++++----------- src/python/doc/bottleneck_distance_sum.inc | 22 ++++++++--------- src/python/doc/cubical_complex_user.rst | 4 ++-- src/python/doc/nerve_gic_complex_sum.inc | 26 ++++++++++---------- src/python/doc/nerve_gic_complex_user.rst | 2 +- src/python/doc/persistence_graphical_tools_sum.inc | 22 ++++++++--------- .../doc/persistence_graphical_tools_user.rst | 2 +- src/python/doc/point_cloud.rst | 2 +- src/python/doc/representations_sum.inc | 22 ++++++++--------- src/python/doc/tangential_complex_sum.inc | 22 ++++++++--------- src/python/doc/wasserstein_distance_user.rst | 6 ++--- src/python/doc/witness_complex_sum.inc | 28 +++++++++++----------- src/python/gudhi/persistence_graphical_tools.py | 14 +++++------ src/python/gudhi/point_cloud/knn.py | 4 ++-- src/python/gudhi/representations/metrics.py | 2 +- 15 files changed, 103 insertions(+), 103 deletions(-) diff --git a/src/python/doc/alpha_complex_sum.inc b/src/python/doc/alpha_complex_sum.inc index 74331333..3aba0d71 100644 --- a/src/python/doc/alpha_complex_sum.inc +++ b/src/python/doc/alpha_complex_sum.inc @@ -1,17 +1,17 @@ .. table:: :widths: 30 40 30 - +----------------------------------------------------------------+-------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | .. figure:: | Alpha complex is a simplicial complex constructed from the finite | :Author: Vincent Rouvreau | - | ../../doc/Alpha_complex/alpha_complex_representation.png | cells of a Delaunay Triangulation. 
| | - | :alt: Alpha complex representation | | :Since: GUDHI 2.0.0 | - | :figclass: align-center | The filtration value of each simplex is computed as the **square** of | | - | | the circumradius of the simplex if the circumsphere is empty (the | :License: MIT (`GPL v3 `_) | - | | simplex is then said to be Gabriel), and as the minimum of the | | - | | filtration values of the codimension 1 cofaces that make it not | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 | - | | Gabriel otherwise. | | - | | | | - | | For performances reasons, it is advised to use CGAL :math:`\geq` 5.0.0. | | - +----------------------------------------------------------------+-------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | * :doc:`alpha_complex_user` | * :doc:`alpha_complex_ref` | - +----------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +----------------------------------------------------------------+-------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | .. figure:: | Alpha complex is a simplicial complex constructed from the finite | :Author: Vincent Rouvreau | + | ../../doc/Alpha_complex/alpha_complex_representation.png | cells of a Delaunay Triangulation. 
| | + | :alt: Alpha complex representation | | :Since: GUDHI 2.0.0 | + | :figclass: align-center | The filtration value of each simplex is computed as the **square** of | | + | | the circumradius of the simplex if the circumsphere is empty (the | :License: MIT (`GPL v3 `_) | + | | simplex is then said to be Gabriel), and as the minimum of the | | + | | filtration values of the codimension 1 cofaces that make it not | :Requires: `Eigen `_ :math:`\geq` 3.1.0 and `CGAL `_ :math:`\geq` 4.11.0 | + | | Gabriel otherwise. | | + | | | | + | | For performances reasons, it is advised to use CGAL :math:`\geq` 5.0.0. | | + +----------------------------------------------------------------+-------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | * :doc:`alpha_complex_user` | * :doc:`alpha_complex_ref` | + +----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/bottleneck_distance_sum.inc b/src/python/doc/bottleneck_distance_sum.inc index 0de4625c..77dc368d 100644 --- a/src/python/doc/bottleneck_distance_sum.inc +++ b/src/python/doc/bottleneck_distance_sum.inc @@ -1,14 +1,14 @@ .. table:: :widths: 30 40 30 - +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ - | .. figure:: | Bottleneck distance measures the similarity between two persistence | :Author: François Godi | - | ../../doc/Bottleneck_distance/perturb_pd.png | diagrams. 
It's the shortest distance b for which there exists a | | - | :figclass: align-center | perfect matching between the points of the two diagrams (+ all the | :Since: GUDHI 2.0.0 | - | | diagonal points) such that any couple of matched points are at | | - | Bottleneck distance is the length of | distance at most b, where the distance between points is the sup | :License: MIT (`GPL v3 `_) | - | the longest edge | norm in :math:`\mathbb{R}^2`. | | - | | | :Requires: `CGAL `__ :math:`\geq` 4.11.0 | - +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ - | * :doc:`bottleneck_distance_user` | | - +-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ + +-----------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------------------------------+ + | .. figure:: | Bottleneck distance measures the similarity between two persistence | :Author: François Godi | + | ../../doc/Bottleneck_distance/perturb_pd.png | diagrams. It's the shortest distance b for which there exists a | | + | :figclass: align-center | perfect matching between the points of the two diagrams (+ all the | :Since: GUDHI 2.0.0 | + | | diagonal points) such that any couple of matched points are at | | + | Bottleneck distance is the length of | distance at most b, where the distance between points is the sup | :License: MIT (`GPL v3 `_) | + | the longest edge | norm in :math:`\mathbb{R}^2`. 
| | + | | | :Requires: `CGAL `_ :math:`\geq` 4.11.0 | + +-----------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------------------------------+ + | * :doc:`bottleneck_distance_user` | | + +-----------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst index e6e61d75..3fd4e27a 100644 --- a/src/python/doc/cubical_complex_user.rst +++ b/src/python/doc/cubical_complex_user.rst @@ -91,7 +91,7 @@ Currently one input from a text file is used. It uses a format inspired from the we allow any filtration values. As a consequence one cannot use ``-1``'s to indicate missing cubes. If you have missing cubes in your complex, please set their filtration to :math:`+\infty` (aka. ``inf`` in the file). -The file format is described in details in `Perseus file format `__ section. +The file format is described in details in `Perseus file format `_ section. .. testcode:: @@ -120,7 +120,7 @@ conditions are imposed in all directions, then complex :math:`\mathcal{K}` becam various constructors from the file Bitmap_cubical_complex_periodic_boundary_conditions_base.h to construct cubical complex with periodic boundary conditions. -One can also use Perseus style input files (see `Perseus file format `__) for the specific periodic case: +One can also use Perseus style input files (see `Perseus file format `_) for the specific periodic case: .. testcode:: diff --git a/src/python/doc/nerve_gic_complex_sum.inc b/src/python/doc/nerve_gic_complex_sum.inc index 7fe55aff..7db6c124 100644 --- a/src/python/doc/nerve_gic_complex_sum.inc +++ b/src/python/doc/nerve_gic_complex_sum.inc @@ -1,16 +1,16 @@ .. 
table:: :widths: 30 40 30 - +----------------------------------------------------------------+------------------------------------------------------------------------+------------------------------------------------------------------+ - | .. figure:: | Nerves and Graph Induced Complexes are cover complexes, i.e. | :Author: Mathieu Carrière | - | ../../doc/Nerve_GIC/gicvisu.jpg | simplicial complexes that provably contain topological information | | - | :alt: Graph Induced Complex of a point cloud. | about the input data. They can be computed with a cover of the data, | :Since: GUDHI 2.3.0 | - | :figclass: align-center | that comes i.e. from the preimage of a family of intervals covering | | - | | the image of a scalar-valued function defined on the data. | :License: MIT (`GPL v3 `_) | - | | | | - | | | :Requires: `CGAL `__ :math:`\geq` 4.11.0 | - | | | | - | | | | - +----------------------------------------------------------------+------------------------------------------------------------------------+------------------------------------------------------------------+ - | * :doc:`nerve_gic_complex_user` | * :doc:`nerve_gic_complex_ref` | - +----------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ + +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------+ + | .. figure:: | Nerves and Graph Induced Complexes are cover complexes, i.e. | :Author: Mathieu Carrière | + | ../../doc/Nerve_GIC/gicvisu.jpg | simplicial complexes that provably contain topological information | | + | :alt: Graph Induced Complex of a point cloud. | about the input data. They can be computed with a cover of the data, | :Since: GUDHI 2.3.0 | + | :figclass: align-center | that comes i.e. 
from the preimage of a family of intervals covering | | + | | the image of a scalar-valued function defined on the data. | :License: MIT (`GPL v3 `_) | + | | | | + | | | :Requires: `CGAL `_ :math:`\geq` 4.11.0 | + | | | | + | | | | + +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------+ + | * :doc:`nerve_gic_complex_user` | * :doc:`nerve_gic_complex_ref` | + +----------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/nerve_gic_complex_user.rst b/src/python/doc/nerve_gic_complex_user.rst index d5c5438d..0e67fc78 100644 --- a/src/python/doc/nerve_gic_complex_user.rst +++ b/src/python/doc/nerve_gic_complex_user.rst @@ -13,7 +13,7 @@ Visualizations of the simplicial complexes can be done with either neato (from `graphviz `_), `geomview `_, `KeplerMapper `_. -Input point clouds are assumed to be OFF files (cf. `OFF file format `__). +Input point clouds are assumed to be OFF files (cf. `OFF file format `_). Covers ------ diff --git a/src/python/doc/persistence_graphical_tools_sum.inc b/src/python/doc/persistence_graphical_tools_sum.inc index 0f41b420..7ff63ae2 100644 --- a/src/python/doc/persistence_graphical_tools_sum.inc +++ b/src/python/doc/persistence_graphical_tools_sum.inc @@ -1,14 +1,14 @@ .. table:: :widths: 30 40 30 - +-----------------------------------------------------------------+-----------------------------------------------------------------------+----------------------------------------------------------+ - | .. 
figure:: | These graphical tools comes on top of persistence results and allows | :Author: Vincent Rouvreau, Theo Lacombe | - | img/graphical_tools_representation.png | the user to display easily persistence barcode, diagram or density. | | - | | | :Since: GUDHI 2.0.0 | - | | Note that these functions return the matplotlib axis, allowing | | - | | for further modifications (title, aspect, etc.) | :License: MIT | - | | | | - | | | :Requires: `Matplotlib `__ | - +-----------------------------------------------------------------+-----------------------------------------------------------------------+----------------------------------------------------------+ - | * :doc:`persistence_graphical_tools_user` | * :doc:`persistence_graphical_tools_ref` | - +-----------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------+ + +-----------------------------------------------------------------+-----------------------------------------------------------------------+---------------------------------------------------------+ + | .. figure:: | These graphical tools comes on top of persistence results and allows | :Author: Vincent Rouvreau, Theo Lacombe | + | img/graphical_tools_representation.png | the user to display easily persistence barcode, diagram or density. | | + | | | :Since: GUDHI 2.0.0 | + | | Note that these functions return the matplotlib axis, allowing | | + | | for further modifications (title, aspect, etc.) 
| :License: MIT | + | | | | + | | | :Requires: `Matplotlib `_ | + +-----------------------------------------------------------------+-----------------------------------------------------------------------+---------------------------------------------------------+ + | * :doc:`persistence_graphical_tools_user` | * :doc:`persistence_graphical_tools_ref` | + +-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/persistence_graphical_tools_user.rst b/src/python/doc/persistence_graphical_tools_user.rst index fce628b1..b5a38eb1 100644 --- a/src/python/doc/persistence_graphical_tools_user.rst +++ b/src/python/doc/persistence_graphical_tools_user.rst @@ -67,7 +67,7 @@ of shape (N x 2) encoding a persistence diagram (in a given dimension). Persistence density ------------------- -:Requires: `SciPy `__ +:Requires: `SciPy `_ If you want more information on a specific dimension, for instance: diff --git a/src/python/doc/point_cloud.rst b/src/python/doc/point_cloud.rst index 523a9dfa..ffd8f85b 100644 --- a/src/python/doc/point_cloud.rst +++ b/src/python/doc/point_cloud.rst @@ -16,7 +16,7 @@ File Readers Subsampling ----------- -:Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 +:Requires: `Eigen `_ :math:`\geq` 3.1.0 and `CGAL `_ :math:`\geq` 4.11.0 .. automodule:: gudhi.subsampling :members: diff --git a/src/python/doc/representations_sum.inc b/src/python/doc/representations_sum.inc index cdad4716..323a0920 100644 --- a/src/python/doc/representations_sum.inc +++ b/src/python/doc/representations_sum.inc @@ -1,14 +1,14 @@ .. table:: :widths: 30 40 30 - +------------------------------------------------------------------+----------------------------------------------------------------+--------------------------------------------------------------+ - | .. 
figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière | - | img/sklearn-tda.png | diagrams, compatible with scikit-learn. | | - | | | :Since: GUDHI 3.1.0 | - | | | | - | | | :License: MIT | - | | | | - | | | :Requires: `Scikit-learn `__ | - +------------------------------------------------------------------+----------------------------------------------------------------+--------------------------------------------------------------+ - | * :doc:`representations` | - +------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------+ + +------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------+ + | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière | + | img/sklearn-tda.png | diagrams, compatible with scikit-learn. | | + | | | :Since: GUDHI 3.1.0 | + | | | | + | | | :License: MIT | + | | | | + | | | :Requires: `Scikit-learn `_ | + +------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------+ + | * :doc:`representations` | + +------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/tangential_complex_sum.inc b/src/python/doc/tangential_complex_sum.inc index 45ce2a66..22314a2d 100644 --- a/src/python/doc/tangential_complex_sum.inc +++ b/src/python/doc/tangential_complex_sum.inc @@ -1,14 +1,14 @@ .. 
table:: :widths: 30 40 30 - +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | .. figure:: | A Tangential Delaunay complex is a simplicial complex designed to | :Author: Clément Jamin | - | ../../doc/Tangential_complex/tc_examples.png | reconstruct a :math:`k`-dimensional manifold embedded in :math:`d`- | | - | :figclass: align-center | dimensional Euclidean space. The input is a point sample coming from | :Since: GUDHI 2.0.0 | - | | an unknown manifold. The running time depends only linearly on the | | - | | extrinsic dimension :math:`d` and exponentially on the intrinsic | :License: MIT (`GPL v3 `_) | - | | dimension :math:`k`. | | - | | | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 | - +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ - | * :doc:`tangential_complex_user` | * :doc:`tangential_complex_ref` | - +----------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +----------------------------------------------------------------+------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | .. 
figure:: | A Tangential Delaunay complex is a simplicial complex designed to | :Author: Clément Jamin | + | ../../doc/Tangential_complex/tc_examples.png | reconstruct a :math:`k`-dimensional manifold embedded in :math:`d`- | | + | :figclass: align-center | dimensional Euclidean space. The input is a point sample coming from | :Since: GUDHI 2.0.0 | + | | an unknown manifold. The running time depends only linearly on the | | + | | extrinsic dimension :math:`d` and exponentially on the intrinsic | :License: MIT (`GPL v3 `_) | + | | dimension :math:`k`. | | + | | | :Requires: `Eigen `_ :math:`\geq` 3.1.0 and `CGAL `_ :math:`\geq` 4.11.0 | + +----------------------------------------------------------------+------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------+ + | * :doc:`tangential_complex_user` | * :doc:`tangential_complex_ref` | + +----------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index 2d2e2ae7..96ec7872 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -21,9 +21,9 @@ Distance Functions Optimal Transport ***************** -:Requires: `Python Optimal Transport `__ (POT) :math:`\geq` 0.5.1 +:Requires: `Python Optimal Transport `_ (POT) :math:`\geq` 0.5.1 -This first implementation uses the `Python Optimal Transport `__ +This first implementation uses the `Python Optimal Transport `_ library and is based on ideas from "Large Scale Computation of Means and Cluster for Persistence Diagrams via Optimal Transport" :cite:`10.5555/3327546.3327645`. 
@@ -103,7 +103,7 @@ The output is: Barycenters ----------- -:Requires: `Python Optimal Transport `__ (POT) :math:`\geq` 0.5.1 +:Requires: `Python Optimal Transport `_ (POT) :math:`\geq` 0.5.1 A Frechet mean (or barycenter) is a generalization of the arithmetic mean in a non linear space such as the one of persistence diagrams. diff --git a/src/python/doc/witness_complex_sum.inc b/src/python/doc/witness_complex_sum.inc index 34d4df4a..4416fec0 100644 --- a/src/python/doc/witness_complex_sum.inc +++ b/src/python/doc/witness_complex_sum.inc @@ -1,18 +1,18 @@ .. table:: :widths: 30 40 30 - +-------------------------------------------------------------------+----------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | .. figure:: | Witness complex :math:`Wit(W,L)` is a simplicial complex defined on | :Author: Siargey Kachanovich | - | ../../doc/Witness_complex/Witness_complex_representation.png | two sets of points in :math:`\mathbb{R}^D`. | | - | :alt: Witness complex representation | | :Since: GUDHI 2.0.0 | - | :figclass: align-center | The data structure is described in | | - | | :cite:`boissonnatmariasimplextreealgorithmica`. 
| :License: MIT (`GPL v3 `_ for Euclidean versions only) | - | | | | - | | | :Requires: `Eigen `__ :math:`\geq` 3.1.0 and `CGAL `__ :math:`\geq` 4.11.0 for Euclidean versions only | - +-------------------------------------------------------------------+----------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | * :doc:`witness_complex_user` | * :doc:`witness_complex_ref` | - | | * :doc:`strong_witness_complex_ref` | - | | * :doc:`euclidean_witness_complex_ref` | - | | * :doc:`euclidean_strong_witness_complex_ref` | - +-------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +-------------------------------------------------------------------+----------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | .. figure:: | Witness complex :math:`Wit(W,L)` is a simplicial complex defined on | :Author: Siargey Kachanovich | + | ../../doc/Witness_complex/Witness_complex_representation.png | two sets of points in :math:`\mathbb{R}^D`. | | + | :alt: Witness complex representation | | :Since: GUDHI 2.0.0 | + | :figclass: align-center | The data structure is described in | | + | | :cite:`boissonnatmariasimplextreealgorithmica`. 
| :License: MIT (`GPL v3 `_ for Euclidean versions only) | + | | | | + | | | :Requires: `Eigen `_ :math:`\geq` 3.1.0 and `CGAL `_ :math:`\geq` 4.11.0 for Euclidean versions only | + +-------------------------------------------------------------------+----------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------+ + | * :doc:`witness_complex_user` | * :doc:`witness_complex_ref` | + | | * :doc:`strong_witness_complex_ref` | + | | * :doc:`euclidean_witness_complex_ref` | + | | * :doc:`euclidean_strong_witness_complex_ref` | + +-------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/gudhi/persistence_graphical_tools.py b/src/python/gudhi/persistence_graphical_tools.py index e36af304..d59e51a0 100644 --- a/src/python/gudhi/persistence_graphical_tools.py +++ b/src/python/gudhi/persistence_graphical_tools.py @@ -72,11 +72,11 @@ def plot_persistence_barcode( """This function plots the persistence bar code from persistence values list , a np.array of shape (N x 2) (representing a diagram in a single homology dimension), - or from a `persistence diagram `__ file. + or from a `persistence diagram `_ file. :param persistence: Persistence intervals values list. Can be grouped by dimension or not. :type persistence: an array of (dimension, array of (birth, death)) or an array of (birth, death). - :param persistence_file: A `persistence diagram `__ file style name + :param persistence_file: A `persistence diagram `_ file style name (reset persistence if both are set). 
:type persistence_file: string :param alpha: barcode transparency value (0.0 transparent through 1.0 @@ -214,11 +214,11 @@ def plot_persistence_diagram( ): """This function plots the persistence diagram from persistence values list, a np.array of shape (N x 2) representing a diagram in a single - homology dimension, or from a `persistence diagram `__ file`. + homology dimension, or from a `persistence diagram `_ file`. :param persistence: Persistence intervals values list. Can be grouped by dimension or not. :type persistence: an array of (dimension, array of (birth, death)) or an array of (birth, death). - :param persistence_file: A `persistence diagram `__ file style name + :param persistence_file: A `persistence diagram `_ file style name (reset persistence if both are set). :type persistence_file: string :param alpha: plot transparency value (0.0 transparent through 1.0 @@ -369,18 +369,18 @@ def plot_persistence_density( """This function plots the persistence density from persistence values list, np.array of shape (N x 2) representing a diagram in a single homology dimension, - or from a `persistence diagram `__ file. + or from a `persistence diagram `_ file. Be aware that this function does not distinguish the dimension, it is up to you to select the required one. This function also does not handle degenerate data set (scipy correlation matrix inversion can fail). - :Requires: `SciPy `__ + :Requires: `SciPy `_ :param persistence: Persistence intervals values list. Can be grouped by dimension or not. :type persistence: an array of (dimension, array of (birth, death)) or an array of (birth, death). - :param persistence_file: A `persistence diagram `__ + :param persistence_file: A `persistence diagram `_ file style name (reset persistence if both are set). 
:type persistence_file: string :param nbins: Evaluate a gaussian kde on a regular grid of nbins x diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 19363097..86008bc3 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -20,8 +20,8 @@ class KNearestNeighbors: """ Class wrapping several implementations for computing the k nearest neighbors in a point set. - :Requires: `PyKeOps `__, `SciPy `__, - `Scikit-learn `__, and/or `Hnswlib `__ + :Requires: `PyKeOps `_, `SciPy `_, + `Scikit-learn `_, and/or `Hnswlib `_ in function of the selected `implementation`. """ diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 0a6dd680..8a32f7e9 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -225,7 +225,7 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the bottleneck distance matrix from a list of persistence diagrams. - :Requires: `CGAL `__ :math:`\geq` 4.11.0 + :Requires: `CGAL `_ :math:`\geq` 4.11.0 """ def __init__(self, epsilon=None): """ -- cgit v1.2.3 From 9bfee982ae6fa6d4ca64b16d4c37e6eadf27c27a Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 11 May 2020 11:10:12 +0200 Subject: Fix duplicate link --- src/python/doc/alpha_complex_user.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index de706de9..373853c8 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -11,8 +11,8 @@ Definition `AlphaComplex` is constructing a :doc:`SimplexTree ` using `Delaunay Triangulation `_ -:cite:`cgal:hdj-t-19b` from `CGAL `_ (the Computational Geometry Algorithms Library -:cite:`cgal:eb-19b`). 
+:cite:`cgal:hdj-t-19b` from the `Computational Geometry Algorithms Library `_ +(CGAL Library :cite:`cgal:eb-19b`). Remarks ^^^^^^^ -- cgit v1.2.3 From a9fa1ba093b13f847dd3921d0c3d2d44342a4dcd Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com> Date: Mon, 11 May 2020 17:06:50 +0200 Subject: Update src/python/doc/installation.rst Co-authored-by: Marc Glisse --- src/python/doc/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst index d72e91b5..de09c5b3 100644 --- a/src/python/doc/installation.rst +++ b/src/python/doc/installation.rst @@ -207,7 +207,7 @@ The procedure to install this library according to your operating system is detailed `here `_. -The following examples requires CGAL version :math:`\geq` 4.11.0: +The following examples require CGAL version :math:`\geq` 4.11.0: .. only:: builder_html -- cgit v1.2.3 From 0c64c706fa2c298cac079c00f71ef95061f9e6f8 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 11 May 2020 17:14:22 +0200 Subject: doc review --- src/python/doc/alpha_complex_user.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index 373853c8..d49f45b4 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -12,7 +12,7 @@ Definition `AlphaComplex` is constructing a :doc:`SimplexTree ` using `Delaunay Triangulation `_ :cite:`cgal:hdj-t-19b` from the `Computational Geometry Algorithms Library `_ -(CGAL Library :cite:`cgal:eb-19b`). +:cite:`cgal:eb-19b`. 
Remarks ^^^^^^^ -- cgit v1.2.3 From 9b66423fefca29e9e18f08d524b1fa0ce4db85a1 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 11 May 2020 19:13:44 +0200 Subject: Reformat doc --- src/python/gudhi/point_cloud/dtm.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index c5405526..ef6eef05 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -73,7 +73,9 @@ class DistanceToMeasure: class DTMDensity: """ - Density estimator based on the distance to the empirical measure defined by a point set, as defined in :cite:`dtmdensity`. Note that this implementation does not renormalize so the total measure is not 1, see the reference for suitable normalization factors in the Euclidean case. + Density estimator based on the distance to the empirical measure defined by a point set, as defined + in :cite:`dtmdensity`. Note that this implementation does not renormalize so the total measure is not 1, + see the reference for suitable normalization factors in the Euclidean case. """ def __init__(self, k=None, weights=None, q=None, dim=None, **kwargs): @@ -82,8 +84,10 @@ class DTMDensity: k (int): number of neighbors (possibly including the point itself). weights (numpy.array): weights of each of the k neighbors, optional. q (float): order used to compute the distance to measure. Defaults to dim. - dim (float): final exponent representing the dimension. Defaults to the dimension, and must be specified when the dimension cannot be read from the input (metric="neighbors" or metric="precomputed"). - kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. + dim (float): final exponent representing the dimension. 
Defaults to the dimension, and must be specified + when the dimension cannot be read from the input (metric="neighbors" or metric="precomputed"). + kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that + :func:`transform` expects an array with the distances to the k nearest neighbors. """ if weights is None: assert k is not None, "Must specify k or weights" @@ -113,7 +117,9 @@ class DTMDensity: def transform(self, X): """ Args: - X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", or distances to the k nearest neighbors if metric is "neighbors" (if the array has more than k columns, the remaining ones are ignored). + X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed", + or distances to the k nearest neighbors if metric is "neighbors" (if the array has more + than k columns, the remaining ones are ignored). """ q = self.q dim = self.dim -- cgit v1.2.3 From f94c2e1b7ba982fda62239f5c6b378bda867cd40 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 11 May 2020 19:56:06 +0200 Subject: More DOI in the biblio and update references from a preprint to the published version --- biblio/bibliography.bib | 8 +++++++- src/Persistent_cohomology/doc/Intro_persistent_cohomology.h | 2 +- src/common/doc/main_page.md | 2 +- src/python/doc/persistent_cohomology_sum.inc | 2 +- src/python/doc/persistent_cohomology_user.rst | 2 +- 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/biblio/bibliography.bib b/biblio/bibliography.bib index 99a15c5e..3ea2f59f 100644 --- a/biblio/bibliography.bib +++ b/biblio/bibliography.bib @@ -13,7 +13,9 @@ pages = {1--39}, publisher = {JMLR.org}, title = {{Statistical analysis and parameter selection for Mapper}}, volume = {19}, -year = {2018} +year = {2018}, +url = {http://jmlr.org/papers/v19/17-291.html}, +doi = {10.5555/3291125.3291137} } @inproceedings{Dey13, @@ -22,6 +24,7 @@ year = {2018} booktitle = 
{Proceedings of the Twenty-ninth Annual Symposium on Computational Geometry}, year = {2013}, pages = {107--116}, + doi = {10.1145/2462356.2462387} } @article{Carriere16, @@ -832,6 +835,7 @@ book{hatcher2002algebraic, number = {4}, year = {2010}, pages = {367-405}, + doi = {10.1007/s10208-010-9066-0}, ee = {http://dx.doi.org/10.1007/s10208-010-9066-0}, bibsource = {DBLP, http://dblp.uni-trier.de} } @@ -927,6 +931,7 @@ language={English} booktitle = {Symposium on Computational Geometry}, year = {2014}, pages = {345}, + doi = {10.1145/2582112.2582165}, ee = {http://doi.acm.org/10.1145/2582112.2582165}, bibsource = {DBLP, http://dblp.uni-trier.de} } @@ -1241,6 +1246,7 @@ year = "2011" title={Fr{\'e}chet means for distributions of persistence diagrams}, author={Turner, Katharine and Mileyko, Yuriy and Mukherjee, Sayan and Harer, John}, journal={Discrete \& Computational Geometry}, + doi={10.1007/s00454-014-9604-7}, volume={52}, number={1}, pages={44--70}, diff --git a/src/Persistent_cohomology/doc/Intro_persistent_cohomology.h b/src/Persistent_cohomology/doc/Intro_persistent_cohomology.h index 46b784d8..b4f9fd2c 100644 --- a/src/Persistent_cohomology/doc/Intro_persistent_cohomology.h +++ b/src/Persistent_cohomology/doc/Intro_persistent_cohomology.h @@ -21,7 +21,7 @@ namespace persistent_cohomology { \author Clément Maria Computation of persistent cohomology using the algorithm of - \cite DBLP:journals/dcg/SilvaMV11 and \cite DBLP:journals/corr/abs-1208-5018 + \cite DBLP:journals/dcg/SilvaMV11 and \cite DBLP:conf/compgeom/DeyFW14 and the Compressed Annotation Matrix implementation of \cite DBLP:conf/esa/BoissonnatDM13 diff --git a/src/common/doc/main_page.md b/src/common/doc/main_page.md index 6ea10b88..a33d98cd 100644 --- a/src/common/doc/main_page.md +++ b/src/common/doc/main_page.md @@ -312,7 +312,7 @@ theory is essentially composed of three elements: topological spaces, their homology groups and an evolution scheme. 
Computation of persistent cohomology using the algorithm of \cite DBLP:journals/dcg/SilvaMV11 and - \cite DBLP:journals/corr/abs-1208-5018 and the Compressed Annotation Matrix implementation of + \cite DBLP:conf/compgeom/DeyFW14 and the Compressed Annotation Matrix implementation of \cite DBLP:conf/esa/BoissonnatDM13 . diff --git a/src/python/doc/persistent_cohomology_sum.inc b/src/python/doc/persistent_cohomology_sum.inc index 0effb50f..a1ff2eee 100644 --- a/src/python/doc/persistent_cohomology_sum.inc +++ b/src/python/doc/persistent_cohomology_sum.inc @@ -12,7 +12,7 @@ | | | | | | Computation of persistent cohomology using the algorithm of | | | | :cite:`DBLP:journals/dcg/SilvaMV11` and | | - | | :cite:`DBLP:journals/corr/abs-1208-5018` and the Compressed | | + | | :cite:`DBLP:conf/compgeom/DeyFW14` and the Compressed | | | | Annotation Matrix implementation of | | | | :cite:`DBLP:conf/esa/BoissonnatDM13`. | | | | | | diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst index 4d743aac..a3f294b2 100644 --- a/src/python/doc/persistent_cohomology_user.rst +++ b/src/python/doc/persistent_cohomology_user.rst @@ -21,7 +21,7 @@ Definition Computation of persistent cohomology using the algorithm of :cite:`DBLP:journals/dcg/SilvaMV11` and -:cite:`DBLP:journals/corr/abs-1208-5018` and the Compressed Annotation Matrix implementation of +:cite:`DBLP:conf/compgeom/DeyFW14` and the Compressed Annotation Matrix implementation of :cite:`DBLP:conf/esa/BoissonnatDM13`. 
The theory of homology consists in attaching to a topological space a sequence of (homology) groups, capturing global -- cgit v1.2.3 From d86676e247bfa6f29b625a9a5752bf2a2fab438f Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 11 May 2020 21:31:34 +0200 Subject: Normalize the density if asked --- src/python/gudhi/point_cloud/dtm.py | 38 +++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index ef6eef05..2f30908d 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -74,19 +74,27 @@ class DistanceToMeasure: class DTMDensity: """ Density estimator based on the distance to the empirical measure defined by a point set, as defined - in :cite:`dtmdensity`. Note that this implementation does not renormalize so the total measure is not 1, - see the reference for suitable normalization factors in the Euclidean case. + in :cite:`dtmdensity`. Note that this implementation only renormalizes when asked, and the renormalization + only works for a Euclidean metric, so in other cases the total measure may not be 1. """ - def __init__(self, k=None, weights=None, q=None, dim=None, **kwargs): + def __init__(self, k=None, weights=None, q=None, dim=None, normalize=False, n_samples=None, **kwargs): """ Args: k (int): number of neighbors (possibly including the point itself). - weights (numpy.array): weights of each of the k neighbors, optional. + weights (numpy.array): weights of each of the k neighbors, optional. They are supposed to sum to 1. q (float): order used to compute the distance to measure. Defaults to dim. dim (float): final exponent representing the dimension. Defaults to the dimension, and must be specified when the dimension cannot be read from the input (metric="neighbors" or metric="precomputed"). 
- kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNN`, except that metric="neighbors" means that + + .. note:: when the dimension is high, using it as an exponent can quickly lead to under- or overflows. + We recommend using a small fixed value instead in those cases, even if it won't have the same nice + theoretical properties as the dimension. + normalize (bool): normalize the density so it corresponds to a probability measure on ℝᵈ. + Only available for the Euclidean metric, defaults to False. + n_samples (int): number of sample points used for fitting. Only needed if `normalize` is True and + metric is "neighbors". + kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that metric="neighbors" means that :func:`transform` expects an array with the distances to the k nearest neighbors. """ if weights is None: @@ -100,6 +108,8 @@ class DTMDensity: self.q = q self.dim = dim self.params = kwargs + self.normalize=normalize + self.n_samples=n_samples def fit_transform(self, X, y=None): return self.fit(X).transform(X) @@ -110,8 +120,10 @@ class DTMDensity: X (numpy.array): coordinates for mass points. 
""" if self.params.setdefault("metric", "euclidean") != "neighbors": - self.knn = KNN(self.k, return_index=False, return_distance=True, sort_results=False, **self.params) + self.knn = KNearestNeighbors(self.k, return_index=False, return_distance=True, sort_results=False, **self.params) self.knn.fit(X) + if self.params["metric"] != "precomputed": + self.n_samples = len(X) return self def transform(self, X): @@ -136,7 +148,17 @@ class DTMDensity: else: distances = self.knn.transform(X) distances = distances ** q - dtm = (distances * weights).sum(-1) - return dtm ** (-dim / q) + dtm = (distances * self.weights).sum(-1) + if self.normalize: + dtm /= (np.arange(1, self.k + 1) ** (q / dim) * self.weights).sum() + density = dtm ** (-dim / q) + if self.normalize: + import math + if self.params["metric"] == "precomputed": + self.n_samples = len(X[0]) + # Volume of d-ball + Vd = math.pi ** (dim / 2) / math.gamma(dim / 2 + 1) + density /= self.n_samples * Vd + return density # We compute too many powers, 1/p in knn then q in dtm, d/q in dtm then whatever in the caller. # Add option to skip the final root? 
-- cgit v1.2.3 From 8c9a1c674dcacc8b66e88897b6116561bb811ffa Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 11 May 2020 21:55:21 +0200 Subject: Handle k=1 in KNearestNeighbors with SciPy --- src/python/gudhi/point_cloud/knn.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/python/gudhi/point_cloud/knn.py b/src/python/gudhi/point_cloud/knn.py index 34e80b5d..65896847 100644 --- a/src/python/gudhi/point_cloud/knn.py +++ b/src/python/gudhi/point_cloud/knn.py @@ -302,6 +302,10 @@ class KNearestNeighbors: if self.params["implementation"] == "ckdtree": qargs = {key: val for key, val in self.params.items() if key in {"p", "eps", "n_jobs"}} distances, neighbors = self.kdtree.query(X, k=self.k, **qargs) + if k == 1: + # SciPy decided to squeeze the last dimension for k=1 + distances = distances[:, None] + neighbors = neighbors[:, None] if self.return_index: if self.return_distance: return neighbors, distances -- cgit v1.2.3 From 7bbbe63ffa2a812dc49c37c77b4f4a4be46b2a49 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 11 May 2020 23:34:23 +0200 Subject: move note --- src/python/gudhi/point_cloud/dtm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 2f30908d..f8cca2c1 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -76,6 +76,10 @@ class DTMDensity: Density estimator based on the distance to the empirical measure defined by a point set, as defined in :cite:`dtmdensity`. Note that this implementation only renormalizes when asked, and the renormalization only works for a Euclidean metric, so in other cases the total measure may not be 1. + + .. note:: When the dimension is high, using it as an exponent can quickly lead to under- or overflows. + We recommend using a small fixed value instead in those cases, even if it won't have the same nice + theoretical properties as the dimension. 
""" def __init__(self, k=None, weights=None, q=None, dim=None, normalize=False, n_samples=None, **kwargs): @@ -86,10 +90,6 @@ class DTMDensity: q (float): order used to compute the distance to measure. Defaults to dim. dim (float): final exponent representing the dimension. Defaults to the dimension, and must be specified when the dimension cannot be read from the input (metric="neighbors" or metric="precomputed"). - - .. note:: when the dimension is high, using it as an exponent can quickly lead to under- or overflows. - We recommend using a small fixed value instead in those cases, even if it won't have the same nice - theoretical properties as the dimension. normalize (bool): normalize the density so it corresponds to a probability measure on ℝᵈ. Only available for the Euclidean metric, defaults to False. n_samples (int): number of sample points used for fitting. Only needed if `normalize` is True and -- cgit v1.2.3 From 6c17494e02721ca826750155bac14c7f91a173fa Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Tue, 12 May 2020 09:37:32 +0900 Subject: reference and comments added --- biblio/bibliography.bib | 26 ++++++++++++++++++++++++++ src/python/CMakeLists.txt | 4 +++- src/python/doc/rips_complex_ref.rst | 4 +++- src/python/gudhi/weighted_rips_complex.py | 6 +++--- src/python/test/test_weighted_rips.py | 4 ++-- 5 files changed, 37 insertions(+), 7 deletions(-) diff --git a/biblio/bibliography.bib b/biblio/bibliography.bib index 99a15c5e..f405b9bb 100644 --- a/biblio/bibliography.bib +++ b/biblio/bibliography.bib @@ -1247,3 +1247,29 @@ year = "2011" year={2014}, publisher={Springer} } + +@inproceedings{dtmfiltrations, + author = {Hirokazu Anai and + Fr{\'{e}}d{\'{e}}ric Chazal and + Marc Glisse and + Yuichi Ike and + Hiroya Inakoshi and + Rapha{\"{e}}l Tinarrage and + Yuhei Umeda}, + editor = {Gill Barequet and + Yusu Wang}, + title = {DTM-Based Filtrations}, + booktitle = {35th International Symposium on Computational Geometry, SoCG 2019, + June 18-21, 2019, 
Portland, Oregon, {USA}}, + series = {LIPIcs}, + volume = {129}, + pages = {58:1--58:15}, + publisher = {Schloss Dagstuhl - Leibniz-Zentrum f{\"{u}}r Informatik}, + year = {2019}, + url = {https://doi.org/10.4230/LIPIcs.SoCG.2019.58}, + doi = {10.4230/LIPIcs.SoCG.2019.58}, + timestamp = {Tue, 11 Feb 2020 15:52:14 +0100}, + biburl = {https://dblp.org/rec/conf/compgeom/AnaiCGIITU19.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index adf4923b..0aa55467 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -487,7 +487,9 @@ if(PYTHONINTERP_FOUND) endif() # Weighted Rips - add_gudhi_py_test(test_weighted_rips) + if(SCIPY_FOUND) + add_gudhi_py_test(test_weighted_rips) + endif() # Set missing or not modules set(GUDHI_MODULES ${GUDHI_MODULES} "python" CACHE INTERNAL "GUDHI_MODULES") diff --git a/src/python/doc/rips_complex_ref.rst b/src/python/doc/rips_complex_ref.rst index 8fc7e1b0..3c25564a 100644 --- a/src/python/doc/rips_complex_ref.rst +++ b/src/python/doc/rips_complex_ref.rst @@ -25,7 +25,7 @@ Weighted Rips complex reference manual .. automethod:: gudhi.WeightedRipsComplex.__init__ Basic examples -------------- +-------------- The following example computes the weighted Rips filtration associated with a distance matrix and weights on vertices. @@ -60,6 +60,8 @@ Combining with DistanceToMeasure, one can compute the DTM-filtration of a point st = w_rips.create_simplex_tree(max_dimension=2) print(st.persistence()) +The output is: + .. 
testoutput:: [(0, (3.1622776601683795, inf)), (0, (3.1622776601683795, 5.39834563766817)), (0, (3.1622776601683795, 5.39834563766817))] diff --git a/src/python/gudhi/weighted_rips_complex.py b/src/python/gudhi/weighted_rips_complex.py index 7401c428..bccac1ff 100644 --- a/src/python/gudhi/weighted_rips_complex.py +++ b/src/python/gudhi/weighted_rips_complex.py @@ -12,9 +12,9 @@ from gudhi import SimplexTree class WeightedRipsComplex: """ Class to generate a weighted Rips complex from a distance matrix and weights on vertices, - in the way described in the paper 'DTM-based filtrations' https://arxiv.org/abs/1811.04757. - Remark that the filtration value of a vertex is twice of its weight for the consistency with - RipsComplex, which is different from the definition in the paper. + in the way described in :cite:`dtmfiltrations`. + Remark that all the filtration values of vertices are twice of the given weights for the consistency + with RipsComplex, which is different from the definition in the paper. 
""" def __init__(self, distance_matrix, diff --git a/src/python/test/test_weighted_rips.py b/src/python/test/test_weighted_rips.py index 59ec022a..7ef48333 100644 --- a/src/python/test/test_weighted_rips.py +++ b/src/python/test/test_weighted_rips.py @@ -35,8 +35,8 @@ def test_compatibility_with_rips(): ([0, 2], 1.0), ([1, 3], 1.0), ([2, 3], 1.0), - ([1, 2], 1.4142135623730951), - ([0, 3], 1.4142135623730951), + ([1, 2], sqrt(2)), + ([0, 3], sqrt(2)), ] def test_compatibility_with_filtered_rips(): -- cgit v1.2.3 From a9c1e13e7f994e5c8d9f1c3d0311a5815df1e67d Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Tue, 12 May 2020 11:10:16 +0900 Subject: document fixed --- src/python/doc/rips_complex_ref.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/doc/rips_complex_ref.rst b/src/python/doc/rips_complex_ref.rst index 3c25564a..8946d156 100644 --- a/src/python/doc/rips_complex_ref.rst +++ b/src/python/doc/rips_complex_ref.rst @@ -22,7 +22,7 @@ Weighted Rips complex reference manual :undoc-members: :show-inheritance: - .. automethod:: gudhi.WeightedRipsComplex.__init__ + .. automethod:: gudhi.weighted_rips_complex.WeightedRipsComplex.__init__ Basic examples -------------- -- cgit v1.2.3 From 23547c0cbbe9e42b4dfadec3a116751302fd19ab Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Tue, 12 May 2020 11:41:03 +0900 Subject: document fixed --- src/python/doc/rips_complex_ref.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/doc/rips_complex_ref.rst b/src/python/doc/rips_complex_ref.rst index 8946d156..1f73f95b 100644 --- a/src/python/doc/rips_complex_ref.rst +++ b/src/python/doc/rips_complex_ref.rst @@ -17,7 +17,7 @@ Rips complex reference manual Weighted Rips complex reference manual ====================================== -.. autoclass:: gudhi.WeightedRipsComplex +.. 
autoclass:: gudhi.weighted_rips_complex.WeightedRipsComplex :members: :undoc-members: :show-inheritance: -- cgit v1.2.3 From 2c4049895bb2844c2ad1b43b9df51ad5b259fc39 Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Tue, 12 May 2020 13:09:40 +0900 Subject: a test in a document fixed --- src/python/doc/rips_complex_ref.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/doc/rips_complex_ref.rst b/src/python/doc/rips_complex_ref.rst index 1f73f95b..a5b4ffed 100644 --- a/src/python/doc/rips_complex_ref.rst +++ b/src/python/doc/rips_complex_ref.rst @@ -36,7 +36,7 @@ The following example computes the weighted Rips filtration associated with a di weights = [1, 100] w_rips = WeightedRipsComplex(distance_matrix=dist, weights=weights) st = w_rips.create_simplex_tree(max_dimension=2) - print(st.get_filtration()) + print(list(st.get_filtration())) The output is: -- cgit v1.2.3 From c60caee5623d0b1ef55e7b2a5854604080419df1 Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Tue, 12 May 2020 15:06:55 +0900 Subject: comment modified --- src/python/gudhi/weighted_rips_complex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/gudhi/weighted_rips_complex.py b/src/python/gudhi/weighted_rips_complex.py index bccac1ff..0541572b 100644 --- a/src/python/gudhi/weighted_rips_complex.py +++ b/src/python/gudhi/weighted_rips_complex.py @@ -13,8 +13,8 @@ class WeightedRipsComplex: """ Class to generate a weighted Rips complex from a distance matrix and weights on vertices, in the way described in :cite:`dtmfiltrations`. - Remark that all the filtration values of vertices are twice of the given weights for the consistency - with RipsComplex, which is different from the definition in the paper. + Remark that all the filtration values are doubled compared to the definition in the paper + for the consistency with RipsComplex. 
""" def __init__(self, distance_matrix, -- cgit v1.2.3 From c87a1f10e048477d210ae0abd657da87bba1102a Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 12 May 2020 20:36:38 +0200 Subject: test + reformat --- src/python/gudhi/point_cloud/dtm.py | 9 ++++++--- src/python/test/test_dtm.py | 11 ++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index f8cca2c1..4454d8a2 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -108,8 +108,8 @@ class DTMDensity: self.q = q self.dim = dim self.params = kwargs - self.normalize=normalize - self.n_samples=n_samples + self.normalize = normalize + self.n_samples = n_samples def fit_transform(self, X, y=None): return self.fit(X).transform(X) @@ -120,7 +120,9 @@ class DTMDensity: X (numpy.array): coordinates for mass points. """ if self.params.setdefault("metric", "euclidean") != "neighbors": - self.knn = KNearestNeighbors(self.k, return_index=False, return_distance=True, sort_results=False, **self.params) + self.knn = KNearestNeighbors( + self.k, return_index=False, return_distance=True, sort_results=False, **self.params + ) self.knn.fit(X) if self.params["metric"] != "precomputed": self.n_samples = len(X) @@ -154,6 +156,7 @@ class DTMDensity: density = dtm ** (-dim / q) if self.normalize: import math + if self.params["metric"] == "precomputed": self.n_samples = len(X[0]) # Volume of d-ball diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index bff4c267..34d28d4d 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -8,10 +8,11 @@ - YYYY/MM Author: Description of the modification """ -from gudhi.point_cloud.dtm import DistanceToMeasure +from gudhi.point_cloud.dtm import DistanceToMeasure, DTMDensity import numpy import pytest import torch +import math def test_dtm_compare_euclidean(): @@ -66,3 +67,11 @@ def test_dtm_precomputed(): dtm = 
DistanceToMeasure(2, q=2, metric="neighbors") r = dtm.fit_transform(dist) assert r == pytest.approx([2.0, 0.707, 3.5355], rel=0.01) + + +def test_density_normalized(): + sample = numpy.random.normal(0, 1, (1000000, 2)) + queries = numpy.array([[0.0, 0.0], [-0.5, 0.7], [0.4, 1.7]]) + expected = numpy.exp(-(queries ** 2).sum(-1) / 2) / (2 * math.pi) + estimated = DTMDensity(k=150, normalize=True).fit(sample).transform(queries) + assert estimated == pytest.approx(expected, rel=0.4) -- cgit v1.2.3 From c5fca5477cc6fff77acedf7b5324eb5f8b417ed3 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 12 May 2020 22:31:42 +0200 Subject: More test --- src/python/doc/point_cloud_sum.inc | 4 ++-- src/python/gudhi/point_cloud/dtm.py | 4 ++-- src/python/test/test_dtm.py | 7 +++++++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/python/doc/point_cloud_sum.inc b/src/python/doc/point_cloud_sum.inc index d4761aba..d28f387a 100644 --- a/src/python/doc/point_cloud_sum.inc +++ b/src/python/doc/point_cloud_sum.inc @@ -3,8 +3,8 @@ +----------------------------------------------------------------+------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------+ | | :math:`(x_1, x_2, \ldots, x_d)` | Utilities to process point clouds: read from file, subsample, | :Authors: Vincent Rouvreau, Marc Glisse, Masatoshi Takenouchi | - | | :math:`(y_1, y_2, \ldots, y_d)` | find neighbors, embed time series in higher dimension, etc. | | - | | | :Since: GUDHI 2.0.0 | + | | :math:`(y_1, y_2, \ldots, y_d)` | find neighbors, embed time series in higher dimension, estimate | | + | | a density, etc. | :Since: GUDHI 2.0.0 | | | | | | | | :License: MIT (`GPL v3 `_, BSD-3-Clause, Apache-2.0) | | | Parts of this package require CGAL. 
| | diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 4454d8a2..88f197e7 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -89,7 +89,7 @@ class DTMDensity: weights (numpy.array): weights of each of the k neighbors, optional. They are supposed to sum to 1. q (float): order used to compute the distance to measure. Defaults to dim. dim (float): final exponent representing the dimension. Defaults to the dimension, and must be specified - when the dimension cannot be read from the input (metric="neighbors" or metric="precomputed"). + when the dimension cannot be read from the input (metric is "neighbors" or "precomputed"). normalize (bool): normalize the density so it corresponds to a probability measure on ℝᵈ. Only available for the Euclidean metric, defaults to False. n_samples (int): number of sample points used for fitting. Only needed if `normalize` is True and @@ -146,7 +146,7 @@ class DTMDensity: if q is None: q = dim if self.params["metric"] == "neighbors": - distances = X[:, : self.k] + distances = np.asarray(X)[:, : self.k] else: distances = self.knn.transform(X) distances = distances ** q diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 34d28d4d..8ab0cc44 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -75,3 +75,10 @@ def test_density_normalized(): expected = numpy.exp(-(queries ** 2).sum(-1) / 2) / (2 * math.pi) estimated = DTMDensity(k=150, normalize=True).fit(sample).transform(queries) assert estimated == pytest.approx(expected, rel=0.4) + + +def test_density(): + distances = [[0, 1, 10], [2, 0, 30], [1, 3, 5]] + density = DTMDensity(k=2, metric="neighbors", dim=1).fit_transform(distances) + expected = numpy.array([2.0, 1.0, 0.5]) + assert density == pytest.approx(expected) -- cgit v1.2.3 From fd7112b7e665d495543d9647f675a14f75061bbf Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Wed, 13 May 2020 09:54:47 +0900 
Subject: documents modified --- src/python/doc/rips_complex_ref.rst | 42 ------------------------------- src/python/doc/rips_complex_sum.inc | 3 +++ src/python/doc/rips_complex_user.rst | 48 ++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 42 deletions(-) diff --git a/src/python/doc/rips_complex_ref.rst b/src/python/doc/rips_complex_ref.rst index a5b4ffed..9ae3c49c 100644 --- a/src/python/doc/rips_complex_ref.rst +++ b/src/python/doc/rips_complex_ref.rst @@ -23,45 +23,3 @@ Weighted Rips complex reference manual :show-inheritance: .. automethod:: gudhi.weighted_rips_complex.WeightedRipsComplex.__init__ - -Basic examples --------------- - -The following example computes the weighted Rips filtration associated with a distance matrix and weights on vertices. - -.. testcode:: - - from gudhi.weighted_rips_complex import WeightedRipsComplex - dist = [[], [1]] - weights = [1, 100] - w_rips = WeightedRipsComplex(distance_matrix=dist, weights=weights) - st = w_rips.create_simplex_tree(max_dimension=2) - print(list(st.get_filtration())) - -The output is: - -.. testoutput:: - - [([0], 2.0), ([1], 200.0), ([0, 1], 200.0)] - -Combining with DistanceToMeasure, one can compute the DTM-filtration of a point set, as in `this notebook `_. - -.. testcode:: - - import numpy as np - from scipy.spatial.distance import cdist - from gudhi.point_cloud.dtm import DistanceToMeasure - from gudhi.weighted_rips_complex import WeightedRipsComplex - pts = np.array([[2.0, 2.0], [0.0, 1.0], [3.0, 4.0]]) - dist = cdist(pts,pts) - dtm = DistanceToMeasure(2, q=2, metric="precomputed") - r = dtm.fit_transform(dist) - w_rips = WeightedRipsComplex(distance_matrix=dist, weights=r) - st = w_rips.create_simplex_tree(max_dimension=2) - print(st.persistence()) - -The output is: - -.. 
testoutput:: - - [(0, (3.1622776601683795, inf)), (0, (3.1622776601683795, 5.39834563766817)), (0, (3.1622776601683795, 5.39834563766817))] diff --git a/src/python/doc/rips_complex_sum.inc b/src/python/doc/rips_complex_sum.inc index 6feb74cd..f7580714 100644 --- a/src/python/doc/rips_complex_sum.inc +++ b/src/python/doc/rips_complex_sum.inc @@ -11,6 +11,9 @@ | | | | | | This complex can be built from a point cloud and a distance function, | | | | or from a distance matrix. | | + | | | | + | | Weighted Rips complex constructs a simplicial complex from a distance | | + | | matrix and weights on vertices. | | +----------------------------------------------------------------+------------------------------------------------------------------------+----------------------------------------------------------------------+ | * :doc:`rips_complex_user` | * :doc:`rips_complex_ref` | +----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index 8efb12e6..adb002a8 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -347,3 +347,51 @@ until dimension 1 - one skeleton graph in other words), the output is: points in the persistence diagram will be under the diagonal, and bottleneck distance and persistence graphical tool will not work properly, this is a known issue. + +Weighted Rips Complex +--------------------- + +Example from a distance matrix and weights +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following example computes the weighted Rips filtration associated with a distance matrix and weights on vertices. + +.. 
testcode:: + + from gudhi.weighted_rips_complex import WeightedRipsComplex + dist = [[], [1]] + weights = [1, 100] + w_rips = WeightedRipsComplex(distance_matrix=dist, weights=weights) + st = w_rips.create_simplex_tree(max_dimension=2) + print(list(st.get_filtration())) + +The output is: + +.. testoutput:: + + [([0], 2.0), ([1], 200.0), ([0, 1], 200.0)] + +Example from a point cloud combined with DistanceToMeasure +---------------------------------------------------------- + +Combining with DistanceToMeasure, one can compute the DTM-filtration of a point set, as in `this notebook `_. + +.. testcode:: + + import numpy as np + from scipy.spatial.distance import cdist + from gudhi.point_cloud.dtm import DistanceToMeasure + from gudhi.weighted_rips_complex import WeightedRipsComplex + pts = np.array([[2.0, 2.0], [0.0, 1.0], [3.0, 4.0]]) + dist = cdist(pts,pts) + dtm = DistanceToMeasure(2, q=2, metric="precomputed") + r = dtm.fit_transform(dist) + w_rips = WeightedRipsComplex(distance_matrix=dist, weights=r) + st = w_rips.create_simplex_tree(max_dimension=2) + print(st.persistence()) + +The output is: + +.. 
testoutput:: + + [(0, (3.1622776601683795, inf)), (0, (3.1622776601683795, 5.39834563766817)), (0, (3.1622776601683795, 5.39834563766817))] -- cgit v1.2.3 From 7b4ffb762edae9036cbec12b34eeb64f2cffd0e7 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 13 May 2020 18:12:58 +0200 Subject: Rephrase comment about cubes --- .../include/gudhi/Bitmap_cubical_complex_base.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h index e0c567ae..99487dc3 100644 --- a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h +++ b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h @@ -112,9 +112,9 @@ class Bitmap_cubical_complex_base { /** * This function finds a top-dimensional cell that is incident to the input cell and has * the same filtration value. In case several cells are suitable, an arbitrary one is - * returned. Note that the input parameter is not necessarily a cube, it might also - * be an edge or vertex of a cube. On the other hand, the output is always indicating the position of - * a cube in the data structure. + * returned. Note that the input parameter can be a cell of any dimension (vertex, edge, etc). + * On the other hand, the output is always indicating the position of + * a top-dimensional cube in the data structure. 
**/ inline size_t get_top_dimensional_coface_of_a_cell(size_t splx); -- cgit v1.2.3 From 5c3e042628b7db2b82d92f644f7ab0fc409a357b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 13 May 2020 18:45:28 +0200 Subject: BOOST_UNREACHABLE_RETURN + comment --- .../include/gudhi/Bitmap_cubical_complex_base.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h index 99487dc3..5927bbec 100644 --- a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h +++ b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h @@ -13,6 +13,8 @@ #include +#include + #include #include #include @@ -115,6 +117,7 @@ class Bitmap_cubical_complex_base { * returned. Note that the input parameter can be a cell of any dimension (vertex, edge, etc). * On the other hand, the output is always indicating the position of * a top-dimensional cube in the data structure. + * \pre The filtration values are assigned as per `impose_lower_star_filtration()`. **/ inline size_t get_top_dimensional_coface_of_a_cell(size_t splx); @@ -621,7 +624,7 @@ size_t Bitmap_cubical_complex_base::get_top_dimensional_coface_of_a_cell(size } } } - return splx; + BOOST_UNREACHABLE_RETURN(-2); } template -- cgit v1.2.3 From b2118cde83056b43cea095f5208d37744c9f088f Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 13 May 2020 18:51:16 +0200 Subject: compute_persistence --- src/python/gudhi/cubical_complex.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index b16a037f..9ebd0b30 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -200,7 +200,7 @@ cdef class CubicalComplex: integers of each row in each array correspond to: (index of positive top-dimensional cell). 
""" - assert self.pcohptr != NULL, "cofaces_of_persistence_pairs function requires persistence function to be launched first." + assert self.pcohptr != NULL, "compute_persistence() must be called before cofaces_of_persistence_pairs()" cdef vector[vector[int]] persistence_result output = [[],[]] -- cgit v1.2.3 From 7bbc1ae35d492123c517a54a9595188938e52dff Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 13 May 2020 19:32:21 +0200 Subject: More size_t --- .../include/Persistent_cohomology_interface.h | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/python/include/Persistent_cohomology_interface.h b/src/python/include/Persistent_cohomology_interface.h index cec18546..e5a3dfba 100644 --- a/src/python/include/Persistent_cohomology_interface.h +++ b/src/python/include/Persistent_cohomology_interface.h @@ -13,6 +13,7 @@ #include +#include #include #include // for std::pair #include // for sort @@ -81,32 +82,31 @@ persistent_cohomology::Persistent_cohomology::get_persistent_pairs(); // Gather all top-dimensional cells and store their simplex handles - std::vector max_splx; for (auto splx : stptr_->top_dimensional_cells_range()){ - max_splx.push_back(splx); - } + std::vector max_splx; + for (auto splx : stptr_->top_dimensional_cells_range()) + max_splx.push_back(splx); // Sort these simplex handles and compute the ordering function - // This function allows to go directly from the simplex handle to the position of the corresponding top-dimensional cell in the input data - std::unordered_map order; - //std::sort(max_splx.begin(), max_splx.end()); + // This function allows to go directly from the simplex handle to the position of the corresponding top-dimensional cell in the input data + std::unordered_map order; + //std::sort(max_splx.begin(), max_splx.end()); for (unsigned int i = 0; i < max_splx.size(); i++) order.emplace(max_splx[i], i); std::vector> persistence_pairs; for (auto pair : pairs) { int h = 
stptr_->dimension(get<0>(pair)); // Recursively get the top-dimensional cell / coface associated to the persistence generator - int face0 = stptr_->get_top_dimensional_coface_of_a_cell(get<0>(pair)); + std::size_t face0 = stptr_->get_top_dimensional_coface_of_a_cell(get<0>(pair)); // Retrieve the index of the corresponding top-dimensional cell in the input data int splx0 = order[face0]; int splx1 = -1; - if (isfinite(stptr_->filtration(get<1>(pair)))){ - // Recursively get the top-dimensional cell / coface associated to the persistence generator - int face1 = stptr_->get_top_dimensional_coface_of_a_cell(get<1>(pair)); - // Retrieve the index of the corresponding top-dimensional cell in the input data - splx1 = order[face1]; + if (get<1>(pair) != stptr_->null_simplex()){ + // Recursively get the top-dimensional cell / coface associated to the persistence generator + std::size_t face1 = stptr_->get_top_dimensional_coface_of_a_cell(get<1>(pair)); + // Retrieve the index of the corresponding top-dimensional cell in the input data + splx1 = order[face1]; } - std::vector vect{ h, splx0, splx1}; - persistence_pairs.push_back(vect); + persistence_pairs.push_back({ h, splx0, splx1 }); } return persistence_pairs; } -- cgit v1.2.3 From b0ae08e93fdba8a1faec56c2230b6f542653c49e Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 13 May 2020 20:17:26 +0200 Subject: Trailing whitespace --- .../include/gudhi/Bitmap_cubical_complex_base.h | 8 ++--- src/python/gudhi/cubical_complex.pyx | 34 +++++++++++----------- src/python/gudhi/periodic_cubical_complex.pyx | 34 +++++++++++----------- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h index 5927bbec..58d9208d 100644 --- a/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h +++ b/src/Bitmap_cubical_complex/include/gudhi/Bitmap_cubical_complex_base.h @@ 
-112,8 +112,8 @@ class Bitmap_cubical_complex_base { virtual inline std::vector get_coboundary_of_a_cell(std::size_t cell) const; /** - * This function finds a top-dimensional cell that is incident to the input cell and has - * the same filtration value. In case several cells are suitable, an arbitrary one is + * This function finds a top-dimensional cell that is incident to the input cell and has + * the same filtration value. In case several cells are suitable, an arbitrary one is * returned. Note that the input parameter can be a cell of any dimension (vertex, edge, etc). * On the other hand, the output is always indicating the position of * a top-dimensional cube in the data structure. @@ -617,12 +617,12 @@ void Bitmap_cubical_complex_base::setup_bitmap_based_on_top_dimensional_cells template size_t Bitmap_cubical_complex_base::get_top_dimensional_coface_of_a_cell(size_t splx) { if (this->get_dimension_of_a_cell(splx) == this->dimension()){return splx;} - else{ + else{ for (auto v : this->get_coboundary_of_a_cell(splx)){ if(this->get_cell_data(v) == this->get_cell_data(splx)){ return this->get_top_dimensional_coface_of_a_cell(v); } - } + } } BOOST_UNREACHABLE_RETURN(-2); } diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index 9ebd0b30..ca979eda 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -172,31 +172,31 @@ cdef class CubicalComplex: return self.pcohptr.get_persistence() def cofaces_of_persistence_pairs(self): - """A persistence interval is described by a pair of cells, one that creates the - feature and one that kills it. The filtration values of those 2 cells give coordinates - for a point in a persistence diagram, or a bar in a barcode. Structurally, in the - cubical complexes provided here, the filtration value of any cell is the minimum of the - filtration values of the maximal cells that contain it. 
Connecting persistence diagram - coordinates to the corresponding value in the input (i.e. the filtration values of + """A persistence interval is described by a pair of cells, one that creates the + feature and one that kills it. The filtration values of those 2 cells give coordinates + for a point in a persistence diagram, or a bar in a barcode. Structurally, in the + cubical complexes provided here, the filtration value of any cell is the minimum of the + filtration values of the maximal cells that contain it. Connecting persistence diagram + coordinates to the corresponding value in the input (i.e. the filtration values of the top-dimensional cells) is useful for differentiation purposes. - This function returns a list of pairs of top-dimensional cells corresponding to - the persistence birth and death cells of the filtration. The cells are represented by - their indices in the input list of top-dimensional cells (and not their indices in the - internal datastructure that includes non-maximal cells). Note that when two adjacent + This function returns a list of pairs of top-dimensional cells corresponding to + the persistence birth and death cells of the filtration. The cells are represented by + their indices in the input list of top-dimensional cells (and not their indices in the + internal datastructure that includes non-maximal cells). Note that when two adjacent top-dimensional cells have the same filtration value, we arbitrarily return one of the two when calling the function on one of their common faces. - :returns: The top-dimensional cells/cofaces of the positive and negative cells, + :returns: The top-dimensional cells/cofaces of the positive and negative cells, together with the corresponding homological dimension, in two lists of numpy arrays of integers. - The first list contains the regular persistence pairs, grouped by dimension. + The first list contains the regular persistence pairs, grouped by dimension. 
It contains numpy arrays of shape [number_of_persistence_points, 2]. - The indices of the arrays in the list correspond to the homological dimensions, and the - integers of each row in each array correspond to: (index of positive top-dimensional cell, - index of negative top-dimensional cell). - The second list contains the essential features, grouped by dimension. + The indices of the arrays in the list correspond to the homological dimensions, and the + integers of each row in each array correspond to: (index of positive top-dimensional cell, + index of negative top-dimensional cell). + The second list contains the essential features, grouped by dimension. It contains numpy arrays of shape [number_of_persistence_points, 1]. - The indices of the arrays in the list correspond to the homological dimensions, and the + The indices of the arrays in the list correspond to the homological dimensions, and the integers of each row in each array correspond to: (index of positive top-dimensional cell). """ diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index 3cf2ff01..06309772 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -177,31 +177,31 @@ cdef class PeriodicCubicalComplex: return self.pcohptr.get_persistence() def cofaces_of_persistence_pairs(self): - """A persistence interval is described by a pair of cells, one that creates the - feature and one that kills it. The filtration values of those 2 cells give coordinates - for a point in a persistence diagram, or a bar in a barcode. Structurally, in the - cubical complexes provided here, the filtration value of any cell is the minimum of the - filtration values of the maximal cells that contain it. Connecting persistence diagram - coordinates to the corresponding value in the input (i.e. 
the filtration values of + """A persistence interval is described by a pair of cells, one that creates the + feature and one that kills it. The filtration values of those 2 cells give coordinates + for a point in a persistence diagram, or a bar in a barcode. Structurally, in the + cubical complexes provided here, the filtration value of any cell is the minimum of the + filtration values of the maximal cells that contain it. Connecting persistence diagram + coordinates to the corresponding value in the input (i.e. the filtration values of the top-dimensional cells) is useful for differentiation purposes. - This function returns a list of pairs of top-dimensional cells corresponding to - the persistence birth and death cells of the filtration. The cells are represented by - their indices in the input list of top-dimensional cells (and not their indices in the - internal datastructure that includes non-maximal cells). Note that when two adjacent + This function returns a list of pairs of top-dimensional cells corresponding to + the persistence birth and death cells of the filtration. The cells are represented by + their indices in the input list of top-dimensional cells (and not their indices in the + internal datastructure that includes non-maximal cells). Note that when two adjacent top-dimensional cells have the same filtration value, we arbitrarily return one of the two when calling the function on one of their common faces. - :returns: The top-dimensional cells/cofaces of the positive and negative cells, + :returns: The top-dimensional cells/cofaces of the positive and negative cells, together with the corresponding homological dimension, in two lists of numpy arrays of integers. - The first list contains the regular persistence pairs, grouped by dimension. + The first list contains the regular persistence pairs, grouped by dimension. It contains numpy arrays of shape [number_of_persistence_points, 2]. 
- The indices of the arrays in the list correspond to the homological dimensions, and the - integers of each row in each array correspond to: (index of positive top-dimensional cell, - index of negative top-dimensional cell). - The second list contains the essential features, grouped by dimension. + The indices of the arrays in the list correspond to the homological dimensions, and the + integers of each row in each array correspond to: (index of positive top-dimensional cell, + index of negative top-dimensional cell). + The second list contains the essential features, grouped by dimension. It contains numpy arrays of shape [number_of_persistence_points, 1]. - The indices of the arrays in the list correspond to the homological dimensions, and the + The indices of the arrays in the list correspond to the homological dimensions, and the integers of each row in each array correspond to: (index of positive top-dimensional cell). """ cdef vector[vector[int]] persistence_result -- cgit v1.2.3 From 4d27d32308f94e63d76bbd5564b8837b94b24339 Mon Sep 17 00:00:00 2001 From: yuichi-ike Date: Thu, 14 May 2020 17:56:10 +0900 Subject: document modified --- src/python/doc/rips_complex_ref.rst | 2 ++ src/python/doc/rips_complex_user.rst | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/python/doc/rips_complex_ref.rst b/src/python/doc/rips_complex_ref.rst index 9ae3c49c..5f3e46c1 100644 --- a/src/python/doc/rips_complex_ref.rst +++ b/src/python/doc/rips_complex_ref.rst @@ -13,6 +13,8 @@ Rips complex reference manual .. automethod:: gudhi.RipsComplex.__init__ +.. 
_weighted-rips-complex-reference-manual: + ====================================== Weighted Rips complex reference manual ====================================== diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index adb002a8..819568be 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -351,6 +351,9 @@ until dimension 1 - one skeleton graph in other words), the output is: Weighted Rips Complex --------------------- +`WeightedRipsComplex `_ builds a simplicial complex from a distance matrix and weights on vertices. + + Example from a distance matrix and weights ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -372,7 +375,7 @@ The output is: [([0], 2.0), ([1], 200.0), ([0, 1], 200.0)] Example from a point cloud combined with DistanceToMeasure ----------------------------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Combining with DistanceToMeasure, one can compute the DTM-filtration of a point set, as in `this notebook `_. -- cgit v1.2.3 From a74503eca0f30a8183719008cd02b48823ba72d4 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Sat, 16 May 2020 09:52:47 +0200 Subject: Release note for version 3.2.0 --- .github/next_release.md | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/.github/next_release.md b/.github/next_release.md index 83b98a1c..1112ef70 100644 --- a/.github/next_release.md +++ b/.github/next_release.md @@ -1,21 +1,50 @@ -We are pleased to announce the release 3.X.X of the GUDHI library. +We are pleased to announce the release 3.2.0 of the GUDHI library. As a major new feature, the GUDHI library now offers a Python interface to [Hera](https://bitbucket.org/grey_narn/hera/src/master/) to compute the Wasserstein distance. [PyBind11](https://github.com/pybind/pybind11) is now required to build the Python module. 
-We are now using GitHub to develop the GUDHI library, do not hesitate to [fork the GUDHI project on GitHub](https://github.com/GUDHI/gudhi-devel). From a user point of view, we recommend to download GUDHI user version (gudhi.3.X.X.tar.gz). +We are now using GitHub to develop the GUDHI library, do not hesitate to [fork the GUDHI project on GitHub](https://github.com/GUDHI/gudhi-devel). From a user point of view, we recommend to download GUDHI user version (gudhi.3.2.0.tar.gz). Below is a list of changes made since GUDHI 3.1.1: +- Point cloud utilities + - A new module [Time Delay Embedding](https://gudhi.inria.fr/python/latest/point_cloud.html#time-delay-embedding) + to embed time-series data in the R^d according to [Takens' Embedding Theorem](https://en.wikipedia.org/wiki/Takens%27s_theorem) + and obtain the coordinates of each point. + - A new module [K Nearest Neighbors](https://gudhi.inria.fr/python/latest/point_cloud.html#k-nearest-neighbors) + that wraps several implementations for computing the k nearest neighbors in a point set. + - A new module [Distance To Measure](https://gudhi.inria.fr/python/latest/point_cloud.html#distance-to-measure) + to compute the distance to the empirical measure defined by a point set + +- [Persistence representations](https://gudhi.inria.fr/python/latest/representations.html) + - Interface to Wasserstein distances. + +- Rips complex + - A new module [Weighted Rips Complex](https://gudhi.inria.fr/python/latest/rips_complex_user.html#weighted-rips-complex) + to construct a simplicial complex from a distance matrix and weights on vertices. + - [Wassertein distance](https://gudhi.inria.fr/python/latest/wasserstein_distance_user.html) - - An another implementation comes from Hera (BSD-3-Clause) which is based on [Geometry Helps to Compare Persistence Diagrams](http://doi.acm.org/10.1145/3064175) by Michael Kerber, Dmitriy Morozov, and Arnur Nigmetov. 
+ - An [another implementation](https://gudhi.inria.fr/python/latest/wasserstein_distance_user.html#hera) + comes from Hera (BSD-3-Clause) which is based on [Geometry Helps to Compare Persistence Diagrams](http://doi.acm.org/10.1145/3064175) + by Michael Kerber, Dmitriy Morozov, and Arnur Nigmetov. - `gudhi.wasserstein.wasserstein_distance` has now an option to return the optimal matching that achieves the distance between the two diagrams. + - A new module [Barycenters](https://gudhi.inria.fr/python/latest/wasserstein_distance_user.html#barycenters) + to estimate the Frechet mean (aka Wasserstein barycenter) between persistence diagrams. + +- [Simplex tree](https://gudhi.inria.fr/python/latest/simplex_tree_ref.html) + - Extend filtration method to compute extended persistence + - Flag and lower star persistence pairs generators + - A new interface to filtration, simplices and skeleton getters to return an iterator + +- [Alpha complex](https://gudhi.inria.fr/doc/latest/group__alpha__complex.html) + - Improve computations (cache circumcenters computation and point comparison improvement) -- [Module](link) - - ... +- [Persistence graphical tools](https://gudhi.inria.fr/python/latest/persistence_graphical_tools_user.html) + - Use LaTeX style and grey block + - (N x 2) numpy arrays as input - Miscellaneous - - The [list of bugs that were solved since GUDHI-3.1.1](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.2.0+is%3Aclosed) is available on GitHub. + - The [list of bugs that were solved since GUDHI-3.2.0](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.2.0+is%3Aclosed) is available on GitHub. All modules are distributed under the terms of the MIT license. However, there are still GPL dependencies for many modules. We invite you to check our [license dedicated web page](https://gudhi.inria.fr/licensing/) for further details. 
-- cgit v1.2.3 From 8dfc31c57586b07524728c939593f216c5d640f5 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com> Date: Sat, 16 May 2020 10:33:23 +0200 Subject: Add submodule init in the worflow --- .github/for_maintainers/new_gudhi_version_creation.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/for_maintainers/new_gudhi_version_creation.md b/.github/for_maintainers/new_gudhi_version_creation.md index f176d392..8674222b 100644 --- a/.github/for_maintainers/new_gudhi_version_creation.md +++ b/.github/for_maintainers/new_gudhi_version_creation.md @@ -16,6 +16,7 @@ rm -rf data/points/COIL_database/lucky_cat.off_dist data/points/COIL_database/lu Checkin the modifications, build and test the version: ```bash +git submodule update --init mkdir build cd build cmake -DCGAL_DIR=/your/path/to/CGAL -DWITH_GUDHI_EXAMPLE=ON -DWITH_GUDHI_BENCHMARK=ON -DUSER_VERSION_DIR=gudhi.@GUDHI_VERSION@ -DPython_ADDITIONAL_VERSIONS=3 .. -- cgit v1.2.3 From 84b823b6436746a06cb8323fecd7b1f38d7ba244 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 16 May 2020 13:30:19 +0200 Subject: Minimal nogil support for cubical complexes --- src/python/gudhi/cubical_complex.pyx | 29 +++++++++++++++------------ src/python/gudhi/periodic_cubical_complex.pyx | 29 +++++++++++++++------------ 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index ca979eda..308b5099 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -27,20 +27,20 @@ __license__ = "MIT" cdef extern from "Cubical_complex_interface.h" namespace "Gudhi": cdef cppclass Bitmap_cubical_complex_base_interface "Gudhi::Cubical_complex::Cubical_complex_interface<>": - Bitmap_cubical_complex_base_interface(vector[unsigned] dimensions, vector[double] top_dimensional_cells) - Bitmap_cubical_complex_base_interface(string perseus_file) - int num_simplices() - int 
dimension() + Bitmap_cubical_complex_base_interface(vector[unsigned] dimensions, vector[double] top_dimensional_cells) nogil + Bitmap_cubical_complex_base_interface(string perseus_file) nogil + int num_simplices() nogil + int dimension() nogil cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface>": - Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max) - void compute_persistence(int homology_coeff_field, double min_persistence) - vector[pair[int, pair[double, double]]] get_persistence() - vector[vector[int]] cofaces_of_cubical_persistence_pairs() - vector[int] betti_numbers() - vector[int] persistent_betti_numbers(double from_value, double to_value) - vector[pair[double,double]] intervals_in_dimension(int dimension) + Cubical_complex_persistence_interface(Bitmap_cubical_complex_base_interface * st, bool persistence_dim_max) nogil + void compute_persistence(int homology_coeff_field, double min_persistence) nogil + vector[pair[int, pair[double, double]]] get_persistence() nogil + vector[vector[int]] cofaces_of_cubical_persistence_pairs() nogil + vector[int] betti_numbers() nogil + vector[int] persistent_betti_numbers(double from_value, double to_value) nogil + vector[pair[double,double]] intervals_in_dimension(int dimension) nogil # CubicalComplex python interface cdef class CubicalComplex: @@ -151,8 +151,11 @@ cdef class CubicalComplex: if self.pcohptr != NULL: del self.pcohptr assert self.__is_defined() - self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, True) - self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + cdef int field = homology_coeff_field + cdef double minp = min_persistence + with nogil: + self.pcohptr = new Cubical_complex_persistence_interface(self.thisptr, 1) + self.pcohptr.compute_persistence(field, minp) def persistence(self, 
homology_coeff_field=11, min_persistence=0): """This function computes and returns the persistence of the complex. diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index 06309772..dcca7b63 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -24,20 +24,20 @@ __license__ = "MIT" cdef extern from "Cubical_complex_interface.h" namespace "Gudhi": cdef cppclass Periodic_cubical_complex_base_interface "Gudhi::Cubical_complex::Cubical_complex_interface>": - Periodic_cubical_complex_base_interface(vector[unsigned] dimensions, vector[double] top_dimensional_cells, vector[bool] periodic_dimensions) - Periodic_cubical_complex_base_interface(string perseus_file) - int num_simplices() - int dimension() + Periodic_cubical_complex_base_interface(vector[unsigned] dimensions, vector[double] top_dimensional_cells, vector[bool] periodic_dimensions) nogil + Periodic_cubical_complex_base_interface(string perseus_file) nogil + int num_simplices() nogil + int dimension() nogil cdef extern from "Persistent_cohomology_interface.h" namespace "Gudhi": cdef cppclass Periodic_cubical_complex_persistence_interface "Gudhi::Persistent_cohomology_interface>>": - Periodic_cubical_complex_persistence_interface(Periodic_cubical_complex_base_interface * st, bool persistence_dim_max) - void compute_persistence(int homology_coeff_field, double min_persistence) - vector[pair[int, pair[double, double]]] get_persistence() - vector[vector[int]] cofaces_of_cubical_persistence_pairs() - vector[int] betti_numbers() - vector[int] persistent_betti_numbers(double from_value, double to_value) - vector[pair[double,double]] intervals_in_dimension(int dimension) + Periodic_cubical_complex_persistence_interface(Periodic_cubical_complex_base_interface * st, bool persistence_dim_max) nogil + void compute_persistence(int homology_coeff_field, double min_persistence) nogil + vector[pair[int, pair[double, 
double]]] get_persistence() nogil + vector[vector[int]] cofaces_of_cubical_persistence_pairs() nogil + vector[int] betti_numbers() nogil + vector[int] persistent_betti_numbers(double from_value, double to_value) nogil + vector[pair[double,double]] intervals_in_dimension(int dimension) nogil # PeriodicCubicalComplex python interface cdef class PeriodicCubicalComplex: @@ -156,8 +156,11 @@ cdef class PeriodicCubicalComplex: if self.pcohptr != NULL: del self.pcohptr assert self.__is_defined() - self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, True) - self.pcohptr.compute_persistence(homology_coeff_field, min_persistence) + cdef int field = homology_coeff_field + cdef double minp = min_persistence + with nogil: + self.pcohptr = new Periodic_cubical_complex_persistence_interface(self.thisptr, 1) + self.pcohptr.compute_persistence(field, minp) def persistence(self, homology_coeff_field=11, min_persistence=0): """This function computes and returns the persistence of the complex. -- cgit v1.2.3 From 207050fb1f5af375a98c70dbd5fc22149d6f6e22 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 16 May 2020 14:08:23 +0200 Subject: nogil for cubical constructor There may be some extra copying until cython3, but it is probably not that bad. 
--- src/python/gudhi/cubical_complex.pyx | 14 +++++++++++--- src/python/gudhi/periodic_cubical_complex.pyx | 18 +++++++++++------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index 308b5099..0068f2ff 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -80,7 +80,7 @@ cdef class CubicalComplex: perseus_file=''): if ((dimensions is not None) and (top_dimensional_cells is not None) and (perseus_file == '')): - self.thisptr = new Bitmap_cubical_complex_base_interface(dimensions, top_dimensional_cells) + self._construct_from_cells(dimensions, top_dimensional_cells) elif ((dimensions is None) and (top_dimensional_cells is not None) and (perseus_file == '')): top_dimensional_cells = np.array(top_dimensional_cells, @@ -88,11 +88,11 @@ cdef class CubicalComplex: order = 'F') dimensions = top_dimensional_cells.shape top_dimensional_cells = top_dimensional_cells.ravel(order='F') - self.thisptr = new Bitmap_cubical_complex_base_interface(dimensions, top_dimensional_cells) + self._construct_from_cells(dimensions, top_dimensional_cells) elif ((dimensions is None) and (top_dimensional_cells is None) and (perseus_file != '')): if os.path.isfile(perseus_file): - self.thisptr = new Bitmap_cubical_complex_base_interface(perseus_file.encode('utf-8')) + self._construct_from_file(perseus_file.encode('utf-8')) else: raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), perseus_file) @@ -101,6 +101,14 @@ cdef class CubicalComplex: "top_dimensional_cells or from a Perseus-style file name.", file=sys.stderr) + def _construct_from_cells(self, vector[unsigned] dimensions, vector[double] top_dimensional_cells): + with nogil: + self.thisptr = new Bitmap_cubical_complex_base_interface(dimensions, top_dimensional_cells) + + def _construct_from_file(self, string filename): + with nogil: + self.thisptr = new 
Bitmap_cubical_complex_base_interface(filename) + def __dealloc__(self): if self.thisptr != NULL: del self.thisptr diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index dcca7b63..11e1766c 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -81,9 +81,7 @@ cdef class PeriodicCubicalComplex: periodic_dimensions=None, perseus_file=''): if ((dimensions is not None) and (top_dimensional_cells is not None) and (periodic_dimensions is not None) and (perseus_file == '')): - self.thisptr = new Periodic_cubical_complex_base_interface(dimensions, - top_dimensional_cells, - periodic_dimensions) + self._construct_from_cells(dimensions, top_dimensional_cells, periodic_dimensions) elif ((dimensions is None) and (top_dimensional_cells is not None) and (periodic_dimensions is not None) and (perseus_file == '')): top_dimensional_cells = np.array(top_dimensional_cells, @@ -91,13 +89,11 @@ cdef class PeriodicCubicalComplex: order = 'F') dimensions = top_dimensional_cells.shape top_dimensional_cells = top_dimensional_cells.ravel(order='F') - self.thisptr = new Periodic_cubical_complex_base_interface(dimensions, - top_dimensional_cells, - periodic_dimensions) + self._construct_from_cells(dimensions, top_dimensional_cells, periodic_dimensions) elif ((dimensions is None) and (top_dimensional_cells is None) and (periodic_dimensions is None) and (perseus_file != '')): if os.path.isfile(perseus_file): - self.thisptr = new Periodic_cubical_complex_base_interface(perseus_file.encode('utf-8')) + self._construct_from_file(perseus_file.encode('utf-8')) else: print("file " + perseus_file + " not found.", file=sys.stderr) else: @@ -106,6 +102,14 @@ cdef class PeriodicCubicalComplex: "top_dimensional_cells and periodic_dimensions or from " "a Perseus-style file name.", file=sys.stderr) + def _construct_from_cells(self, vector[unsigned] dimensions, vector[double] 
top_dimensional_cells, vector[bool] periodic_dimensions): + with nogil: + self.thisptr = new Periodic_cubical_complex_base_interface(dimensions, top_dimensional_cells, periodic_dimensions) + + def _construct_from_file(self, string filename): + with nogil: + self.thisptr = new Periodic_cubical_complex_base_interface(filename) + def __dealloc__(self): if self.thisptr != NULL: del self.thisptr -- cgit v1.2.3 From c156309dfd00c6180f2fd2dc03be159fd21c2626 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 17 May 2020 23:32:21 +0200 Subject: One more nogil in cubical --- src/python/gudhi/cubical_complex.pyx | 3 ++- src/python/gudhi/periodic_cubical_complex.pyx | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index 0068f2ff..3ace2517 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -215,7 +215,8 @@ cdef class CubicalComplex: cdef vector[vector[int]] persistence_result output = [[],[]] - persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() + with nogil: + persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() pr = np.array(persistence_result) ess_ind = np.argwhere(pr[:,2] == -1)[:,0] diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index 11e1766c..bed55101 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -214,7 +214,8 @@ cdef class PeriodicCubicalComplex: cdef vector[vector[int]] persistence_result if self.pcohptr != NULL: output = [[],[]] - persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() + with nogil: + persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() pr = np.array(persistence_result) ess_ind = np.argwhere(pr[:,2] == -1)[:,0] -- cgit v1.2.3 From beadbbbefa1f8f30233a534b6c9cdf11ffb65f93 Mon Sep 17 00:00:00 2001 From: 
ROUVREAU Vincent Date: Mon, 18 May 2020 07:40:59 +0200 Subject: When Reviewing dependencies, I missed this one --- src/python/doc/wasserstein_distance_sum.inc | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc index f9308e5e..c41de017 100644 --- a/src/python/doc/wasserstein_distance_sum.inc +++ b/src/python/doc/wasserstein_distance_sum.inc @@ -1,14 +1,12 @@ .. table:: :widths: 30 40 30 - +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ - | .. figure:: | The q-Wasserstein distance measures the similarity between two | :Author: Theo Lacombe | - | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams using the sum of all edges lengths (instead of | | - | :figclass: align-center | the maximum). It allows to define sophisticated objects such as | :Since: GUDHI 3.1.0 | - | | barycenters of a family of persistence diagrams. | | - | | | :License: MIT | - | | | | - | | | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1 | - +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+ - | * :doc:`wasserstein_distance_user` | | - +-----------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------+ + +-----------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------+ + | .. 
figure:: | The q-Wasserstein distance measures the similarity between two | :Author: Theo Lacombe, Marc Glisse | + | ../../doc/Bottleneck_distance/perturb_pd.png | persistence diagrams using the sum of all edges lengths (instead of | | + | :figclass: align-center | the maximum). It allows to define sophisticated objects such as | :Since: GUDHI 3.1.0 | + | | barycenters of a family of persistence diagrams. | | + | | | :License: MIT, BSD-3-Clause | + +-----------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------+ + | * :doc:`wasserstein_distance_user` | | + +-----------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------+ -- cgit v1.2.3 From 2e8a4a71a22350b9301cc6052165d97357f12f83 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Mon, 18 May 2020 08:54:20 +0200 Subject: Update gudhi version --- CMakeGUDHIVersion.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeGUDHIVersion.txt b/CMakeGUDHIVersion.txt index 0f827b9e..cf9fd4f7 100644 --- a/CMakeGUDHIVersion.txt +++ b/CMakeGUDHIVersion.txt @@ -1,6 +1,6 @@ set (GUDHI_MAJOR_VERSION 3) -set (GUDHI_MINOR_VERSION 1) -set (GUDHI_PATCH_VERSION 1) +set (GUDHI_MINOR_VERSION 2) +set (GUDHI_PATCH_VERSION 0.rc2) set(GUDHI_VERSION ${GUDHI_MAJOR_VERSION}.${GUDHI_MINOR_VERSION}.${GUDHI_PATCH_VERSION}) message(STATUS "GUDHI version : ${GUDHI_VERSION}") -- cgit v1.2.3 From 3f14070864e4556bb137ee16d80496185435b469 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com> Date: Mon, 18 May 2020 09:27:15 +0200 Subject: Update .github/next_release.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Théo Lacombe --- .github/next_release.md | 3 +-- 1 file changed, 1 insertion(+), 2 
deletions(-) diff --git a/.github/next_release.md b/.github/next_release.md index 1112ef70..cd9488c6 100644 --- a/.github/next_release.md +++ b/.github/next_release.md @@ -41,7 +41,7 @@ Below is a list of changes made since GUDHI 3.1.1: - [Persistence graphical tools](https://gudhi.inria.fr/python/latest/persistence_graphical_tools_user.html) - Use LaTeX style and grey block - - (N x 2) numpy arrays as input + - Can now handle (N x 2) numpy arrays as input - Miscellaneous - The [list of bugs that were solved since GUDHI-3.2.0](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.2.0+is%3Aclosed) is available on GitHub. @@ -56,4 +56,3 @@ We provide [bibtex entries](https://gudhi.inria.fr/doc/latest/_citation.html) fo Feel free to [contact us](https://gudhi.inria.fr/contact/) in case you have any questions or remarks. For further information about downloading and installing the library ([C++](https://gudhi.inria.fr/doc/latest/installation.html) or [Python](https://gudhi.inria.fr/python/latest/installation.html)), please visit the [GUDHI web site](https://gudhi.inria.fr/). 
- -- cgit v1.2.3 From 3e52b65a55b615929556597acc963246f76475ff Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com> Date: Mon, 18 May 2020 09:27:25 +0200 Subject: Update .github/next_release.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Théo Lacombe --- .github/next_release.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/next_release.md b/.github/next_release.md index cd9488c6..d3c9ce68 100644 --- a/.github/next_release.md +++ b/.github/next_release.md @@ -40,7 +40,7 @@ Below is a list of changes made since GUDHI 3.1.1: - Improve computations (cache circumcenters computation and point comparison improvement) - [Persistence graphical tools](https://gudhi.inria.fr/python/latest/persistence_graphical_tools_user.html) - - Use LaTeX style and grey block + - New rendering option proposed (use LaTeX style, add grey block, improved positioning of labels, etc.). - Can now handle (N x 2) numpy arrays as input - Miscellaneous -- cgit v1.2.3 From 2287b727126ffb9fc47869ac9ed6b6bd61c6605a Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 18 May 2020 23:54:02 +0200 Subject: Infer k when we pass the distances to the nearest neighbors --- src/python/gudhi/point_cloud/dtm.py | 23 +++++++++++++++++------ src/python/test/test_dtm.py | 4 ++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index 88f197e7..d836c28d 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -85,7 +85,8 @@ class DTMDensity: def __init__(self, k=None, weights=None, q=None, dim=None, normalize=False, n_samples=None, **kwargs): """ Args: - k (int): number of neighbors (possibly including the point itself). + k (int): number of neighbors (possibly including the point itself). Optional if it can be guessed + from weights or metric="neighbors". 
weights (numpy.array): weights of each of the k neighbors, optional. They are supposed to sum to 1. q (float): order used to compute the distance to measure. Defaults to dim. dim (float): final exponent representing the dimension. Defaults to the dimension, and must be specified @@ -98,9 +99,12 @@ class DTMDensity: :func:`transform` expects an array with the distances to the k nearest neighbors. """ if weights is None: - assert k is not None, "Must specify k or weights" self.k = k - self.weights = np.full(k, 1.0 / k) + if k is None: + assert kwargs.get("metric") == "neighbors", 'Must specify k or weights, unless metric is "neighbors"' + self.weights = None + else: + self.weights = np.full(k, 1.0 / k) else: self.weights = weights self.k = len(weights) @@ -145,14 +149,21 @@ class DTMDensity: dim = len(X[0]) if q is None: q = dim + k = self.k + weights = self.weights if self.params["metric"] == "neighbors": - distances = np.asarray(X)[:, : self.k] + distances = np.asarray(X) + if weights is None: + k = distances.shape[1] + weights = np.full(k, 1.0 / k) + else: + distances = distances[:, :k] else: distances = self.knn.transform(X) distances = distances ** q - dtm = (distances * self.weights).sum(-1) + dtm = (distances * weights).sum(-1) if self.normalize: - dtm /= (np.arange(1, self.k + 1) ** (q / dim) * self.weights).sum() + dtm /= (np.arange(1, k + 1) ** (q / dim) * weights).sum() density = dtm ** (-dim / q) if self.normalize: import math diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 8ab0cc44..8d400c7e 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -82,3 +82,7 @@ def test_density(): density = DTMDensity(k=2, metric="neighbors", dim=1).fit_transform(distances) expected = numpy.array([2.0, 1.0, 0.5]) assert density == pytest.approx(expected) + distances = [[0, 1], [2, 0], [1, 3]] + density = DTMDensity(metric="neighbors", dim=1).fit_transform(distances) + expected = numpy.array([2.0, 1.0, 0.5]) + assert 
density == pytest.approx(expected) -- cgit v1.2.3 From 35d5ac4e6bb79ec41b35c0df611207b9cd578f49 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 19 May 2020 18:10:16 +0200 Subject: Test with explicit weights and remove duplicated assignment --- src/python/test/test_dtm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/python/test/test_dtm.py b/src/python/test/test_dtm.py index 8d400c7e..0a52279e 100755 --- a/src/python/test/test_dtm.py +++ b/src/python/test/test_dtm.py @@ -84,5 +84,6 @@ def test_density(): assert density == pytest.approx(expected) distances = [[0, 1], [2, 0], [1, 3]] density = DTMDensity(metric="neighbors", dim=1).fit_transform(distances) - expected = numpy.array([2.0, 1.0, 0.5]) + assert density == pytest.approx(expected) + density = DTMDensity(weights=[0.5, 0.5], metric="neighbors", dim=1).fit_transform(distances) assert density == pytest.approx(expected) -- cgit v1.2.3 From bb9b6b2a58d3b31a0e25d473339f2bde6430a52d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 19 May 2020 20:16:32 +0200 Subject: long line --- src/python/gudhi/point_cloud/dtm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/python/gudhi/point_cloud/dtm.py b/src/python/gudhi/point_cloud/dtm.py index d836c28d..55ac58e6 100644 --- a/src/python/gudhi/point_cloud/dtm.py +++ b/src/python/gudhi/point_cloud/dtm.py @@ -95,8 +95,9 @@ class DTMDensity: Only available for the Euclidean metric, defaults to False. n_samples (int): number of sample points used for fitting. Only needed if `normalize` is True and metric is "neighbors". - kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that metric="neighbors" means that - :func:`transform` expects an array with the distances to the k nearest neighbors. 
+ kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that + metric="neighbors" means that :func:`transform` expects an array with the distances to + the k nearest neighbors. """ if weights is None: self.k = k -- cgit v1.2.3 From 97e889f34e929f3c2306803b6c37b57926bd1245 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Wed, 20 May 2020 07:32:26 +0200 Subject: 3.2.0 version --- CMakeGUDHIVersion.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeGUDHIVersion.txt b/CMakeGUDHIVersion.txt index cf9fd4f7..ac89fa4d 100644 --- a/CMakeGUDHIVersion.txt +++ b/CMakeGUDHIVersion.txt @@ -1,6 +1,6 @@ set (GUDHI_MAJOR_VERSION 3) set (GUDHI_MINOR_VERSION 2) -set (GUDHI_PATCH_VERSION 0.rc2) +set (GUDHI_PATCH_VERSION 0) set(GUDHI_VERSION ${GUDHI_MAJOR_VERSION}.${GUDHI_MINOR_VERSION}.${GUDHI_PATCH_VERSION}) message(STATUS "GUDHI version : ${GUDHI_VERSION}") -- cgit v1.2.3 From 0583c72cd729fb2d4a3e704949051e98b24726b3 Mon Sep 17 00:00:00 2001 From: MathieuCarriere Date: Wed, 20 May 2020 14:13:49 -0400 Subject: fix bug --- src/python/gudhi/cubical_complex.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/gudhi/cubical_complex.pyx b/src/python/gudhi/cubical_complex.pyx index ca979eda..ff0a9ed8 100644 --- a/src/python/gudhi/cubical_complex.pyx +++ b/src/python/gudhi/cubical_complex.pyx @@ -209,14 +209,14 @@ cdef class CubicalComplex: ess_ind = np.argwhere(pr[:,2] == -1)[:,0] ess = pr[ess_ind] - max_h = max(ess[:,0])+1 + max_h = max(ess[:,0])+1 if len(ess) > 0 else 0 for h in range(max_h): hidxs = np.argwhere(ess[:,0] == h)[:,0] output[1].append(ess[hidxs][:,1]) reg_ind = np.setdiff1d(np.array(range(len(pr))), ess_ind) reg = pr[reg_ind] - max_h = max(reg[:,0])+1 + max_h = max(reg[:,0])+1 if len(reg) > 0 else 0 for h in range(max_h): hidxs = np.argwhere(reg[:,0] == h)[:,0] output[0].append(reg[hidxs][:,1:]) -- cgit v1.2.3 From 82befbd4731b7e12f9e09a9c3bb1277b71044273 Mon Sep 17 00:00:00 2001 
From: ROUVREAU Vincent Date: Thu, 21 May 2020 08:15:43 +0200 Subject: Next release --- .github/next_release.md | 50 ++++++++++--------------------------------------- CMakeGUDHIVersion.txt | 4 ++-- 2 files changed, 12 insertions(+), 42 deletions(-) diff --git a/.github/next_release.md b/.github/next_release.md index d3c9ce68..a2805a55 100644 --- a/.github/next_release.md +++ b/.github/next_release.md @@ -1,50 +1,19 @@ -We are pleased to announce the release 3.2.0 of the GUDHI library. +We are pleased to announce the release 3.X.X of the GUDHI library. -As a major new feature, the GUDHI library now offers a Python interface to [Hera](https://bitbucket.org/grey_narn/hera/src/master/) to compute the Wasserstein distance. -[PyBind11](https://github.com/pybind/pybind11) is now required to build the Python module. +As a major new feature, the GUDHI library now offers ... -We are now using GitHub to develop the GUDHI library, do not hesitate to [fork the GUDHI project on GitHub](https://github.com/GUDHI/gudhi-devel). From a user point of view, we recommend to download GUDHI user version (gudhi.3.2.0.tar.gz). +We are now using GitHub to develop the GUDHI library, do not hesitate to [fork the GUDHI project on GitHub](https://github.com/GUDHI/gudhi-devel). From a user point of view, we recommend to download GUDHI user version (gudhi.3.X.X.tar.gz). -Below is a list of changes made since GUDHI 3.1.1: +Below is a list of changes made since GUDHI 3.X-1.X-1: -- Point cloud utilities - - A new module [Time Delay Embedding](https://gudhi.inria.fr/python/latest/point_cloud.html#time-delay-embedding) - to embed time-series data in the R^d according to [Takens' Embedding Theorem](https://en.wikipedia.org/wiki/Takens%27s_theorem) - and obtain the coordinates of each point. - - A new module [K Nearest Neighbors](https://gudhi.inria.fr/python/latest/point_cloud.html#k-nearest-neighbors) - that wraps several implementations for computing the k nearest neighbors in a point set. 
- - A new module [Distance To Measure](https://gudhi.inria.fr/python/latest/point_cloud.html#distance-to-measure) - to compute the distance to the empirical measure defined by a point set +- [Module](link) + - ... -- [Persistence representations](https://gudhi.inria.fr/python/latest/representations.html) - - Interface to Wasserstein distances. - -- Rips complex - - A new module [Weighted Rips Complex](https://gudhi.inria.fr/python/latest/rips_complex_user.html#weighted-rips-complex) - to construct a simplicial complex from a distance matrix and weights on vertices. - -- [Wassertein distance](https://gudhi.inria.fr/python/latest/wasserstein_distance_user.html) - - An [another implementation](https://gudhi.inria.fr/python/latest/wasserstein_distance_user.html#hera) - comes from Hera (BSD-3-Clause) which is based on [Geometry Helps to Compare Persistence Diagrams](http://doi.acm.org/10.1145/3064175) - by Michael Kerber, Dmitriy Morozov, and Arnur Nigmetov. - - `gudhi.wasserstein.wasserstein_distance` has now an option to return the optimal matching that achieves the distance between the two diagrams. - - A new module [Barycenters](https://gudhi.inria.fr/python/latest/wasserstein_distance_user.html#barycenters) - to estimate the Frechet mean (aka Wasserstein barycenter) between persistence diagrams. - -- [Simplex tree](https://gudhi.inria.fr/python/latest/simplex_tree_ref.html) - - Extend filtration method to compute extended persistence - - Flag and lower star persistence pairs generators - - A new interface to filtration, simplices and skeleton getters to return an iterator - -- [Alpha complex](https://gudhi.inria.fr/doc/latest/group__alpha__complex.html) - - Improve computations (cache circumcenters computation and point comparison improvement) - -- [Persistence graphical tools](https://gudhi.inria.fr/python/latest/persistence_graphical_tools_user.html) - - New rendering option proposed (use LaTeX style, add grey block, improved positioning of labels, etc.). 
- - Can now handle (N x 2) numpy arrays as input +- [Module](link) + - ... - Miscellaneous - - The [list of bugs that were solved since GUDHI-3.2.0](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.2.0+is%3Aclosed) is available on GitHub. + - The [list of bugs that were solved since GUDHI-3.X-1.X-1](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.1.1+is%3Aclosed) is available on GitHub. All modules are distributed under the terms of the MIT license. However, there are still GPL dependencies for many modules. We invite you to check our [license dedicated web page](https://gudhi.inria.fr/licensing/) for further details. @@ -56,3 +25,4 @@ We provide [bibtex entries](https://gudhi.inria.fr/doc/latest/_citation.html) fo Feel free to [contact us](https://gudhi.inria.fr/contact/) in case you have any questions or remarks. For further information about downloading and installing the library ([C++](https://gudhi.inria.fr/doc/latest/installation.html) or [Python](https://gudhi.inria.fr/python/latest/installation.html)), please visit the [GUDHI web site](https://gudhi.inria.fr/). 
+ diff --git a/CMakeGUDHIVersion.txt b/CMakeGUDHIVersion.txt index ac89fa4d..b9895bb1 100644 --- a/CMakeGUDHIVersion.txt +++ b/CMakeGUDHIVersion.txt @@ -1,6 +1,6 @@ set (GUDHI_MAJOR_VERSION 3) -set (GUDHI_MINOR_VERSION 2) -set (GUDHI_PATCH_VERSION 0) +set (GUDHI_MINOR_VERSION 3) +set (GUDHI_PATCH_VERSION 0.rc1) set(GUDHI_VERSION ${GUDHI_MAJOR_VERSION}.${GUDHI_MINOR_VERSION}.${GUDHI_PATCH_VERSION}) message(STATUS "GUDHI version : ${GUDHI_VERSION}") -- cgit v1.2.3 From d00bd43a7f3651e67c1572fea38550d310a223ec Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Thu, 21 May 2020 09:20:37 +0200 Subject: periodic fix and adds corresponding unitary tests for bug fix --- src/python/gudhi/periodic_cubical_complex.pyx | 42 +++++++++++++-------------- src/python/test/test_cubical_complex.py | 17 +++++++++++ 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/python/gudhi/periodic_cubical_complex.pyx b/src/python/gudhi/periodic_cubical_complex.pyx index bed55101..d353d2af 100644 --- a/src/python/gudhi/periodic_cubical_complex.pyx +++ b/src/python/gudhi/periodic_cubical_complex.pyx @@ -211,29 +211,27 @@ cdef class PeriodicCubicalComplex: The indices of the arrays in the list correspond to the homological dimensions, and the integers of each row in each array correspond to: (index of positive top-dimensional cell). 
""" + assert self.pcohptr != NULL, "compute_persistence() must be called before cofaces_of_persistence_pairs()" cdef vector[vector[int]] persistence_result - if self.pcohptr != NULL: - output = [[],[]] - with nogil: - persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() - pr = np.array(persistence_result) - - ess_ind = np.argwhere(pr[:,2] == -1)[:,0] - ess = pr[ess_ind] - max_h = max(ess[:,0])+1 - for h in range(max_h): - hidxs = np.argwhere(ess[:,0] == h)[:,0] - output[1].append(ess[hidxs][:,1]) - - reg_ind = np.setdiff1d(np.array(range(len(pr))), ess_ind) - reg = pr[reg_ind] - max_h = max(reg[:,0])+1 - for h in range(max_h): - hidxs = np.argwhere(reg[:,0] == h)[:,0] - output[0].append(reg[hidxs][:,1:]) - else: - print("cofaces_of_persistence_pairs function requires persistence function" - " to be launched first.") + + output = [[],[]] + with nogil: + persistence_result = self.pcohptr.cofaces_of_cubical_persistence_pairs() + pr = np.array(persistence_result) + + ess_ind = np.argwhere(pr[:,2] == -1)[:,0] + ess = pr[ess_ind] + max_h = max(ess[:,0])+1 if len(ess) > 0 else 0 + for h in range(max_h): + hidxs = np.argwhere(ess[:,0] == h)[:,0] + output[1].append(ess[hidxs][:,1]) + + reg_ind = np.setdiff1d(np.array(range(len(pr))), ess_ind) + reg = pr[reg_ind] + max_h = max(reg[:,0])+1 if len(reg) > 0 else 0 + for h in range(max_h): + hidxs = np.argwhere(reg[:,0] == h)[:,0] + output[0].append(reg[hidxs][:,1:]) return output def betti_numbers(self): diff --git a/src/python/test/test_cubical_complex.py b/src/python/test/test_cubical_complex.py index 5c59db8f..d0e4e9e8 100755 --- a/src/python/test/test_cubical_complex.py +++ b/src/python/test/test_cubical_complex.py @@ -157,3 +157,20 @@ def test_cubical_generators(): assert np.array_equal(g[0][0], np.empty(shape=[0,2])) assert np.array_equal(g[0][1], np.array([[7, 4]])) assert np.array_equal(g[1][0], np.array([8])) + +def test_cubical_cofaces_of_persistence_pairs_when_pd_has_no_paired_birth_and_death(): + 
cubCpx = CubicalComplex(dimensions=[1,2], top_dimensional_cells=[0.0, 1.0]) + Diag = cubCpx.persistence(homology_coeff_field=2, min_persistence=0) + pairs = cubCpx.cofaces_of_persistence_pairs() + assert pairs[0] == [] + assert np.array_equal(pairs[1][0], np.array([0])) + +def test_periodic_cofaces_of_persistence_pairs_when_pd_has_no_paired_birth_and_death(): + perCubCpx = PeriodicCubicalComplex(dimensions=[1,2], top_dimensional_cells=[0.0, 1.0], + periodic_dimensions=[True, True]) + Diag = perCubCpx.persistence(homology_coeff_field=2, min_persistence=0) + pairs = perCubCpx.cofaces_of_persistence_pairs() + assert pairs[0] == [] + assert np.array_equal(pairs[1][0], np.array([0])) + assert np.array_equal(pairs[1][1], np.array([0, 1])) + assert np.array_equal(pairs[1][2], np.array([1])) -- cgit v1.2.3 From c3412f236bd82b12abedb8e6239ff4bbcdaa526c Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Fri, 22 May 2020 17:47:55 +0200 Subject: Update new gudhi version process --- .github/for_maintainers/new_gudhi_version_creation.md | 10 +++++----- Dockerfile_gudhi_installation | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/for_maintainers/new_gudhi_version_creation.md b/.github/for_maintainers/new_gudhi_version_creation.md index 8674222b..86c393a0 100644 --- a/.github/for_maintainers/new_gudhi_version_creation.md +++ b/.github/for_maintainers/new_gudhi_version_creation.md @@ -17,8 +17,7 @@ rm -rf data/points/COIL_database/lucky_cat.off_dist data/points/COIL_database/lu Checkin the modifications, build and test the version: ```bash git submodule update --init -mkdir build -cd build +rm -rf build; mkdir build; cd build cmake -DCGAL_DIR=/your/path/to/CGAL -DWITH_GUDHI_EXAMPLE=ON -DWITH_GUDHI_BENCHMARK=ON -DUSER_VERSION_DIR=gudhi.@GUDHI_VERSION@ -DPython_ADDITIONAL_VERSIONS=3 .. 
make user_version date +"%d-%m-%Y-%T" > gudhi.@GUDHI_VERSION@/timestamp.txt @@ -27,7 +26,7 @@ md5sum gudhi.@GUDHI_VERSION@.tar.gz > md5sum.txt sha256sum gudhi.@GUDHI_VERSION@.tar.gz > sha256sum.txt sha512sum gudhi.@GUDHI_VERSION@.tar.gz > sha512sum.txt -make -j all test +make -j 4 all && ctest -j 4 --output-on-failure ``` ***[Check there are no error]*** @@ -43,7 +42,8 @@ make doxygen 2>&1 | tee dox.log && grep warning dox.log ```bash cp -R gudhi.@GUDHI_VERSION@/doc/html gudhi.doc.@GUDHI_VERSION@/cpp cd gudhi.@GUDHI_VERSION@ -rm -rf build; mkdir build; cd build; cmake -DCGAL_DIR=/your/path/to/CGAL -DWITH_GUDHI_EXAMPLE=ON -DPython_ADDITIONAL_VERSIONS=3 .. +rm -rf build; mkdir build; cd build +cmake -DCGAL_DIR=/your/path/to/CGAL -DWITH_GUDHI_EXAMPLE=ON -DPython_ADDITIONAL_VERSIONS=3 .. export LC_ALL=en_US.UTF-8 # cf. bug make sphinx ``` @@ -56,7 +56,7 @@ cd ../.. tar -czvf gudhi.doc.@GUDHI_VERSION@.tar.gz gudhi.doc.@GUDHI_VERSION@ cd gudhi.@GUDHI_VERSION@/build -make all test +make -j 4 all && ctest -j 4 --output-on-failure ``` ***[Check there are no error]*** diff --git a/Dockerfile_gudhi_installation b/Dockerfile_gudhi_installation index f9e8813b..461a8a19 100644 --- a/Dockerfile_gudhi_installation +++ b/Dockerfile_gudhi_installation @@ -58,9 +58,9 @@ RUN pip3 install \ # apt clean up RUN apt autoremove && rm -rf /var/lib/apt/lists/* -RUN curl -LO "https://github.com/GUDHI/gudhi-devel/releases/download/tags%2Fgudhi-release-3.1.1/gudhi.3.1.1.tar.gz" \ -&& tar xf gudhi.3.1.1.tar.gz \ -&& cd gudhi.3.1.1 \ +RUN curl -LO "https://github.com/GUDHI/gudhi-devel/releases/download/tags%2Fgudhi-release-3.2.0/gudhi.3.2.0.tar.gz" \ +&& tar xf gudhi.3.2.0.tar.gz \ +&& cd gudhi.3.2.0 \ && mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_GUDHI_PYTHON=OFF -DPython_ADDITIONAL_VERSIONS=3 .. \ && make all test install \ && cmake -DWITH_GUDHI_PYTHON=ON . \ -- cgit v1.2.3