From b8d6f1f5969de7edbb66f285ac0a9ccffa8878fb Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 17 Nov 2022 00:07:25 +0100 Subject: Advertise new SimplexTree functions in Rips user manual --- src/python/doc/rips_complex_user.rst | 120 +++++++++-------------------------- 1 file changed, 30 insertions(+), 90 deletions(-) diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index c41a7803..489fbb4a 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -34,9 +34,6 @@ A vertex name corresponds to the index of the point in the given range (aka. the On this example, as edges (4,5), (4,6) and (5,6) are in the complex, simplex (4,5,6) is added with the filtration value set with :math:`max(filtration(4,5), filtration(4,6), filtration(5,6))`. And so on for simplex (0,1,2,3). -If the :doc:`RipsComplex ` interfaces are not detailed enough for your need, please refer to -rips_persistence_step_by_step.cpp C++ example, where the graph construction over the Simplex_tree is more detailed. - A Rips complex can easily become huge, even if we limit the length of the edges and the dimension of the simplices. One easy trick, before building a Rips complex on a point cloud, is to call :func:`~gudhi.sparsify_point_set` which removes points @@ -117,54 +114,44 @@ Notice that if we use asking for a very sparse version (theory only gives some guarantee on the meaning of the output if `sparse<1`), 2 to 5 edges disappear, depending on the random vertex used to start the sparsification. -Example from OFF file -^^^^^^^^^^^^^^^^^^^^^ +Example step by step +^^^^^^^^^^^^^^^^^^^^ -This example builds the :doc:`RipsComplex ` from the given -points in an OFF file, and max_edge_length value. -Then it creates a :doc:`SimplexTree ` with it. +While :doc:`RipsComplex ` is convenient, for instance to build a simplicial complex in one line -Finally, it is asked to display information about the Rips complex. +.. testcode:: + import gudhi + points = [[1, 1], [7, 0], [4, 6], [9, 6], [0, 14], [2, 19], [9, 17]] + cplx = gudhi.RipsComplex(points=points, max_edge_length=12.0).create_simplex_tree(max_dimension=2) + +you can achieve the same result without this class for more flexibility .. testcode:: - import gudhi - off_file = gudhi.__root_source_dir__ + '/data/points/alphacomplexdoc.off' - point_cloud = gudhi.read_points_from_off_file(off_file = off_file) - rips_complex = gudhi.RipsComplex(points=point_cloud, max_edge_length=12.0) - simplex_tree = rips_complex.create_simplex_tree(max_dimension=1) - result_str = 'Rips complex is of dimension ' + repr(simplex_tree.dimension()) + ' - ' + \ - repr(simplex_tree.num_simplices()) + ' simplices - ' + \ - repr(simplex_tree.num_vertices()) + ' vertices.' - print(result_str) - fmt = '%s -> %.2f' - for filtered_value in simplex_tree.get_filtration(): - print(fmt % tuple(filtered_value)) + import gudhi + from scipy.spatial.distance import cdist + points = [[1, 1], [7, 0], [4, 6], [9, 6], [0, 14], [2, 19], [9, 17]] + distance_matrix = cdist(points, points) + cplx = gudhi.SimplexTree.create_from_array(distance_matrix, max_filtration=12.0) + cplx.expansion(2) -the program output is: +or -.. testoutput:: +.. testcode:: - Rips complex is of dimension 1 - 18 simplices - 7 vertices. - [0] -> 0.00 - [1] -> 0.00 - [2] -> 0.00 - [3] -> 0.00 - [4] -> 0.00 - [5] -> 0.00 - [6] -> 0.00 - [2, 3] -> 5.00 - [4, 5] -> 5.39 - [0, 2] -> 5.83 - [0, 1] -> 6.08 - [1, 3] -> 6.32 - [1, 2] -> 6.71 - [5, 6] -> 7.28 - [2, 4] -> 8.94 - [0, 3] -> 9.43 - [4, 6] -> 9.49 - [3, 6] -> 11.00 + import gudhi + from scipy.spatial import cKDTree + points = [[1, 1], [7, 0], [4, 6], [9, 6], [0, 14], [2, 19], [9, 17]] + tree = cKDTree(points) + edges = tree.sparse_distance_matrix(tree, max_distance=12.0, output_type="coo_matrix") + cplx = gudhi.SimplexTree() + cplx.insert_edges_from_coo_matrix(edges) + cplx.expansion(2) + + +This way, you can easily add a call to :func:`~gudhi.SimplexTree.collapse_edges` before the expansion, +use a different metric to compute the matrix, or other variations. Distance matrix --------------- @@ -223,54 +210,7 @@ until dimension 1 - one skeleton graph in other words), the output is: [4, 6] -> 9.49 [3, 6] -> 11.00 -Example from csv file -^^^^^^^^^^^^^^^^^^^^^ - -This example builds the :doc:`RipsComplex ` from the given -distance matrix in a csv file, and max_edge_length value. -Then it creates a :doc:`SimplexTree ` with it. - -Finally, it is asked to display information about the Rips complex. - - -.. testcode:: - - import gudhi - distance_matrix = gudhi.read_lower_triangular_matrix_from_csv_file(csv_file=gudhi.__root_source_dir__ + \ - '/data/distance_matrix/full_square_distance_matrix.csv') - rips_complex = gudhi.RipsComplex(distance_matrix=distance_matrix, max_edge_length=12.0) - simplex_tree = rips_complex.create_simplex_tree(max_dimension=1) - result_str = 'Rips complex is of dimension ' + repr(simplex_tree.dimension()) + ' - ' + \ - repr(simplex_tree.num_simplices()) + ' simplices - ' + \ - repr(simplex_tree.num_vertices()) + ' vertices.' - print(result_str) - fmt = '%s -> %.2f' - for filtered_value in simplex_tree.get_filtration(): - print(fmt % tuple(filtered_value)) - -the program output is: - -.. testoutput:: - - Rips complex is of dimension 1 - 18 simplices - 7 vertices. - [0] -> 0.00 - [1] -> 0.00 - [2] -> 0.00 - [3] -> 0.00 - [4] -> 0.00 - [5] -> 0.00 - [6] -> 0.00 - [2, 3] -> 5.00 - [4, 5] -> 5.39 - [0, 2] -> 5.83 - [0, 1] -> 6.08 - [1, 3] -> 6.32 - [1, 2] -> 6.71 - [5, 6] -> 7.28 - [2, 4] -> 8.94 - [0, 3] -> 9.43 - [4, 6] -> 9.49 - [3, 6] -> 11.00 +In case this lower triangular matrix is stored in a CSV file, like data/distance_matrix/full_square_distance_matrix.csv in the Gudhi distribution, you can read it with :func:`~gudhi.read_lower_triangular_matrix_from_csv_file`. Correlation matrix ------------------ -- cgit v1.2.3 From a6e742504416a7e214251dd1b541e49df98c01f9 Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau Date: Sat, 17 Dec 2022 09:49:37 +0100 Subject: reset for next release --- .github/next_release.md | 27 ++++++++++----------------- CMakeGUDHIVersion.txt | 4 ++-- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/.github/next_release.md b/.github/next_release.md index f2192878..937fc557 100644 --- a/.github/next_release.md +++ b/.github/next_release.md @@ -1,24 +1,20 @@ -We are pleased to announce the release 3.7.0 of the GUDHI library. +We are pleased to announce the release 3.8.0 of the GUDHI library. -As a major new feature, the GUDHI library now offers new functions to initialize a Simplex tree. Universal wheel for OSx pip package and python 3.11 are now available. +As a major new feature, the GUDHI library now offers ... We are now using GitHub to develop the GUDHI library, do not hesitate to [fork the GUDHI project on GitHub](https://github.com/GUDHI/gudhi-devel). From a user point of view, we recommend to download GUDHI user version (gudhi.3.X.X.tar.gz). -Below is a list of changes made since GUDHI 3.6.0: +Below is a list of changes made since GUDHI 3.7.0: -- [Simplex tree](https://gudhi.inria.fr/python/latest/simplex_tree_ref.html) - - New functions to initialize from a matrix or insert batches of simplices of the same dimension. +- [Module](link) + - ... -- [Rips complex](https://gudhi.inria.fr/python/latest/rips_complex_user.html) - - Construction now rejects positional arguments, you need to specify `points=X`. +- [Module](link) + - ... -- Installation - - C++17 is the new minimal standard to compile the library. This implies Visual Studio minimal version is now 2017. - - OSx ARM pip package is now available thanks to a universal wheel - - Python 3.11 pip package - Miscellaneous - - The [list of bugs that were solved since GUDHI-3.6.0](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.7.0+is%3Aclosed) is available on GitHub. + - The [list of bugs that were solved since GUDHI-3.7.0](https://github.com/GUDHI/gudhi-devel/issues?q=label%3A3.8.0+is%3Aclosed) is available on GitHub. All modules are distributed under the terms of the MIT license. However, there are still GPL dependencies for many modules. We invite you to check our [license dedicated web page](https://gudhi.inria.fr/licensing/) for further details. @@ -33,8 +29,5 @@ For further information about downloading and installing the library ([C++](http ## Contributors -- @albert-github -- @hschreiber -- @mglisse -- @VincentRouvreau -- @wreise +- ... +- ... \ No newline at end of file diff --git a/CMakeGUDHIVersion.txt b/CMakeGUDHIVersion.txt index 319031b2..69c8e9fc 100644 --- a/CMakeGUDHIVersion.txt +++ b/CMakeGUDHIVersion.txt @@ -1,8 +1,8 @@ # Must be conform to pep440 - https://www.python.org/dev/peps/pep-0440/#pre-releases set (GUDHI_MAJOR_VERSION 3) -set (GUDHI_MINOR_VERSION 7) +set (GUDHI_MINOR_VERSION 8) # GUDHI_PATCH_VERSION can be 'ZaN' for Alpha release, 'ZbN' for Beta release, 'ZrcN' for release candidate or 'Z' for a final release. -set (GUDHI_PATCH_VERSION 0) +set (GUDHI_PATCH_VERSION 0a0) set(GUDHI_VERSION ${GUDHI_MAJOR_VERSION}.${GUDHI_MINOR_VERSION}.${GUDHI_PATCH_VERSION}) message(STATUS "GUDHI version : ${GUDHI_VERSION}") -- cgit v1.2.3 From 8319f4d1042cffaf1a64b1ca5fc4344a2e00320f Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 18 Nov 2022 20:28:40 +0100 Subject: Fix Atol doctest the skip marks were on the wrong lines, which resulted in WARNING: ignoring invalid doctest code Reminder: testoutput was disabled because it was too random (the lines are swapped if I run it right now). --- src/python/gudhi/representations/vector_methods.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index e1402aea..a6d86821 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -711,16 +711,16 @@ class Atol(BaseEstimator, TransformerMixin): >>> c = np.array([[3, 2, -1], [1, 2, -1]]) >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006)) >>> atol_vectoriser.fit(X=[a, b, c]).centers # doctest: +SKIP - >>> # array([[ 2. , 0.66666667, 3.33333333], - >>> # [ 2.6 , 2.8 , -0.4 ]]) - >>> atol_vectoriser(a) - >>> # array([1.18168665, 0.42375966]) # doctest: +SKIP - >>> atol_vectoriser(c) - >>> # array([0.02062512, 1.25157463]) # doctest: +SKIP + array([[ 2. , 0.66666667, 3.33333333], + [ 2.6 , 2.8 , -0.4 ]]) + >>> atol_vectoriser(a) # doctest: +SKIP + array([1.18168665, 0.42375966]) + >>> atol_vectoriser(c) # doctest: +SKIP + array([0.02062512, 1.25157463]) >>> atol_vectoriser.transform(X=[a, b, c]) # doctest: +SKIP - >>> # array([[1.18168665, 0.42375966], - >>> # [0.29861028, 1.06330156], - >>> # [0.02062512, 1.25157463]]) + array([[1.18168665, 0.42375966], + [0.29861028, 1.06330156], + [0.02062512, 1.25157463]]) """ # Note the example above must be up to date with the one in tests called test_atol_doc def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"): -- cgit v1.2.3 From 01fed963e4e51f0d2fdd342de5317822e7a55ecb Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 17 Dec 2022 13:27:07 +0100 Subject: Make the order of atol centers a bit more reproducible. --- src/python/gudhi/representations/vector_methods.py | 24 ++++++++++++---------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index a6d86821..745fe1e5 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -710,17 +710,17 @@ class Atol(BaseEstimator, TransformerMixin): >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]]) >>> c = np.array([[3, 2, -1], [1, 2, -1]]) >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006)) - >>> atol_vectoriser.fit(X=[a, b, c]).centers # doctest: +SKIP - array([[ 2. , 0.66666667, 3.33333333], - [ 2.6 , 2.8 , -0.4 ]]) - >>> atol_vectoriser(a) # doctest: +SKIP - array([1.18168665, 0.42375966]) - >>> atol_vectoriser(c) # doctest: +SKIP - array([0.02062512, 1.25157463]) - >>> atol_vectoriser.transform(X=[a, b, c]) # doctest: +SKIP - array([[1.18168665, 0.42375966], - [0.29861028, 1.06330156], - [0.02062512, 1.25157463]]) + >>> atol_vectoriser.fit(X=[a, b, c]).centers + array([[ 2.6 , 2.8 , -0.4 ], + [ 2. , 0.66666667, 3.33333333]]) + >>> atol_vectoriser(a) + array([0.42375966, 1.18168665]) + >>> atol_vectoriser(c) + array([1.25157463, 0.02062512]) + >>> atol_vectoriser.transform(X=[a, b, c]) + array([[0.42375966, 1.18168665], + [1.06330156, 0.29861028], + [1.25157463, 0.02062512]]) """ # Note the example above must be up to date with the one in tests called test_atol_doc def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"): @@ -771,6 +771,8 @@ class Atol(BaseEstimator, TransformerMixin): measures_concat = np.concatenate(X) self.quantiser.fit(X=measures_concat, sample_weight=sample_weight) self.centers = self.quantiser.cluster_centers_ + # Hack, but some people are unhappy if the order depends on the version of sklearn + self.centers = self.centers[np.lexsort(self.centers.T)] if self.quantiser.n_clusters == 1: dist_centers = pairwise.pairwise_distances(measures_concat) np.fill_diagonal(dist_centers, 0) -- cgit v1.2.3 From 7fdc81289f63d43aab884d3de3b4f0242fd984ff Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 17 Dec 2022 20:28:10 +0100 Subject: Advertise edge collapses more. --- src/python/doc/rips_complex_user.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst index 489fbb4a..a4e83462 100644 --- a/src/python/doc/rips_complex_user.rst +++ b/src/python/doc/rips_complex_user.rst @@ -52,6 +52,13 @@ construction of a :class:`~gudhi.RipsComplex` object asks it to build a sparse R parameter :math:`\varepsilon=0.3`, while the default `sparse=None` builds the regular Rips complex. +Another option which is especially useful if you want to compute persistent homology in "high" dimension (2 or more, +sometimes even 1), is to build the Rips complex only up to dimension 1 (a graph), then use +:func:`~gudhi.SimplexTree.collapse_edges` to reduce the size of this graph, and finally call +:func:`~gudhi.SimplexTree.expansion` to get a simplicial complex of a suitable dimension to compute its homology. This +trick gives the same persistence diagram as one would get with a plain use of `RipsComplex`, with a complex that is +often significantly smaller and thus faster to process. + Point cloud ----------- @@ -210,7 +217,7 @@ until dimension 1 - one skeleton graph in other words), the output is: [4, 6] -> 9.49 [3, 6] -> 11.00 -In case this lower triangular matrix is stored in a CSV file, like data/distance_matrix/full_square_distance_matrix.csv in the Gudhi distribution, you can read it with :func:`~gudhi.read_lower_triangular_matrix_from_csv_file`. +In case this lower triangular matrix is stored in a CSV file, like `data/distance_matrix/full_square_distance_matrix.csv` in the Gudhi distribution, you can read it with :func:`~gudhi.read_lower_triangular_matrix_from_csv_file`. Correlation matrix ------------------ -- cgit v1.2.3 From 86689f89bf896e41683fd7b1a4568f2b34ea505d Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 3 Jan 2023 21:23:49 +0100 Subject: fix get_params --- src/python/gudhi/representations/vector_methods.py | 18 +++++++++--------- src/python/test/test_representations.py | 6 +++++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index 745fe1e5..ce74aee5 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -138,13 +138,13 @@ def _trim_endpoints(x, are_endpoints_nan): def _grid_from_sample_range(self, X): - sample_range = np.array(self.sample_range_init) + sample_range = np.array(self.sample_range) self.nan_in_range = np.isnan(sample_range) self.new_resolution = self.resolution if not self.keep_endpoints: self.new_resolution += self.nan_in_range.sum() - self.sample_range = _automatic_sample_range(sample_range, X) - self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution) + self.sample_range_fixed = _automatic_sample_range(sample_range, X) + self.grid_ = np.linspace(self.sample_range_fixed[0], self.sample_range_fixed[1], self.new_resolution) if not self.keep_endpoints: self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range) @@ -166,7 +166,7 @@ class Landscape(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.num_landscapes, self.resolution, self.sample_range_init = num_landscapes, resolution, sample_range + self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -240,7 +240,7 @@ class Silhouette(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.weight, self.resolution, self.sample_range_init = weight, resolution, sample_range + self.weight, self.resolution, self.sample_range = weight, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -334,7 +334,7 @@ class BettiCurve(BaseEstimator, TransformerMixin): self.predefined_grid = predefined_grid self.resolution = resolution - self.sample_range_init = sample_range + self.sample_range = sample_range self.keep_endpoints = keep_endpoints def is_fitted(self): @@ -468,7 +468,7 @@ class Entropy(BaseEstimator, TransformerMixin): sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector". keep_endpoints (bool): when computing `sample_range`, use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range. """ - self.mode, self.normalized, self.resolution, self.sample_range_init = mode, normalized, resolution, sample_range + self.mode, self.normalized, self.resolution, self.sample_range = mode, normalized, resolution, sample_range self.keep_endpoints = keep_endpoints def fit(self, X, y=None): @@ -509,8 +509,8 @@ class Entropy(BaseEstimator, TransformerMixin): ent = np.zeros(self.resolution) for j in range(num_pts_in_diag): [px,py] = orig_diagram[j,:2] - min_idx = np.clip(np.ceil((px - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution) - max_idx = np.clip(np.ceil((py - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution) + min_idx = np.clip(np.ceil((px - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution) + max_idx = np.clip(np.ceil((py - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution) ent[min_idx:max_idx]-=p[j]*np.log(p[j]) if self.normalized: ent = ent / np.linalg.norm(ent, ord=1) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index ae0362f8..f4ffbdc1 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -249,7 +249,7 @@ def test_landscape_nan_range(): dgm = np.array([[2., 6.], [3., 5.]]) lds = Landscape(num_landscapes=2, resolution=9, sample_range=[np.nan, 6.]) lds_dgm = lds(dgm) - assert (lds.sample_range[0] == 2) & (lds.sample_range[1] == 6) + assert (lds.sample_range_fixed[0] == 2) & (lds.sample_range_fixed[1] == 6) assert lds.new_resolution == 10 def test_endpoints(): @@ -263,3 +263,7 @@ def test_endpoints(): vec = BettiCurve(resolution=None) vec.fit(diags) assert np.equal(vec.grid_, [-np.inf, 2., 3.]).all() + +def test_get_params(): + for vec in [ Landscape(), Silhouette(), BettiCurve(), Entropy(mode="vector") ]: + vec.get_params() -- cgit v1.2.3