Merge branch 'master' of https://github.com/GUDHI/gudhi-devel into perslay

author: MathieuCarriere <mathieu.carriere3@gmail.com> 2023-01-06 08:02:13 +0100
committer: MathieuCarriere <mathieu.carriere3@gmail.com> 2023-01-06 08:02:13 +0100
commit: 79464e39d3edc9df8cf4def807479900d3c7b125 (patch)
tree: f9b2809a3d9001edc89bebf1d26f11ae65160772 /src
parent: 81d8de84fedfc26032a8bf38aa8898d82db32a12 (diff)
parent: b6171a12cfdeb26aa782a9dfe5d58177ca2239fb (diff)
3 files changed, 62 insertions, 109 deletions
diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst
index c41a7803..a4e83462 100644
--- a/src/python/doc/rips_complex_user.rst
+++ b/src/python/doc/rips_complex_user.rst
@@ -34,9 +34,6 @@ A vertex name corresponds to the index of the point in the given range (aka. the
 On this example, as edges (4,5), (4,6) and (5,6) are in the complex, simplex (4,5,6) is added with the filtration value
 set with :math:`max(filtration(4,5), filtration(4,6), filtration(5,6))`. And so on for simplex (0,1,2,3).
 
-If the :doc:`RipsComplex <rips_complex_ref>` interfaces are not detailed enough for your need, please refer to
-rips_persistence_step_by_step.cpp C++ example, where the graph construction over the Simplex_tree is more detailed.
-
 A Rips complex can easily become huge, even if we limit the length of the edges
 and the dimension of the simplices. One easy trick, before building a Rips
 complex on a point cloud, is to call :func:`~gudhi.sparsify_point_set` which removes points
@@ -55,6 +52,13 @@ construction of a :class:`~gudhi.RipsComplex` object asks it to build a sparse R
 parameter :math:`\varepsilon=0.3`, while the default `sparse=None` builds the
 regular Rips complex.
 
+Another option which is especially useful if you want to compute persistent homology in "high" dimension (2 or more,
+sometimes even 1), is to build the Rips complex only up to dimension 1 (a graph), then use
+:func:`~gudhi.SimplexTree.collapse_edges` to reduce the size of this graph, and finally call
+:func:`~gudhi.SimplexTree.expansion` to get a simplicial complex of a suitable dimension to compute its homology. This
+trick gives the same persistence diagram as one would get with a plain use of `RipsComplex`, with a complex that is
+often significantly smaller and thus faster to process.
+
 
 Point cloud
 -----------
@@ -117,54 +121,44 @@ Notice that if we use
 asking for a very sparse version (theory only gives some guarantee on the meaning of the output if `sparse<1`),
 2 to 5 edges disappear, depending on the random vertex used to start the sparsification.
 
-Example from OFF file
-^^^^^^^^^^^^^^^^^^^^^
+Example step by step
+^^^^^^^^^^^^^^^^^^^^
 
-This example builds the :doc:`RipsComplex <rips_complex_ref>` from the given
-points in an OFF file, and max_edge_length value.
-Then it creates a :doc:`SimplexTree <simplex_tree_ref>` with it.
+While :doc:`RipsComplex <rips_complex_ref>` is convenient, for instance to build a simplicial complex in one line
+
+.. testcode::
 
-Finally, it is asked to display information about the Rips complex.
+   import gudhi
+   points = [[1, 1], [7, 0], [4, 6], [9, 6], [0, 14], [2, 19], [9, 17]]
+   cplx = gudhi.RipsComplex(points=points, max_edge_length=12.0).create_simplex_tree(max_dimension=2)
 
+you can achieve the same result without this class for more flexibility
 
 .. testcode::
 
-    import gudhi
-    off_file = gudhi.__root_source_dir__ + '/data/points/alphacomplexdoc.off'
-    point_cloud = gudhi.read_points_from_off_file(off_file = off_file)
-    rips_complex = gudhi.RipsComplex(points=point_cloud, max_edge_length=12.0)
-    simplex_tree = rips_complex.create_simplex_tree(max_dimension=1)
-    result_str = 'Rips complex is of dimension ' + repr(simplex_tree.dimension()) + ' - ' + \
-        repr(simplex_tree.num_simplices()) + ' simplices - ' + \
-        repr(simplex_tree.num_vertices()) + ' vertices.'
-    print(result_str)
-    fmt = '%s -> %.2f'
-    for filtered_value in simplex_tree.get_filtration():
-        print(fmt % tuple(filtered_value))
+   import gudhi
+   from scipy.spatial.distance import cdist
+   points = [[1, 1], [7, 0], [4, 6], [9, 6], [0, 14], [2, 19], [9, 17]]
+   distance_matrix = cdist(points, points)
+   cplx = gudhi.SimplexTree.create_from_array(distance_matrix, max_filtration=12.0)
+   cplx.expansion(2)
 
-the program output is:
+or
 
-.. testoutput::
+.. testcode::
+
+   import gudhi
+   from scipy.spatial import cKDTree
+   points = [[1, 1], [7, 0], [4, 6], [9, 6], [0, 14], [2, 19], [9, 17]]
+   tree = cKDTree(points)
+   edges = tree.sparse_distance_matrix(tree, max_distance=12.0, output_type="coo_matrix")
+   cplx = gudhi.SimplexTree()
+   cplx.insert_edges_from_coo_matrix(edges)
+   cplx.expansion(2)
 
-    Rips complex is of dimension 1 - 18 simplices - 7 vertices.
-    [0] -> 0.00
-    [1] -> 0.00
-    [2] -> 0.00
-    [3] -> 0.00
-    [4] -> 0.00
-    [5] -> 0.00
-    [6] -> 0.00
-    [2, 3] -> 5.00
-    [4, 5] -> 5.39
-    [0, 2] -> 5.83
-    [0, 1] -> 6.08
-    [1, 3] -> 6.32
-    [1, 2] -> 6.71
-    [5, 6] -> 7.28
-    [2, 4] -> 8.94
-    [0, 3] -> 9.43
-    [4, 6] -> 9.49
-    [3, 6] -> 11.00
+
+This way, you can easily add a call to :func:`~gudhi.SimplexTree.collapse_edges` before the expansion,
+use a different metric to compute the matrix, or other variations.
 
 Distance matrix
 ---------------
@@ -223,54 +217,7 @@ until dimension 1 - one skeleton graph in other words), the output is:
     [4, 6] -> 9.49
     [3, 6] -> 11.00
 
-Example from csv file
-^^^^^^^^^^^^^^^^^^^^^
-
-This example builds the :doc:`RipsComplex <rips_complex_ref>` from the given
-distance matrix in a csv file, and max_edge_length value.
-Then it creates a :doc:`SimplexTree <simplex_tree_ref>` with it.
-
-Finally, it is asked to display information about the Rips complex.
-
-
-.. testcode::
-
-    import gudhi
-    distance_matrix = gudhi.read_lower_triangular_matrix_from_csv_file(csv_file=gudhi.__root_source_dir__ + \
-        '/data/distance_matrix/full_square_distance_matrix.csv')
-    rips_complex = gudhi.RipsComplex(distance_matrix=distance_matrix, max_edge_length=12.0)
-    simplex_tree = rips_complex.create_simplex_tree(max_dimension=1)
-    result_str = 'Rips complex is of dimension ' + repr(simplex_tree.dimension()) + ' - ' + \
-        repr(simplex_tree.num_simplices()) + ' simplices - ' + \
-        repr(simplex_tree.num_vertices()) + ' vertices.'
-    print(result_str)
-    fmt = '%s -> %.2f'
-    for filtered_value in simplex_tree.get_filtration():
-        print(fmt % tuple(filtered_value))
-
-the program output is:
-
-.. testoutput::
-
-    Rips complex is of dimension 1 - 18 simplices - 7 vertices.
-    [0] -> 0.00
-    [1] -> 0.00
-    [2] -> 0.00
-    [3] -> 0.00
-    [4] -> 0.00
-    [5] -> 0.00
-    [6] -> 0.00
-    [2, 3] -> 5.00
-    [4, 5] -> 5.39
-    [0, 2] -> 5.83
-    [0, 1] -> 6.08
-    [1, 3] -> 6.32
-    [1, 2] -> 6.71
-    [5, 6] -> 7.28
-    [2, 4] -> 8.94
-    [0, 3] -> 9.43
-    [4, 6] -> 9.49
-    [3, 6] -> 11.00
+In case this lower triangular matrix is stored in a CSV file, like `data/distance_matrix/full_square_distance_matrix.csv` in the Gudhi distribution, you can read it with :func:`~gudhi.read_lower_triangular_matrix_from_csv_file`.
 
 Correlation matrix
 ------------------
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index e1402aea..ce74aee5 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -138,13 +138,13 @@ def _trim_endpoints(x, are_endpoints_nan):
 
 
 def _grid_from_sample_range(self, X):
-    sample_range = np.array(self.sample_range_init)
+    sample_range = np.array(self.sample_range)
     self.nan_in_range = np.isnan(sample_range)
     self.new_resolution = self.resolution
     if not self.keep_endpoints:
         self.new_resolution += self.nan_in_range.sum()
-    self.sample_range = _automatic_sample_range(sample_range, X)
-    self.grid_ = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
+    self.sample_range_fixed = _automatic_sample_range(sample_range, X)
+    self.grid_ = np.linspace(self.sample_range_fixed[0], self.sample_range_fixed[1], self.new_resolution)
     if not self.keep_endpoints:
         self.grid_ = _trim_endpoints(self.grid_, self.nan_in_range)
 
@@ -166,7 +166,7 @@ class Landscape(BaseEstimator, TransformerMixin):
             sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
             keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range.
         """
-        self.num_landscapes, self.resolution, self.sample_range_init = num_landscapes, resolution, sample_range
+        self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range
         self.keep_endpoints = keep_endpoints
 
     def fit(self, X, y=None):
@@ -240,7 +240,7 @@ class Silhouette(BaseEstimator, TransformerMixin):
             sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
             keep_endpoints (bool): when computing `sample_range`, use the exact extremities (where the value is always 0). This is mostly useful for plotting, the default is to use a slightly smaller range.
         """
-        self.weight, self.resolution, self.sample_range_init = weight, resolution, sample_range
+        self.weight, self.resolution, self.sample_range = weight, resolution, sample_range
         self.keep_endpoints = keep_endpoints
 
     def fit(self, X, y=None):
@@ -334,7 +334,7 @@ class BettiCurve(BaseEstimator, TransformerMixin):
 
         self.predefined_grid = predefined_grid
         self.resolution = resolution
-        self.sample_range_init = sample_range
+        self.sample_range = sample_range
         self.keep_endpoints = keep_endpoints
 
     def is_fitted(self):
@@ -468,7 +468,7 @@ class Entropy(BaseEstimator, TransformerMixin):
             sample_range ([double, double]): minimum and maximum of the entropy summary function domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method. Used only if **mode** = "vector".
             keep_endpoints (bool): when computing `sample_range`, use the exact extremities. This is mostly useful for plotting, the default is to use a slightly smaller range.
         """
-        self.mode, self.normalized, self.resolution, self.sample_range_init = mode, normalized, resolution, sample_range
+        self.mode, self.normalized, self.resolution, self.sample_range = mode, normalized, resolution, sample_range
         self.keep_endpoints = keep_endpoints
 
     def fit(self, X, y=None):
@@ -509,8 +509,8 @@ class Entropy(BaseEstimator, TransformerMixin):
                 ent = np.zeros(self.resolution)
                 for j in range(num_pts_in_diag):
                     [px,py] = orig_diagram[j,:2]
-                    min_idx = np.clip(np.ceil((px - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution)
-                    max_idx = np.clip(np.ceil((py - self.sample_range[0]) / self.step_).astype(int), 0, self.resolution)
+                    min_idx = np.clip(np.ceil((px - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution)
+                    max_idx = np.clip(np.ceil((py - self.sample_range_fixed[0]) / self.step_).astype(int), 0, self.resolution)
                     ent[min_idx:max_idx]-=p[j]*np.log(p[j])
                 if self.normalized:
                     ent = ent / np.linalg.norm(ent, ord=1)
@@ -710,17 +710,17 @@ class Atol(BaseEstimator, TransformerMixin):
     >>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
     >>> c = np.array([[3, 2, -1], [1, 2, -1]])
     >>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
-    >>> atol_vectoriser.fit(X=[a, b, c]).centers # doctest: +SKIP
-    >>> # array([[ 2.        ,  0.66666667,  3.33333333],
-    >>> #        [ 2.6       ,  2.8       , -0.4       ]])
+    >>> atol_vectoriser.fit(X=[a, b, c]).centers
+    array([[ 2.6       ,  2.8       , -0.4       ],
+           [ 2.        ,  0.66666667,  3.33333333]])
     >>> atol_vectoriser(a)
-    >>> # array([1.18168665, 0.42375966]) # doctest: +SKIP
+    array([0.42375966, 1.18168665])
     >>> atol_vectoriser(c)
-    >>> # array([0.02062512, 1.25157463]) # doctest: +SKIP
-    >>> atol_vectoriser.transform(X=[a, b, c]) # doctest: +SKIP
-    >>> # array([[1.18168665, 0.42375966],
-    >>> #        [0.29861028, 1.06330156],
-    >>> #        [0.02062512, 1.25157463]])
+    array([1.25157463, 0.02062512])
+    >>> atol_vectoriser.transform(X=[a, b, c])
+    array([[0.42375966, 1.18168665],
+           [1.06330156, 0.29861028],
+           [1.25157463, 0.02062512]])
     """
     # Note the example above must be up to date with the one in tests called test_atol_doc
     def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
@@ -771,6 +771,8 @@ class Atol(BaseEstimator, TransformerMixin):
         measures_concat = np.concatenate(X)
         self.quantiser.fit(X=measures_concat, sample_weight=sample_weight)
         self.centers = self.quantiser.cluster_centers_
+        # Hack, but some people are unhappy if the order depends on the version of sklearn
+        self.centers = self.centers[np.lexsort(self.centers.T)]
         if self.quantiser.n_clusters == 1:
             dist_centers = pairwise.pairwise_distances(measures_concat)
             np.fill_diagonal(dist_centers, 0)
diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py
index ae0362f8..f4ffbdc1 100755
--- a/src/python/test/test_representations.py
+++ b/src/python/test/test_representations.py
@@ -249,7 +249,7 @@ def test_landscape_nan_range():
     dgm = np.array([[2., 6.], [3., 5.]])
     lds = Landscape(num_landscapes=2, resolution=9, sample_range=[np.nan, 6.])
     lds_dgm = lds(dgm)
-    assert (lds.sample_range[0] == 2) & (lds.sample_range[1] == 6)
+    assert (lds.sample_range_fixed[0] == 2) & (lds.sample_range_fixed[1] == 6)
     assert lds.new_resolution == 10
 
 def test_endpoints():
@@ -263,3 +263,7 @@ def test_endpoints():
     vec = BettiCurve(resolution=None)
     vec.fit(diags)
     assert np.equal(vec.grid_, [-np.inf, 2., 3.]).all()
+
+def test_get_params():
+    for vec in [ Landscape(), Silhouette(), BettiCurve(), Entropy(mode="vector") ]:
+        vec.get_params()
author	MathieuCarriere <mathieu.carriere3@gmail.com>	2023-01-06 08:02:13 +0100
committer	MathieuCarriere <mathieu.carriere3@gmail.com>	2023-01-06 08:02:13 +0100
commit	79464e39d3edc9df8cf4def807479900d3c7b125 (patch)
tree	f9b2809a3d9001edc89bebf1d26f11ae65160772 /src
parent	81d8de84fedfc26032a8bf38aa8898d82db32a12 (diff)
parent	b6171a12cfdeb26aa782a9dfe5d58177ca2239fb (diff)