From 4a64eef12722de3faa8ac73416aaea91658e20b6 Mon Sep 17 00:00:00 2001
From: ROUVREAU Vincent <vincent.rouvreau@inria.fr>
Date: Tue, 1 Jun 2021 19:12:50 +0200
Subject: Add cubical scikit learn interface documentation and example

---
 src/python/doc/cubical_complex_user.rst | 58 ++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 8 deletions(-)

(limited to 'src/python/doc/cubical_complex_user.rst')

diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst
index 6a211347..12971243 100644
--- a/src/python/doc/cubical_complex_user.rst
+++ b/src/python/doc/cubical_complex_user.rst
@@ -7,14 +7,19 @@ Cubical complex user manual
 Definition
 ----------
 
-=====================================  =====================================  =====================================
-:Author: Pawel Dlotko                  :Since: GUDHI PYTHON 2.0.0             :License: GPL v3
-=====================================  =====================================  =====================================
+.. list-table::
+   :widths: 25 50 25
+   :header-rows: 0
+
+   * - :Author: Pawel Dlotko
+     - :Since: GUDHI 2.0.0
+     - :License: MIT
+   * - :doc:`cubical_complex_user`
+     - * :doc:`cubical_complex_ref`
+       * :doc:`periodic_cubical_complex_ref`
+       * :doc:`cubical_complex_sklearn_itf_ref`
+     -
 
-+---------------------------------------------+----------------------------------------------------------------------+
-| :doc:`cubical_complex_user`                 | * :doc:`cubical_complex_ref`                                         |
-|                                             | * :doc:`periodic_cubical_complex_ref`                                |
-+---------------------------------------------+----------------------------------------------------------------------+
 
 The cubical complex is an example of a structured complex useful in computational mathematics (specially rigorous
 numerics) and image analysis.
@@ -163,4 +168,41 @@ Tutorial
 --------
 
 This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-cubical-complexes.ipynb>`_
-explains how to represent sublevels sets of functions using cubical complexes.
\ No newline at end of file
+explains how to represent sublevels sets of functions using cubical complexes.
+
+Scikit-learn like interface example
+-----------------------------------
+
+.. plot::
+   :include-source:
+
+    # Standard scientific Python imports
+    import matplotlib.pyplot as plt
+    from sklearn import datasets
+    
+    # Import cubical persistence computation scikit-learn interfaces
+    from gudhi.sklearn.cubical_persistence import CubicalPersistence
+    # Import persistence representation
+    from gudhi.representations import PersistenceImage, DiagramSelector
+    
+    # Get the first 10 images from scikit-learn hand digits dataset
+    digits = datasets.load_digits().images[:10]
+    targets = datasets.load_digits().target[:10]
+    
+    # TDA pipeline
+    cub = CubicalPersistence(persistence_dim = 0, n_jobs=-2)
+    diags = cub.fit_transform(digits)
+    
+    finite = DiagramSelector(use=True, point_type="finite")
+    finite_diags = finite.fit_transform(diags)
+    
+    persim = PersistenceImage(im_range=[0,16,0,16], resolution=[16, 16])
+    pers_images = persim.fit_transform(finite_diags)
+    
+    # Display persistence images
+    _, axes = plt.subplots(nrows=1, ncols=10, figsize=(15, 3))
+    for ax, image, label in zip(axes, pers_images, targets):
+        ax.set_axis_off()
+        ax.imshow(image.reshape(16, 16), cmap=plt.cm.gray_r, interpolation='nearest')
+        ax.set_title('Target: %i' % label)
+    plt.show()
-- 
cgit v1.2.3


From 546b059af6c0581d06bfe9cebbe853f2f7bd4589 Mon Sep 17 00:00:00 2001
From: ROUVREAU Vincent <vincent.rouvreau@inria.fr>
Date: Fri, 4 Jun 2021 11:56:59 +0200
Subject: Add a more relevant example inspired from
 https://dioscuri-tda.org/Paris_TDA_Tutorial_2021.html

---
 src/python/doc/cubical_complex_user.rst | 66 +++++++++++++++++++++------------
 1 file changed, 43 insertions(+), 23 deletions(-)

(limited to 'src/python/doc/cubical_complex_user.rst')

diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst
index 12971243..ebecb592 100644
--- a/src/python/doc/cubical_complex_user.rst
+++ b/src/python/doc/cubical_complex_user.rst
@@ -173,36 +173,56 @@ explains how to represent sublevels sets of functions using cubical complexes.
 Scikit-learn like interface example
 -----------------------------------
 
-.. plot::
-   :include-source:
+.. code-block:: python
 
     # Standard scientific Python imports
-    import matplotlib.pyplot as plt
-    from sklearn import datasets
+    import numpy as np
+    # Standard scikit-learn imports
+    from sklearn.datasets import fetch_openml
+    from sklearn.pipeline import Pipeline
+    from sklearn.model_selection import train_test_split
+    from sklearn.svm import SVC
+    from sklearn import metrics
     
-    # Import cubical persistence computation scikit-learn interfaces
+    # Import TDA pipeline requirements
     from gudhi.sklearn.cubical_persistence import CubicalPersistence
-    # Import persistence representation
     from gudhi.representations import PersistenceImage, DiagramSelector
     
-    # Get the first 10 images from scikit-learn hand digits dataset
-    digits = datasets.load_digits().images[:10]
-    targets = datasets.load_digits().target[:10]
+    X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
     
-    # TDA pipeline
-    cub = CubicalPersistence(persistence_dim = 0, n_jobs=-2)
-    diags = cub.fit_transform(digits)
+    # Target is: "is an eight ?"
+    y = (y == '8') * 1
+    print('There are', np.sum(y), 'eights out of', len(y), 'numbers.')
     
-    finite = DiagramSelector(use=True, point_type="finite")
-    finite_diags = finite.fit_transform(diags)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
+    pipe = Pipeline([('cub_pers', CubicalPersistence(persistence_dim = 0, dimensions=[28,28], n_jobs=-2)),
+                     ('finite_diags', DiagramSelector(use=True, point_type="finite")),
+                     ('pers_img', PersistenceImage(bandwidth=50,
+                                                   weight=lambda x: x[1]**2,
+                                                   im_range=[0,256,0,256],
+                                                   resolution=[20,20])),
+                     ('svc', SVC())])
     
-    persim = PersistenceImage(im_range=[0,16,0,16], resolution=[16, 16])
-    pers_images = persim.fit_transform(finite_diags)
+    predicted = pipe.predict(X_test)
     
-    # Display persistence images
-    _, axes = plt.subplots(nrows=1, ncols=10, figsize=(15, 3))
-    for ax, image, label in zip(axes, pers_images, targets):
-        ax.set_axis_off()
-        ax.imshow(image.reshape(16, 16), cmap=plt.cm.gray_r, interpolation='nearest')
-        ax.set_title('Target: %i' % label)
-    plt.show()
+    print(f"Classification report for TDA pipeline {pipe}:\n"
+          f"{metrics.classification_report(y_test, predicted)}\n")
+
+.. code-block:: none
+
+    There are 6825 eights out of 70000 numbers.
+    Classification report for TDA pipeline Pipeline(steps=[('cub_pers',
+                     CubicalPersistence(dimensions=[28, 28], n_jobs=-2)),
+                    ('finite_diags', DiagramSelector(use=True)),
+                    ('pers_img',
+                     PersistenceImage(bandwidth=50, im_range=[0, 256, 0, 256],
+                                      weight=<function <lambda> at 0x7f3e54137ae8>)),
+                    ('svc', SVC())]):
+                  precision    recall  f1-score   support
+    
+               0       0.97      0.99      0.98     25284
+               1       0.92      0.68      0.78      2716
+    
+        accuracy                           0.96     28000
+       macro avg       0.94      0.84      0.88     28000
+    weighted avg       0.96      0.96      0.96     28000
\ No newline at end of file
-- 
cgit v1.2.3


From b7de9c211e9cfe361aa7bba9be32b88570972c38 Mon Sep 17 00:00:00 2001
From: ROUVREAU Vincent <vincent.rouvreau@inria.fr>
Date: Mon, 7 Jun 2021 14:57:02 +0200
Subject: Improve documentation

---
 src/python/doc/cubical_complex_user.rst         | 45 ++++++++++++++++++-------
 src/python/gudhi/sklearn/cubical_persistence.py | 18 ++++++++--
 2 files changed, 48 insertions(+), 15 deletions(-)

(limited to 'src/python/doc/cubical_complex_user.rst')

diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst
index ebecb592..3fd9fd84 100644
--- a/src/python/doc/cubical_complex_user.rst
+++ b/src/python/doc/cubical_complex_user.rst
@@ -173,10 +173,24 @@ explains how to represent sublevels sets of functions using cubical complexes.
 Scikit-learn like interface example
 -----------------------------------
 
+In this example, hand written digits are used as an input.
+A TDA scikit-learn pipeline is constructed and is composed of:
+
+#. :class:`~gudhi.sklearn.cubical_persistence.CubicalPersistence` that builds a cubical complex from the inputs and
+   returns its persistence diagrams
+#. :class:`~gudhi.representations.DiagramSelector` that removes non-finite persistence diagrams values
+#. :class:`~gudhi.representations.PersistenceImage` that builds the persistence images from persistence diagrams
+#. `SVC <https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html>`_ which is a scikit-learn support
+   vector classifier.
+
+This ML pipeline is trained to detect if the hand written digit is an '8' or not, thanks to the fact that an '8' has
+two holes in :math:`\mathbf{H}_1`, or, like in this example, three connected components in :math:`\mathbf{H}_0`.
+
 .. code-block:: python
 
     # Standard scientific Python imports
     import numpy as np
+    
     # Standard scikit-learn imports
     from sklearn.datasets import fetch_openml
     from sklearn.pipeline import Pipeline
@@ -188,25 +202,32 @@ Scikit-learn like interface example
     from gudhi.sklearn.cubical_persistence import CubicalPersistence
     from gudhi.representations import PersistenceImage, DiagramSelector
     
-    X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
+    X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
     
     # Target is: "is an eight ?"
-    y = (y == '8') * 1
-    print('There are', np.sum(y), 'eights out of', len(y), 'numbers.')
+    y = (y == "8") * 1
+    print("There are", np.sum(y), "eights out of", len(y), "numbers.")
     
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
-    pipe = Pipeline([('cub_pers', CubicalPersistence(persistence_dim = 0, dimensions=[28,28], n_jobs=-2)),
-                     ('finite_diags', DiagramSelector(use=True, point_type="finite")),
-                     ('pers_img', PersistenceImage(bandwidth=50,
-                                                   weight=lambda x: x[1]**2,
-                                                   im_range=[0,256,0,256],
-                                                   resolution=[20,20])),
-                     ('svc', SVC())])
+    pipe = Pipeline(
+        [
+            ("cub_pers", CubicalPersistence(persistence_dim=0, dimensions=[28, 28], n_jobs=-2)),
+            ("finite_diags", DiagramSelector(use=True, point_type="finite")),
+            (
+                "pers_img",
+                PersistenceImage(bandwidth=50, weight=lambda x: x[1] ** 2, im_range=[0, 256, 0, 256], resolution=[20, 20]),
+            ),
+            ("svc", SVC()),
+        ]
+    )
     
+    # Learn from the train subset
+    pipe.fit(X_train, y_train)
+    # Predict from the test subset
     predicted = pipe.predict(X_test)
     
-    print(f"Classification report for TDA pipeline {pipe}:\n"
-          f"{metrics.classification_report(y_test, predicted)}\n")
+    print(f"Classification report for TDA pipeline {pipe}:\n" f"{metrics.classification_report(y_test, predicted)}\n")
+
 
 .. code-block:: none
 
diff --git a/src/python/gudhi/sklearn/cubical_persistence.py b/src/python/gudhi/sklearn/cubical_persistence.py
index f4341bf6..251e240f 100644
--- a/src/python/gudhi/sklearn/cubical_persistence.py
+++ b/src/python/gudhi/sklearn/cubical_persistence.py
@@ -1,3 +1,12 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s):       Vincent Rouvreau
+#
+# Copyright (C) 2021 Inria
+#
+# Modification(s):
+#   - YYYY/MM Author: Description of the modification
+
 from .. import CubicalComplex
 from sklearn.base import BaseEstimator, TransformerMixin
 
@@ -17,7 +26,8 @@ class CubicalPersistence(BaseEstimator, TransformerMixin):
         Constructor for the CubicalPersistence class.
 
         Parameters:
-            dimensions (list of int): A list of number of top dimensional cells.
+            dimensions (list of int): A list of number of top dimensional cells if cells filtration values will require
+                to be reshaped (cf. :func:`~gudhi.sklearn.cubical_persistence.CubicalPersistence.transform`)
             persistence_dim (int): The returned persistence diagrams dimension. Default value is `0`.
             min_persistence (float): The minimum persistence value to take into account (strictly greater than
                 `min_persistence`). Default value is `0.0`. Sets `min_persistence` to `-1.0` to see all values.
@@ -39,7 +49,7 @@ class CubicalPersistence(BaseEstimator, TransformerMixin):
 
     def fit(self, X, Y=None):
         """
-        Nothing to be done.
+        Nothing to be done, but useful when included in a scikit-learn Pipeline.
         """
         return self
 
@@ -56,7 +66,9 @@ class CubicalPersistence(BaseEstimator, TransformerMixin):
         Compute all the cubical complexes and their associated persistence diagrams.
 
         Parameters:
-            X (list of list of double OR list of numpy.ndarray): List of cells filtration values.
+            X (list of list of double OR list of numpy.ndarray): List of cells filtration values that can be flatten if
+                dimensions is set in the constructor, or already with the correct shape in a numpy.ndarray (and
+                dimensions must not be set).
 
         Returns:
             Persistence diagrams
-- 
cgit v1.2.3


From 5c35605763273cb34efe4227b6d748992e99ab09 Mon Sep 17 00:00:00 2001
From: ROUVREAU Vincent <vincent.rouvreau@inria.fr>
Date: Mon, 9 Aug 2021 10:38:31 +0200
Subject: Make CubicalPersistence returns all dimensions. Post processing
 DimensionSelector can select the desired dimension

---
 src/python/CMakeLists.txt                          |  1 +
 src/python/doc/cubical_complex_user.rst            |  2 +-
 src/python/gudhi/sklearn/cubical_persistence.py    | 49 +++++++++++++----
 src/python/gudhi/sklearn/post_processing.py        | 61 ++++++++++++++++++++++
 .../test/test_sklearn_cubical_persistence.py       | 21 ++++++--
 src/python/test/test_sklearn_post_processing.py    | 48 +++++++++++++++++
 6 files changed, 167 insertions(+), 15 deletions(-)
 create mode 100644 src/python/gudhi/sklearn/post_processing.py
 create mode 100644 src/python/test/test_sklearn_post_processing.py

(limited to 'src/python/doc/cubical_complex_user.rst')

diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index a91aab37..b38bb9aa 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -546,6 +546,7 @@ if(PYTHONINTERP_FOUND)
     # sklearn
     if(SKLEARN_FOUND)
       add_gudhi_py_test(test_sklearn_cubical_persistence)
+      add_gudhi_py_test(test_sklearn_post_processing)
     endif()
 
 
diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst
index 3fd9fd84..a140a279 100644
--- a/src/python/doc/cubical_complex_user.rst
+++ b/src/python/doc/cubical_complex_user.rst
@@ -211,7 +211,7 @@ two holes in :math:`\mathbf{H}_1`, or, like in this example, three connected com
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
     pipe = Pipeline(
         [
-            ("cub_pers", CubicalPersistence(persistence_dim=0, dimensions=[28, 28], n_jobs=-2)),
+            ("cub_pers", CubicalPersistence(only_this_dim=0, dimensions=[28, 28], n_jobs=-2)),
             ("finite_diags", DiagramSelector(use=True, point_type="finite")),
             (
                 "pers_img",
diff --git a/src/python/gudhi/sklearn/cubical_persistence.py b/src/python/gudhi/sklearn/cubical_persistence.py
index 9af683d7..7b77000d 100644
--- a/src/python/gudhi/sklearn/cubical_persistence.py
+++ b/src/python/gudhi/sklearn/cubical_persistence.py
@@ -13,27 +13,44 @@ from sklearn.base import BaseEstimator, TransformerMixin
 # joblib is required by scikit-learn
 from joblib import Parallel, delayed
 
+# Mermaid sequence diagram - https://mermaid-js.github.io/mermaid-live-editor/
+# sequenceDiagram
+#     USER->>CubicalPersistence: fit_transform(X)
+#     CubicalPersistence->>thread1: _transform(X[0])
+#     CubicalPersistence->>thread2: _transform(X[1])
+#     Note right of CubicalPersistence: ...
+#     thread1->>CubicalPersistence: [array( H0(X[0]) ), array( H1(X[0]) )]
+#     thread2->>CubicalPersistence: [array( H0(X[1]) ), array( H1(X[1]) )]
+#     Note right of CubicalPersistence: ...
+#     CubicalPersistence->>USER: [[array( H0(X[0]) ), array( H1(X[0]) )],<br/> [array( H0(X[1]) ), array( H1(X[1]) )],<br/> ...]
+
 
 class CubicalPersistence(BaseEstimator, TransformerMixin):
     """
     This is a class for computing the persistence diagrams from a cubical complex.
     """
 
-    def __init__(self, dimensions=None, persistence_dim=0, homology_coeff_field=11, min_persistence=0., n_jobs=None):
+    def __init__(self, dimensions=None, max_persistence_dimension=0, only_this_dim=-1, homology_coeff_field=11, min_persistence=0., n_jobs=None):
         """
         Constructor for the CubicalPersistence class.
 
         Parameters:
             dimensions (list of int): A list of number of top dimensional cells if cells filtration values will require
                 to be reshaped (cf. :func:`~gudhi.sklearn.cubical_persistence.CubicalPersistence.transform`)
-            persistence_dim (int): The returned persistence diagrams dimension. Default value is `0`.
+            max_persistence_dimension (int): The returned persistence diagrams maximal dimension. Default value is `0`.
+                Ignored if `only_this_dim` is set.
+            only_this_dim (int): The returned persistence diagrams dimension. If `only_this_dim` is set,
+                `max_persistence_dimension` will be ignored. 
+                Short circuit the use of :class:`~gudhi.sklearn.post_processing.DimensionSelector` when only one
+                dimension matters.
             homology_coeff_field (int): The homology coefficient field. Must be a prime number. Default value is 11.
             min_persistence (float): The minimum persistence value to take into account (strictly greater than
                 `min_persistence`). Default value is `0.0`. Sets `min_persistence` to `-1.0` to see all values.
             n_jobs (int): cf. https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html
         """
         self.dimensions = dimensions
-        self.persistence_dim = persistence_dim
+        self.max_persistence_dimension = max_persistence_dimension
+        self.only_this_dim = only_this_dim
         self.homology_coeff_field = homology_coeff_field
         self.min_persistence = min_persistence
         self.n_jobs = n_jobs
@@ -49,8 +66,14 @@ class CubicalPersistence(BaseEstimator, TransformerMixin):
         cubical_complex.compute_persistence(
             homology_coeff_field=self.homology_coeff_field, min_persistence=self.min_persistence
         )
-        diagrams = cubical_complex.persistence_intervals_in_dimension(self.persistence_dim)
-        return diagrams
+        return [cubical_complex.persistence_intervals_in_dimension(dim) for dim in range(self.max_persistence_dimension + 1)]
+
+    def __transform_only_this_dim(self, cells):
+        cubical_complex = CubicalComplex(top_dimensional_cells=cells, dimensions=self.dimensions)
+        cubical_complex.compute_persistence(
+            homology_coeff_field=self.homology_coeff_field, min_persistence=self.min_persistence
+        )
+        return cubical_complex.persistence_intervals_in_dimension(self.only_this_dim)
 
     def transform(self, X, Y=None):
         """
@@ -58,12 +81,18 @@ class CubicalPersistence(BaseEstimator, TransformerMixin):
 
         Parameters:
             X (list of list of double OR list of numpy.ndarray): List of cells filtration values that can be flatten if
-                dimensions is set in the constructor, or already with the correct shape in a numpy.ndarray (and
-                dimensions must not be set).
+                `dimensions` is set in the constructor, or already with the correct shape in a numpy.ndarray (and
+                `dimensions` must not be set).
 
         Returns:
-            Persistence diagrams
+            Persistence diagrams in the format:
+            - If `only_this_dim` was set to `n`: `[array( Hn(X[0]) ), array( Hn(X[1]) ), ...]` 
+            - else: `[[array( H0(X[0]) ), array( H1(X[0]) ), ...], [array( H0(X[1]) ), array( H1(X[1]) ), ...], ...]` 
         """
 
-        # threads is preferred as cubical construction and persistence computation releases the GIL
-        return Parallel(n_jobs=self.n_jobs, prefer="threads")(delayed(self.__transform)(cells) for cells in X)
+        if self.only_this_dim == -1:
+            # threads is preferred as cubical construction and persistence computation releases the GIL
+            return Parallel(n_jobs=self.n_jobs, prefer="threads")(delayed(self.__transform)(cells) for cells in X)
+        else:
+            # threads is preferred as cubical construction and persistence computation releases the GIL
+            return Parallel(n_jobs=self.n_jobs, prefer="threads")(delayed(self.__transform_only_this_dim)(cells) for cells in X)
diff --git a/src/python/gudhi/sklearn/post_processing.py b/src/python/gudhi/sklearn/post_processing.py
new file mode 100644
index 00000000..79276e1e
--- /dev/null
+++ b/src/python/gudhi/sklearn/post_processing.py
@@ -0,0 +1,61 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s):       Vincent Rouvreau
+#
+# Copyright (C) 2021 Inria
+#
+# Modification(s):
+#   - YYYY/MM Author: Description of the modification
+
+from sklearn.base import BaseEstimator, TransformerMixin
+
+# joblib is required by scikit-learn
+from joblib import Parallel, delayed
+
+# Mermaid sequence diagram - https://mermaid-js.github.io/mermaid-live-editor/
+# sequenceDiagram
+#     USER->>DimensionSelector: fit_transform(<br/>[[array( H0(X0) ), array( H1(X0) ), ...],<br/> [array( H0(X1) ), array( H1(X1) ), ...],<br/> ...])
+#     DimensionSelector->>thread1: _transform([array( H0(X0) ), array( H1(X0) )], ...)
+#     DimensionSelector->>thread2: _transform([array( H0(X1) ), array( H1(X1) )], ...)
+#     Note right of DimensionSelector: ...
+#     thread1->>DimensionSelector: array( Hn(X0) )
+#     thread2->>DimensionSelector: array( Hn(X1) )
+#     Note right of DimensionSelector: ...
+#     DimensionSelector->>USER: [array( Hn(X0) ), <br/> array( Hn(X1) ), <br/> ...]
+
+
+class DimensionSelector(BaseEstimator, TransformerMixin):
+    """
+    This is a class to select persistence diagrams in a specific dimension.
+    """
+
+    def __init__(self, persistence_dimension=0, n_jobs=None):
+        """
+        Constructor for the DimensionSelector class.
+
+        Parameters:
+            persistence_dimension (int): The returned persistence diagrams dimension. Default value is `0`.
+        """
+        self.persistence_dimension = persistence_dimension
+        self.n_jobs = n_jobs
+
+    def fit(self, X, Y=None):
+        """
+        Nothing to be done, but useful when included in a scikit-learn Pipeline.
+        """
+        return self
+
+    def transform(self, X, Y=None):
+        """
+        Select persistence diagrams from its dimension.
+
+        Parameters:
+            X (list of list of pairs): List of list of persistence pairs, i.e.
+            `[[array( H0(X0) ), array( H1(X0) ), ...], [array( H0(X1) ), array( H1(X1) ), ...], ...]` 
+
+        Returns:
+            Persistence diagrams in a specific dimension, i.e.
+            `[array( Hn(X0) ), array( Hn(X1) ), ...]`
+        """
+
+        return [persistence[self.persistence_dimension] for persistence in X]
diff --git a/src/python/test/test_sklearn_cubical_persistence.py b/src/python/test/test_sklearn_cubical_persistence.py
index c0082547..506985f1 100644
--- a/src/python/test/test_sklearn_cubical_persistence.py
+++ b/src/python/test/test_sklearn_cubical_persistence.py
@@ -16,17 +16,30 @@ __author__ = "Vincent Rouvreau"
 __copyright__ = "Copyright (C) 2021 Inria"
 __license__ = "MIT"
 
+CUBICAL_PERSISTENCE_H0_IMG0 = np.array([[0., 6.], [0., 8.], [ 0., np.inf]])
+
 def test_simple_constructor_from_top_cells():
     cells = datasets.load_digits().images[0]
-    cp = CubicalPersistence(persistence_dim = 0)
+    cp = CubicalPersistence(only_this_dim = 0)
     np.testing.assert_array_equal(cp._CubicalPersistence__transform(cells),
-                                  np.array([[0., 6.], [0., 8.], [ 0., np.inf]]))
+                                  [CUBICAL_PERSISTENCE_H0_IMG0])
+    cp = CubicalPersistence(max_persistence_dimension = 2)
+    diags = cp._CubicalPersistence__transform(cells)
+    assert len(diags) == 3
+    np.testing.assert_array_equal(diags[0],
+                                  CUBICAL_PERSISTENCE_H0_IMG0)
 
 def test_simple_constructor_from_top_cells_list():
     digits = datasets.load_digits().images[:10]
-    cp = CubicalPersistence(persistence_dim = 0, n_jobs=-2)
+    cp = CubicalPersistence(only_this_dim = 0, n_jobs=-2)
 
     diags = cp.fit_transform(digits)
     assert len(diags) == 10
     np.testing.assert_array_equal(diags[0],
-                                  np.array([[0., 6.], [0., 8.], [ 0., np.inf]]))
+                                  CUBICAL_PERSISTENCE_H0_IMG0)
+
+    cp = CubicalPersistence(max_persistence_dimension = 1, n_jobs=-1)
+    diagsH0H1 = cp.fit_transform(digits)
+    assert len(diagsH0H1) == 10
+    for idx in range(10):
+        np.testing.assert_array_equal(diags[idx], diagsH0H1[idx][0])
diff --git a/src/python/test/test_sklearn_post_processing.py b/src/python/test/test_sklearn_post_processing.py
new file mode 100644
index 00000000..3a251d34
--- /dev/null
+++ b/src/python/test/test_sklearn_post_processing.py
@@ -0,0 +1,48 @@
+""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+    See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+    Author(s):       Vincent Rouvreau
+
+    Copyright (C) 2021 Inria
+
+    Modification(s):
+      - YYYY/MM Author: Description of the modification
+"""
+
+from gudhi.sklearn.post_processing import DimensionSelector
+import numpy as np
+import pytest
+
+__author__ = "Vincent Rouvreau"
+__copyright__ = "Copyright (C) 2021 Inria"
+__license__ = "MIT"
+
+H0_0 = np.array([0., 0.])
+H1_0 = np.array([1., 0.])
+H0_1 = np.array([0., 1.])
+H1_1 = np.array([1., 1.])
+H0_2 = np.array([0., 2.])
+H1_2 = np.array([1., 2.])
+
+def test_dimension_selector():
+    X = [[H0_0, H1_0], [H0_1, H1_1], [H0_2, H1_2]]
+    ds = DimensionSelector(persistence_dimension = 0, n_jobs=-2)
+    h0 = ds.fit_transform(X)
+    np.testing.assert_array_equal(h0[0],
+                                  H0_0)
+    np.testing.assert_array_equal(h0[1],
+                                  H0_1)
+    np.testing.assert_array_equal(h0[2],
+                                  H0_2)
+    
+    ds = DimensionSelector(persistence_dimension = 1, n_jobs=-1)
+    h1 = ds.fit_transform(X)
+    np.testing.assert_array_equal(h1[0],
+                                  H1_0)
+    np.testing.assert_array_equal(h1[1],
+                                  H1_1)
+    np.testing.assert_array_equal(h1[2],
+                                  H1_2)
+
+    ds = DimensionSelector(persistence_dimension = 2, n_jobs=-2)
+    with pytest.raises(IndexError):
+        h2 = ds.fit_transform([[H0_0, H1_0], [H0_1, H1_1], [H0_2, H1_2]])
-- 
cgit v1.2.3


From f55ae9257a7006fd0906a21bd3033f47b2958c6b Mon Sep 17 00:00:00 2001
From: VincentRouvreau <vincent.rouvreau@inria.fr>
Date: Mon, 4 Oct 2021 16:46:01 +0200
Subject: review: modification proposed from EB + HM comments fix

---
 src/python/CMakeLists.txt                          |  7 ++-
 src/python/doc/cubical_complex_user.rst            |  5 +-
 src/python/gudhi/representations/preprocessing.py  | 51 ++++++++++++++++++-
 src/python/gudhi/sklearn/cubical_persistence.py    | 40 ++++++++-------
 src/python/gudhi/sklearn/post_processing.py        | 57 ----------------------
 .../test/test_representations_preprocessing.py     | 39 +++++++++++++++
 .../test/test_sklearn_cubical_persistence.py       | 16 +++---
 src/python/test/test_sklearn_post_processing.py    | 43 ----------------
 8 files changed, 123 insertions(+), 135 deletions(-)
 delete mode 100644 src/python/gudhi/sklearn/post_processing.py
 create mode 100644 src/python/test/test_representations_preprocessing.py
 delete mode 100644 src/python/test/test_sklearn_post_processing.py

(limited to 'src/python/doc/cubical_complex_user.rst')

diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index b38bb9aa..2ff05384 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -519,6 +519,11 @@ if(PYTHONINTERP_FOUND)
       add_gudhi_py_test(test_representations)
     endif()
 
+    # Representations preprocessing
+    if(SKLEARN_FOUND)
+      add_gudhi_py_test(test_representations_preprocessing)
+    endif()
+
     # Time Delay
     add_gudhi_py_test(test_time_delay)
 
@@ -546,10 +551,8 @@ if(PYTHONINTERP_FOUND)
     # sklearn
     if(SKLEARN_FOUND)
       add_gudhi_py_test(test_sklearn_cubical_persistence)
-      add_gudhi_py_test(test_sklearn_post_processing)
     endif()
 
-
     # Set missing or not modules
     set(GUDHI_MODULES ${GUDHI_MODULES} "python" CACHE INTERNAL "GUDHI_MODULES")
   else(CYTHON_FOUND)
diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst
index a140a279..e62a4395 100644
--- a/src/python/doc/cubical_complex_user.rst
+++ b/src/python/doc/cubical_complex_user.rst
@@ -211,7 +211,10 @@ two holes in :math:`\mathbf{H}_1`, or, like in this example, three connected com
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
     pipe = Pipeline(
         [
-            ("cub_pers", CubicalPersistence(only_this_dim=0, dimensions=[28, 28], n_jobs=-2)),
+            ("cub_pers", CubicalPersistence(persistence_dimension=0, dimensions=[28, 28], n_jobs=-2)),
+            # Or for multiple persistence dimension computation
+            # ("cub_pers", CubicalPersistence(persistence_dimension=[0, 1], dimensions=[28, 28], n_jobs=-2)),
+            # ("H0_diags", DimensionSelector(index=0)),  # where index is the index in persistence_dimension array
             ("finite_diags", DiagramSelector(use=True, point_type="finite")),
             (
                 "pers_img",
diff --git a/src/python/gudhi/representations/preprocessing.py b/src/python/gudhi/representations/preprocessing.py
index a8545349..823e3954 100644
--- a/src/python/gudhi/representations/preprocessing.py
+++ b/src/python/gudhi/representations/preprocessing.py
@@ -1,10 +1,11 @@
 # This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
 # See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
-# Author(s):       Mathieu Carrière
+# Author(s):       Mathieu Carrière, Vincent Rouvreau
 #
 # Copyright (C) 2018-2019 Inria
 #
 # Modification(s):
+#   - 2021/10 Vincent Rouvreau: Add DimensionSelector
 #   - YYYY/MM Author: Description of the modification
 
 import numpy as np
@@ -363,3 +364,51 @@ class DiagramSelector(BaseEstimator, TransformerMixin):
             n x 2 numpy array: extracted persistence diagram.
         """
         return self.fit_transform([diag])[0]
+
+
+# Mermaid sequence diagram - https://mermaid-js.github.io/mermaid-live-editor/
+# sequenceDiagram
+#     USER->>DimensionSelector: fit_transform(<br/>[[array( Hi(X0) ), array( Hj(X0) ), ...],<br/> [array( Hi(X1) ), array( Hj(X1) ), ...],<br/> ...])
+#     DimensionSelector->>thread1: _transform([array( Hi(X0) ), array( Hj(X0) )], ...)
+#     DimensionSelector->>thread2: _transform([array( Hi(X1) ), array( Hj(X1) )], ...)
+#     Note right of DimensionSelector: ...
+#     thread1->>DimensionSelector: array( Hn(X0) )
+#     thread2->>DimensionSelector: array( Hn(X1) )
+#     Note right of DimensionSelector: ...
+#     DimensionSelector->>USER: [array( Hn(X0) ), <br/> array( Hn(X1) ), <br/> ...]
+
+class DimensionSelector(BaseEstimator, TransformerMixin):
+    """
+    This is a class to select persistence diagrams in a specific dimension from its index.
+    """
+
+    def __init__(self, index=0):
+        """
+        Constructor for the DimensionSelector class.
+
+        Parameters:
+            index (int): The returned persistence diagrams dimension index. Default value is `0`.
+        """
+        self.index = index
+
+    def fit(self, X, Y=None):
+        """
+        Nothing to be done, but useful when included in a scikit-learn Pipeline.
+        """
+        return self
+
+    def transform(self, X, Y=None):
+        """
+        Select the persistence diagrams located at the given dimension index.
+
+        Parameters:
+            X (list of list of pairs): List of list of persistence pairs, i.e.
+                `[[array( Hi(X0) ), array( Hj(X0) ), ...], [array( Hi(X1) ), array( Hj(X1) ), ...], ...]` 
+
+        Returns:
+            list of pairs:
+            Persistence diagrams in a specific dimension, i.e. if `index` was set to `n` and `Hn` is at index `n` of
+            the input, it returns `[array( Hn(X0) ), array( Hn(X1) ), ...]`
+        """
+
+        return [persistence[self.index] for persistence in X]
diff --git a/src/python/gudhi/sklearn/cubical_persistence.py b/src/python/gudhi/sklearn/cubical_persistence.py
index 329c9435..454cdd07 100644
--- a/src/python/gudhi/sklearn/cubical_persistence.py
+++ b/src/python/gudhi/sklearn/cubical_persistence.py
@@ -33,8 +33,7 @@ class CubicalPersistence(BaseEstimator, TransformerMixin):
     def __init__(
         self,
         dimensions=None,
-        max_persistence_dimension=0,
-        only_this_dim=-1,
+        persistence_dimension=-1,
         homology_coeff_field=11,
         min_persistence=0.0,
         n_jobs=None,
@@ -45,20 +44,16 @@ class CubicalPersistence(BaseEstimator, TransformerMixin):
         Parameters:
             dimensions (list of int): A list of number of top dimensional cells if cells filtration values will require
                 to be reshaped (cf. :func:`~gudhi.sklearn.cubical_persistence.CubicalPersistence.transform`)
-            max_persistence_dimension (int): The returned persistence diagrams maximal dimension. Default value is `0`.
-                Ignored if `only_this_dim` is set.
-            only_this_dim (int): The returned persistence diagrams dimension. If `only_this_dim` is set,
-                `max_persistence_dimension` will be ignored. 
-                Short circuit the use of :class:`~gudhi.sklearn.post_processing.DimensionSelector` when only one
-                dimension matters.
+            persistence_dimension (int or list of int): The returned persistence diagrams dimension(s).
+                Short circuit the use of :class:`~gudhi.representations.preprocessing.DimensionSelector` when only one
+                dimension matters (in other words, when `persistence_dimension` is an int).
             homology_coeff_field (int): The homology coefficient field. Must be a prime number. Default value is 11.
             min_persistence (float): The minimum persistence value to take into account (strictly greater than
-                `min_persistence`). Default value is `0.0`. Sets `min_persistence` to `-1.0` to see all values.
+                `min_persistence`). Default value is `0.0`. Set `min_persistence` to `-1.0` to see all values.
             n_jobs (int): cf. https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html
         """
         self.dimensions = dimensions
-        self.max_persistence_dimension = max_persistence_dimension
-        self.only_this_dim = only_this_dim
+        self.persistence_dimension = persistence_dimension
         self.homology_coeff_field = homology_coeff_field
         self.min_persistence = min_persistence
         self.n_jobs = n_jobs
@@ -75,7 +70,7 @@ class CubicalPersistence(BaseEstimator, TransformerMixin):
             homology_coeff_field=self.homology_coeff_field, min_persistence=self.min_persistence
         )
         return [
-            cubical_complex.persistence_intervals_in_dimension(dim) for dim in range(self.max_persistence_dimension + 1)
+            cubical_complex.persistence_intervals_in_dimension(dim) for dim in self.persistence_dimension
         ]
 
     def __transform_only_this_dim(self, cells):
@@ -83,28 +78,31 @@ class CubicalPersistence(BaseEstimator, TransformerMixin):
         cubical_complex.compute_persistence(
             homology_coeff_field=self.homology_coeff_field, min_persistence=self.min_persistence
         )
-        return cubical_complex.persistence_intervals_in_dimension(self.only_this_dim)
+        return cubical_complex.persistence_intervals_in_dimension(self.persistence_dimension)
 
     def transform(self, X, Y=None):
         """
         Compute all the cubical complexes and their associated persistence diagrams.
 
         Parameters:
-            X (list of list of double OR list of numpy.ndarray): List of cells filtration values that can be flatten if
-                `dimensions` is set in the constructor, or already with the correct shape in a numpy.ndarray (and
+            X (list of list of double OR list of numpy.ndarray): List of cells filtration values that should be flattened
+                if `dimensions` is set in the constructor, or already with the correct shape in a numpy.ndarray (and
                 `dimensions` must not be set).
 
         Returns:
+            list of pairs or list of list of pairs:
             Persistence diagrams in the format:
-            - If `only_this_dim` was set to `n`: `[array( Hn(X[0]) ), array( Hn(X[1]) ), ...]` 
-            - else: `[[array( H0(X[0]) ), array( H1(X[0]) ), ...], [array( H0(X[1]) ), array( H1(X[1]) ), ...], ...]` 
+              - If `persistence_dimension` was set to `n`: `[array( Hn(X[0]) ), array( Hn(X[1]) ), ...]` 
+              - If `persistence_dimension` was set to `[i, j]`: `[[array( Hi(X[0]) ), array( Hj(X[0]) )], [array( Hi(X[1]) ), array( Hj(X[1]) )], ...]`
         """
 
-        if self.only_this_dim == -1:
-            # threads is preferred as cubical construction and persistence computation releases the GIL
-            return Parallel(n_jobs=self.n_jobs, prefer="threads")(delayed(self.__transform)(cells) for cells in X)
-        else:
+        # Depends on whether persistence_dimension is an integer or a list of integers (else case)
+        if isinstance(self.persistence_dimension, int):
             # threads is preferred as cubical construction and persistence computation releases the GIL
             return Parallel(n_jobs=self.n_jobs, prefer="threads")(
                 delayed(self.__transform_only_this_dim)(cells) for cells in X
             )
+        else:
+            # threads is preferred as cubical construction and persistence computation releases the GIL
+            return Parallel(n_jobs=self.n_jobs, prefer="threads")(delayed(self.__transform)(cells) for cells in X)
+
diff --git a/src/python/gudhi/sklearn/post_processing.py b/src/python/gudhi/sklearn/post_processing.py
deleted file mode 100644
index 3b12466b..00000000
--- a/src/python/gudhi/sklearn/post_processing.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
-# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
-# Author(s):       Vincent Rouvreau
-#
-# Copyright (C) 2021 Inria
-#
-# Modification(s):
-#   - YYYY/MM Author: Description of the modification
-
-from sklearn.base import BaseEstimator, TransformerMixin
-
-# Mermaid sequence diagram - https://mermaid-js.github.io/mermaid-live-editor/
-# sequenceDiagram
-#     USER->>DimensionSelector: fit_transform(<br/>[[array( H0(X0) ), array( H1(X0) ), ...],<br/> [array( H0(X1) ), array( H1(X1) ), ...],<br/> ...])
-#     DimensionSelector->>thread1: _transform([array( H0(X0) ), array( H1(X0) )], ...)
-#     DimensionSelector->>thread2: _transform([array( H0(X1) ), array( H1(X1) )], ...)
-#     Note right of DimensionSelector: ...
-#     thread1->>DimensionSelector: array( Hn(X0) )
-#     thread2->>DimensionSelector: array( Hn(X1) )
-#     Note right of DimensionSelector: ...
-#     DimensionSelector->>USER: [array( Hn(X0) ), <br/> array( Hn(X1) ), <br/> ...]
-
-
-class DimensionSelector(BaseEstimator, TransformerMixin):
-    """
-    This is a class to select persistence diagrams in a specific dimension.
-    """
-
-    def __init__(self, persistence_dimension=0):
-        """
-        Constructor for the DimensionSelector class.
-
-        Parameters:
-            persistence_dimension (int): The returned persistence diagrams dimension. Default value is `0`.
-        """
-        self.persistence_dimension = persistence_dimension
-
-    def fit(self, X, Y=None):
-        """
-        Nothing to be done, but useful when included in a scikit-learn Pipeline.
-        """
-        return self
-
-    def transform(self, X, Y=None):
-        """
-        Select persistence diagrams from its dimension.
-
-        Parameters:
-            X (list of list of pairs): List of list of persistence pairs, i.e.
-            `[[array( H0(X0) ), array( H1(X0) ), ...], [array( H0(X1) ), array( H1(X1) ), ...], ...]` 
-
-        Returns:
-            Persistence diagrams in a specific dimension, i.e.
-            `[array( Hn(X0) ), array( Hn(X1), ...]`
-        """
-
-        return [persistence[self.persistence_dimension] for persistence in X]
diff --git a/src/python/test/test_representations_preprocessing.py b/src/python/test/test_representations_preprocessing.py
new file mode 100644
index 00000000..838cf30c
--- /dev/null
+++ b/src/python/test/test_representations_preprocessing.py
@@ -0,0 +1,39 @@
+""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+    See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+    Author(s):       Vincent Rouvreau
+
+    Copyright (C) 2021 Inria
+
+    Modification(s):
+      - YYYY/MM Author: Description of the modification
+"""
+
+from gudhi.representations.preprocessing import DimensionSelector
+import numpy as np
+import pytest
+
+H0_0 = np.array([0.0, 0.0])
+H1_0 = np.array([1.0, 0.0])
+H0_1 = np.array([0.0, 1.0])
+H1_1 = np.array([1.0, 1.0])
+H0_2 = np.array([0.0, 2.0])
+H1_2 = np.array([1.0, 2.0])
+
+
+def test_dimension_selector():
+    X = [[H0_0, H1_0], [H0_1, H1_1], [H0_2, H1_2]]
+    ds = DimensionSelector(index=0)
+    h0 = ds.fit_transform(X)
+    np.testing.assert_array_equal(h0[0], H0_0)
+    np.testing.assert_array_equal(h0[1], H0_1)
+    np.testing.assert_array_equal(h0[2], H0_2)
+
+    ds = DimensionSelector(index=1)
+    h1 = ds.fit_transform(X)
+    np.testing.assert_array_equal(h1[0], H1_0)
+    np.testing.assert_array_equal(h1[1], H1_1)
+    np.testing.assert_array_equal(h1[2], H1_2)
+
+    ds = DimensionSelector(index=2)
+    with pytest.raises(IndexError):
+        h2 = ds.fit_transform([[H0_0, H1_0], [H0_1, H1_1], [H0_2, H1_2]])
diff --git a/src/python/test/test_sklearn_cubical_persistence.py b/src/python/test/test_sklearn_cubical_persistence.py
index 488495d1..bd728a29 100644
--- a/src/python/test/test_sklearn_cubical_persistence.py
+++ b/src/python/test/test_sklearn_cubical_persistence.py
@@ -12,32 +12,28 @@ from gudhi.sklearn.cubical_persistence import CubicalPersistence
 import numpy as np
 from sklearn import datasets
 
-__author__ = "Vincent Rouvreau"
-__copyright__ = "Copyright (C) 2021 Inria"
-__license__ = "MIT"
-
 CUBICAL_PERSISTENCE_H0_IMG0 = np.array([[0.0, 6.0], [0.0, 8.0], [0.0, np.inf]])
 
 
 def test_simple_constructor_from_top_cells():
     cells = datasets.load_digits().images[0]
-    cp = CubicalPersistence(only_this_dim=0)
-    np.testing.assert_array_equal(cp._CubicalPersistence__transform(cells), [CUBICAL_PERSISTENCE_H0_IMG0])
-    cp = CubicalPersistence(max_persistence_dimension=2)
+    cp = CubicalPersistence(persistence_dimension=0)
+    np.testing.assert_array_equal(cp._CubicalPersistence__transform_only_this_dim(cells), CUBICAL_PERSISTENCE_H0_IMG0)
+    cp = CubicalPersistence(persistence_dimension=[0, 2])
     diags = cp._CubicalPersistence__transform(cells)
-    assert len(diags) == 3
+    assert len(diags) == 2
     np.testing.assert_array_equal(diags[0], CUBICAL_PERSISTENCE_H0_IMG0)
 
 
 def test_simple_constructor_from_top_cells_list():
     digits = datasets.load_digits().images[:10]
-    cp = CubicalPersistence(only_this_dim=0, n_jobs=-2)
+    cp = CubicalPersistence(persistence_dimension=0, n_jobs=-2)
 
     diags = cp.fit_transform(digits)
     assert len(diags) == 10
     np.testing.assert_array_equal(diags[0], CUBICAL_PERSISTENCE_H0_IMG0)
 
-    cp = CubicalPersistence(max_persistence_dimension=1, n_jobs=-1)
+    cp = CubicalPersistence(persistence_dimension=[0, 1], n_jobs=-1)
     diagsH0H1 = cp.fit_transform(digits)
     assert len(diagsH0H1) == 10
     for idx in range(10):
diff --git a/src/python/test/test_sklearn_post_processing.py b/src/python/test/test_sklearn_post_processing.py
deleted file mode 100644
index e60eadc6..00000000
--- a/src/python/test/test_sklearn_post_processing.py
+++ /dev/null
@@ -1,43 +0,0 @@
-""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
-    See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
-    Author(s):       Vincent Rouvreau
-
-    Copyright (C) 2021 Inria
-
-    Modification(s):
-      - YYYY/MM Author: Description of the modification
-"""
-
-from gudhi.sklearn.post_processing import DimensionSelector
-import numpy as np
-import pytest
-
-__author__ = "Vincent Rouvreau"
-__copyright__ = "Copyright (C) 2021 Inria"
-__license__ = "MIT"
-
-H0_0 = np.array([0.0, 0.0])
-H1_0 = np.array([1.0, 0.0])
-H0_1 = np.array([0.0, 1.0])
-H1_1 = np.array([1.0, 1.0])
-H0_2 = np.array([0.0, 2.0])
-H1_2 = np.array([1.0, 2.0])
-
-
-def test_dimension_selector():
-    X = [[H0_0, H1_0], [H0_1, H1_1], [H0_2, H1_2]]
-    ds = DimensionSelector(persistence_dimension=0)
-    h0 = ds.fit_transform(X)
-    np.testing.assert_array_equal(h0[0], H0_0)
-    np.testing.assert_array_equal(h0[1], H0_1)
-    np.testing.assert_array_equal(h0[2], H0_2)
-
-    ds = DimensionSelector(persistence_dimension=1)
-    h1 = ds.fit_transform(X)
-    np.testing.assert_array_equal(h1[0], H1_0)
-    np.testing.assert_array_equal(h1[1], H1_1)
-    np.testing.assert_array_equal(h1[2], H1_2)
-
-    ds = DimensionSelector(persistence_dimension=2)
-    with pytest.raises(IndexError):
-        h2 = ds.fit_transform([[H0_0, H1_0], [H0_1, H1_1], [H0_2, H1_2]])
-- 
cgit v1.2.3


From 8f14977760d05f8f08d2a7babdc197da27a6c53a Mon Sep 17 00:00:00 2001
From: Vincent Rouvreau <vincent.rouvreau@inria.fr>
Date: Fri, 5 Nov 2021 11:28:42 +0100
Subject: change doc according to proposal

---
 src/python/doc/cubical_complex_sklearn_itf_ref.rst | 88 +++++++++++++++++++-
 src/python/doc/cubical_complex_sum.inc             | 24 +++---
 src/python/doc/cubical_complex_user.rst            | 95 +---------------------
 3 files changed, 100 insertions(+), 107 deletions(-)

(limited to 'src/python/doc/cubical_complex_user.rst')

diff --git a/src/python/doc/cubical_complex_sklearn_itf_ref.rst b/src/python/doc/cubical_complex_sklearn_itf_ref.rst
index b5c7a2e5..c585f9ab 100644
--- a/src/python/doc/cubical_complex_sklearn_itf_ref.rst
+++ b/src/python/doc/cubical_complex_sklearn_itf_ref.rst
@@ -2,8 +2,8 @@
 
 .. To get rid of WARNING: document isn't included in any toctree
 
-Cubical complex persistence scikit-learn like interfaces reference manual
-#########################################################################
+Cubical complex persistence scikit-learn like interface
+#######################################################
 
 .. list-table::
    :widths: 40 30 30
@@ -13,8 +13,90 @@ Cubical complex persistence scikit-learn like interfaces reference manual
      - :License: MIT
      - :Requires: `Scikit-learn <installation.html#scikit-learn>`_
 
+Cubical complex persistence scikit-learn like interface example
+---------------------------------------------------------------
+
+In this example, hand written digits are used as an input.
+A TDA scikit-learn pipeline is constructed and is composed of:
+
+#. :class:`~gudhi.sklearn.cubical_persistence.CubicalPersistence` that builds a cubical complex from the inputs and
+   returns its persistence diagrams
+#. :class:`~gudhi.representations.DiagramSelector` that removes non-finite persistence diagrams values
+#. :class:`~gudhi.representations.PersistenceImage` that builds the persistence images from persistence diagrams
+#. `SVC <https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html>`_ which is a scikit-learn support
+   vector classifier.
+
+This ML pipeline is trained to detect if the hand written digit is an '8' or not, thanks to the fact that an '8' has
+two holes in :math:`\mathbf{H}_1`, or, like in this example, three connected components in :math:`\mathbf{H}_0`.
+
+.. code-block:: python
+
+    # Standard scientific Python imports
+    import numpy as np
+    
+    # Standard scikit-learn imports
+    from sklearn.datasets import fetch_openml
+    from sklearn.pipeline import Pipeline
+    from sklearn.model_selection import train_test_split
+    from sklearn.svm import SVC
+    from sklearn import metrics
+    
+    # Import TDA pipeline requirements
+    from gudhi.sklearn.cubical_persistence import CubicalPersistence
+    from gudhi.representations import PersistenceImage, DiagramSelector
+    
+    X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
+    
+    # Target is: "is an eight ?"
+    y = (y == "8") * 1
+    print("There are", np.sum(y), "eights out of", len(y), "numbers.")
+    
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
+    pipe = Pipeline(
+        [
+            ("cub_pers", CubicalPersistence(persistence_dimension=0, dimensions=[28, 28], n_jobs=-2)),
+            # Or for multiple persistence dimension computation
+            # ("cub_pers", CubicalPersistence(persistence_dimension=[0, 1], dimensions=[28, 28], n_jobs=-2)),
+            # ("H0_diags", DimensionSelector(index=0)),  # where index is the index in persistence_dimension array
+            ("finite_diags", DiagramSelector(use=True, point_type="finite")),
+            (
+                "pers_img",
+                PersistenceImage(bandwidth=50, weight=lambda x: x[1] ** 2, im_range=[0, 256, 0, 256], resolution=[20, 20]),
+            ),
+            ("svc", SVC()),
+        ]
+    )
+    
+    # Learn from the train subset
+    pipe.fit(X_train, y_train)
+    # Predict from the test subset
+    predicted = pipe.predict(X_test)
+    
+    print(f"Classification report for TDA pipeline {pipe}:\n" f"{metrics.classification_report(y_test, predicted)}\n")
+
+.. code-block:: none
+
+    There are 6825 eights out of 70000 numbers.
+    Classification report for TDA pipeline Pipeline(steps=[('cub_pers',
+                     CubicalPersistence(dimensions=[28, 28], n_jobs=-2)),
+                    ('finite_diags', DiagramSelector(use=True)),
+                    ('pers_img',
+                     PersistenceImage(bandwidth=50, im_range=[0, 256, 0, 256],
+                                      weight=<function <lambda> at 0x7f3e54137ae8>)),
+                    ('svc', SVC())]):
+                  precision    recall  f1-score   support
+
+               0       0.97      0.99      0.98     25284
+               1       0.92      0.68      0.78      2716
+
+        accuracy                           0.96     28000
+       macro avg       0.94      0.84      0.88     28000
+    weighted avg       0.96      0.96      0.96     28000
+
+Cubical complex persistence scikit-learn like interface reference
+-----------------------------------------------------------------
 
 .. autoclass:: gudhi.sklearn.cubical_persistence.CubicalPersistence
    :members:
    :special-members: __init__
-   :show-inheritance:
+   :show-inheritance:
\ No newline at end of file
diff --git a/src/python/doc/cubical_complex_sum.inc b/src/python/doc/cubical_complex_sum.inc
index 2a1bde8d..e2fd55bb 100644
--- a/src/python/doc/cubical_complex_sum.inc
+++ b/src/python/doc/cubical_complex_sum.inc
@@ -1,13 +1,17 @@
 .. table::
    :widths: 30 40 30
 
-   +--------------------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------+
-   | .. figure::                                                              | The cubical complex represents a grid as a cell complex with         | :Author: Pawel Dlotko       |
-   |      ../../doc/Bitmap_cubical_complex/Cubical_complex_representation.png | cells of all dimensions.                                             | :Since: GUDHI 2.0.0         |
-   |      :alt: Cubical complex representation                                |                                                                      | :License: MIT               |
-   |      :figclass: align-center                                             |                                                                      |                             |
-   +--------------------------------------------------------------------------+----------------------------------------------------------------------+-----------------------------+
-   | * :doc:`cubical_complex_user`                                            | * :doc:`cubical_complex_ref`                                                                       |
-   |                                                                          | * :doc:`periodic_cubical_complex_ref`                                                              |
-   |                                                                          | * :doc:`cubical_complex_sklearn_itf_ref`                                                           |
-   +--------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------+
+   +--------------------------------------------------------------------------+--------------------------------------------------------------+-------------------------------------------------------------+
+   | .. figure::                                                              | The cubical complex represents a grid as a cell complex with | :Author: Pawel Dlotko                                       |
+   |      ../../doc/Bitmap_cubical_complex/Cubical_complex_representation.png | cells of all dimensions.                                     | :Since: GUDHI 2.0.0                                         |
+   |      :alt: Cubical complex representation                                |                                                              | :License: MIT                                               |
+   |      :figclass: align-center                                             |                                                              |                                                             |
+   +--------------------------------------------------------------------------+--------------------------------------------------------------+-------------------------------------------------------------+
+   | * :doc:`cubical_complex_user`                                            | * :doc:`cubical_complex_ref`                                                                                               |
+   |                                                                          | * :doc:`periodic_cubical_complex_ref`                                                                                      |
+   +--------------------------------------------------------------------------+--------------------------------------------------------------+-------------------------------------------------------------+
+   | .. image::                                                               | * :doc:`cubical_complex_sklearn_itf_ref`                     | :Requires: `Scikit-learn <installation.html#scikit-learn>`_ |
+   |      img/sklearn.png                                                     |                                                              |                                                             |
+   |      :target: https://scikit-learn.org                                   |                                                              |                                                             |
+   |      :height: 30                                                         |                                                              |                                                             |
+   +--------------------------------------------------------------------------+--------------------------------------------------------------+-------------------------------------------------------------+
diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst
index e62a4395..42a23875 100644
--- a/src/python/doc/cubical_complex_user.rst
+++ b/src/python/doc/cubical_complex_user.rst
@@ -7,19 +7,7 @@ Cubical complex user manual
 Definition
 ----------
 
-.. list-table::
-   :widths: 25 50 25
-   :header-rows: 0
-
-   * - :Author: Pawel Dlotko
-     - :Since: GUDHI 2.0.0
-     - :License: MIT
-   * - :doc:`cubical_complex_user`
-     - * :doc:`cubical_complex_ref`
-       * :doc:`periodic_cubical_complex_ref`
-       * :doc:`cubical_complex_sklearn_itf_ref`
-     -
-
+.. include:: cubical_complex_sum.inc
 
 The cubical complex is an example of a structured complex useful in computational mathematics (specially rigorous
 numerics) and image analysis.
@@ -169,84 +157,3 @@ Tutorial
 
 This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-cubical-complexes.ipynb>`_
 explains how to represent sublevels sets of functions using cubical complexes.
-
-Scikit-learn like interface example
------------------------------------
-
-In this example, hand written digits are used as an input.
-a TDA scikit-learn pipeline is constructed and is composed of:
-
-#. :class:`~gudhi.sklearn.cubical_persistence.CubicalPersistence` that builds a cubical complex from the inputs and
-   returns its persistence diagrams
-#. :class:`~gudhi.representations.DiagramSelector` that removes non-finite persistence diagrams values
-#. :class:`~gudhi.representations.PersistenceImage` that builds the persistence images from persistence diagrams
-#. `SVC <https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html>`_ which is a scikit-learn support
-   vector classifier.
-
-This ML pipeline is trained to detect if the hand written digit is an '8' or not, thanks to the fact that an '8' has
-two holes in :math:`\mathbf{H}_1`, or, like in this example, three connected components in :math:`\mathbf{H}_0`.
-
-.. code-block:: python
-
-    # Standard scientific Python imports
-    import numpy as np
-    
-    # Standard scikit-learn imports
-    from sklearn.datasets import fetch_openml
-    from sklearn.pipeline import Pipeline
-    from sklearn.model_selection import train_test_split
-    from sklearn.svm import SVC
-    from sklearn import metrics
-    
-    # Import TDA pipeline requirements
-    from gudhi.sklearn.cubical_persistence import CubicalPersistence
-    from gudhi.representations import PersistenceImage, DiagramSelector
-    
-    X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
-    
-    # Target is: "is an eight ?"
-    y = (y == "8") * 1
-    print("There are", np.sum(y), "eights out of", len(y), "numbers.")
-    
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
-    pipe = Pipeline(
-        [
-            ("cub_pers", CubicalPersistence(persistence_dimension=0, dimensions=[28, 28], n_jobs=-2)),
-            # Or for multiple persistence dimension computation
-            # ("cub_pers", CubicalPersistence(persistence_dimension=[0, 1], dimensions=[28, 28], n_jobs=-2)),
-            # ("H0_diags", DimensionSelector(index=0), # where index is the index in persistence_dimension array
-            ("finite_diags", DiagramSelector(use=True, point_type="finite")),
-            (
-                "pers_img",
-                PersistenceImage(bandwidth=50, weight=lambda x: x[1] ** 2, im_range=[0, 256, 0, 256], resolution=[20, 20]),
-            ),
-            ("svc", SVC()),
-        ]
-    )
-    
-    # Learn from the train subset
-    pipe.fit(X_train, y_train)
-    # Predict from the test subset
-    predicted = pipe.predict(X_test)
-    
-    print(f"Classification report for TDA pipeline {pipe}:\n" f"{metrics.classification_report(y_test, predicted)}\n")
-
-
-.. code-block:: none
-
-    There are 6825 eights out of 70000 numbers.
-    Classification report for TDA pipeline Pipeline(steps=[('cub_pers',
-                     CubicalPersistence(dimensions=[28, 28], n_jobs=-2)),
-                    ('finite_diags', DiagramSelector(use=True)),
-                    ('pers_img',
-                     PersistenceImage(bandwidth=50, im_range=[0, 256, 0, 256],
-                                      weight=<function <lambda> at 0x7f3e54137ae8>)),
-                    ('svc', SVC())]):
-                  precision    recall  f1-score   support
-    
-               0       0.97      0.99      0.98     25284
-               1       0.92      0.68      0.78      2716
-    
-        accuracy                           0.96     28000
-       macro avg       0.94      0.84      0.88     28000
-    weighted avg       0.96      0.96      0.96     28000
\ No newline at end of file
-- 
cgit v1.2.3