From ce58cc97866605fe64df479e96d455e90f56f8e2 Mon Sep 17 00:00:00 2001
From: MathieuCarriere <mathieu.carriere3@gmail.com>
Date: Sun, 8 Dec 2019 21:22:09 -0500
Subject: fixed useless coordinates in Landscape if min and max are computed
 from data

---
 src/python/doc/representations.rst                 | 25 ++++++++++++++++++++--
 .../diagram_vectorizations_distances_kernels.py    |  6 +++---
 src/python/gudhi/representations/vector_methods.py | 12 ++++++++---
 3 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst
index b3131a25..b338f7f0 100644
--- a/src/python/doc/representations.rst
+++ b/src/python/doc/representations.rst
@@ -8,9 +8,9 @@ Representations manual
 
 .. include:: representations_sum.inc
 
-This module, originally named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning tools, in particular scikit-learn. It provides tools, using the scikit-learn standard interface, to compute distances and kernels on diagrams, and to convert diagrams into vectors.
+This module, originally available at https://github.com/MathieuCarriere/sklearn-tda and named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning, by providing implementations of most of the vector representations for persistence diagrams in the literature, in a scikit-learn format. More specifically, it provides tools, using the scikit-learn standard interface, to compute distances and kernels on persistence diagrams, and to convert these diagrams into vectors in Euclidean space.
 
-A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time.
+A diagram is represented as a numpy array of shape (n,2), as can be obtained from `SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time.
 
 A small example is provided
 
@@ -46,3 +46,24 @@ Metrics
    :members:
    :special-members:
    :show-inheritance:
+
+Basic example
+-------------
+
+This example computes the first two Landscapes associated with a persistence diagram with four points. The landscapes are evaluated on ten samples, leading to two vectors with ten coordinates each, which are then concatenated in order to produce a single vector representation.
+
+.. testcode::
+
+    import numpy as np
+    from gudhi.representations import Landscape
+    # A single diagram with 4 points
+    D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])
+    diags = [D]
+    l=Landscape(num_landscapes=2,resolution=10).fit_transform(diags)
+    print(l) 
+
+The output is:
+
+.. testoutput::
+
+    [[0.         1.25707872 2.51415744 1.88561808 0.7856742  2.04275292 3.29983165 2.51415744 1.25707872 0.         0.         0.         0.31426968 0.         0.62853936 0.         0.         0.31426968 1.25707872 0.        ]] 
diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py
index 119072eb..f777984c 100755
--- a/src/python/example/diagram_vectorizations_distances_kernels.py
+++ b/src/python/example/diagram_vectorizations_distances_kernels.py
@@ -26,9 +26,9 @@ plt.show()
 
 LS = Landscape(resolution=1000)
 L = LS.fit_transform(diags)
-plt.plot(L[0][:1000])
-plt.plot(L[0][1000:2000])
-plt.plot(L[0][2000:3000])
+plt.plot(L[0][:999])
+plt.plot(L[0][999:2*999])
+plt.plot(L[0][2*999:3*999])
 plt.title("Landscape")
 plt.show()
 
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 61c4fb84..083551a4 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -104,10 +104,11 @@ class Landscape(BaseEstimator, TransformerMixin):
             X (list of n x 2 numpy arrays): input persistence diagrams.
             y (n x 1 array): persistence diagram labels (unused).
         """
+        self.nan_in_range = np.isnan(np.array(self.sample_range))
         if np.isnan(np.array(self.sample_range)).any():
             pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
             [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
-            self.sample_range = np.where(np.isnan(np.array(self.sample_range)), np.array([mx, My]), np.array(self.sample_range))
+            self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range))
         return self
 
     def transform(self, X):
@@ -121,7 +122,7 @@ class Landscape(BaseEstimator, TransformerMixin):
             numpy array with shape (number of diagrams) x (number of samples = **num_landscapes** x **resolution**): output persistence landscapes.
         """
         num_diag, Xfit = len(X), []
-        x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
+        x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution + self.nan_in_range.sum())
         step_x = x_values[1] - x_values[0]
 
         for i in range(num_diag):
@@ -157,7 +158,12 @@ class Landscape(BaseEstimator, TransformerMixin):
                 for k in range( min(self.num_landscapes, len(events[j])) ):
                     ls[k,j] = events[j][k]
 
-            Xfit.append(np.sqrt(2)*np.reshape(ls,[1,-1]))
+            if self.nan_in_range[0]:
+                ls = ls[:,1:]
+            if self.nan_in_range[1]:
+                ls = ls[:,:-1]
+            ls = np.sqrt(2)*np.reshape(ls,[1,-1])
+            Xfit.append(ls)
 
         Xfit = np.concatenate(Xfit,0)
 
-- 
cgit v1.2.3


From 5ecc15ba30e7a20604d50c1fdec9e7da2de64898 Mon Sep 17 00:00:00 2001
From: mathieu <mathieu.carriere3@gmail.com>
Date: Tue, 10 Dec 2019 14:24:52 -0500
Subject: fixed doc and examples

---
 src/python/doc/representations.rst                             | 4 ++--
 src/python/example/diagram_vectorizations_distances_kernels.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst
index b338f7f0..409e97da 100644
--- a/src/python/doc/representations.rst
+++ b/src/python/doc/representations.rst
@@ -10,7 +10,7 @@ Representations manual
 
 This module, originally available at https://github.com/MathieuCarriere/sklearn-tda and named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning, by providing implementations of most of the vector representations for persistence diagrams in the literature, in a scikit-learn format. More specifically, it provides tools, using the scikit-learn standard interface, to compute distances and kernels on persistence diagrams, and to convert these diagrams into vectors in Euclidean space.
 
-A diagram is represented as a numpy array of shape (n,2), as can be obtained from `SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time.
+A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time.
 
 A small example is provided
 
@@ -66,4 +66,4 @@ The output is:
 
 .. testoutput::
 
-    [[0.         1.25707872 2.51415744 1.88561808 0.7856742  2.04275292 3.29983165 2.51415744 1.25707872 0.         0.         0.         0.31426968 0.         0.62853936 0.         0.         0.31426968 1.25707872 0.        ]] 
+    [[1.02851895 2.05703791 2.57129739 1.54277843 0.89995409 1.92847304 2.95699199 3.08555686 0.         0.64282435 0.         0.         0.51425948 0.         0.         0.        ]]
diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py
index f777984c..0ea4ba79 100755
--- a/src/python/example/diagram_vectorizations_distances_kernels.py
+++ b/src/python/example/diagram_vectorizations_distances_kernels.py
@@ -26,9 +26,9 @@ plt.show()
 
 LS = Landscape(resolution=1000)
 L = LS.fit_transform(diags)
-plt.plot(L[0][:999])
-plt.plot(L[0][999:2*999])
-plt.plot(L[0][2*999:3*999])
+plt.plot(L[0][:998])
+plt.plot(L[0][998:2*998])
+plt.plot(L[0][2*998:3*998])
 plt.title("Landscape")
 plt.show()
 
-- 
cgit v1.2.3


From 682f8c8cb18ba898a3d23a82fff454e862541aed Mon Sep 17 00:00:00 2001
From: Mathieu Carrière <mathieu.carriere3@gmail.com>
Date: Wed, 11 Dec 2019 13:48:26 -0500
Subject: Update src/python/doc/representations.rst

Co-Authored-By: Vincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com>
---
 src/python/doc/representations.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst
index 409e97da..470b57bf 100644
--- a/src/python/doc/representations.rst
+++ b/src/python/doc/representations.rst
@@ -66,4 +66,6 @@ The output is:
 
 .. testoutput::
 
-    [[1.02851895 2.05703791 2.57129739 1.54277843 0.89995409 1.92847304 2.95699199 3.08555686 0.         0.64282435 0.         0.         0.51425948 0.         0.         0.        ]]
+    [[1.02851895 2.05703791 2.57129739 1.54277843 0.89995409 1.92847304
+      2.95699199 3.08555686 0.         0.64282435 0.         0.
+      0.51425948 0.         0.         0.        ]]
-- 
cgit v1.2.3


From 363ae171ee7f45cf11d01653e4d4e9580117cfd0 Mon Sep 17 00:00:00 2001
From: mathieu <mathieu.carriere3@gmail.com>
Date: Wed, 11 Dec 2019 13:50:21 -0500
Subject: fixed landscape

---
 .../example/diagram_vectorizations_distances_kernels.py    |  6 +++---
 src/python/gudhi/representations/vector_methods.py         | 14 +++++++-------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py
index 0ea4ba79..119072eb 100755
--- a/src/python/example/diagram_vectorizations_distances_kernels.py
+++ b/src/python/example/diagram_vectorizations_distances_kernels.py
@@ -26,9 +26,9 @@ plt.show()
 
 LS = Landscape(resolution=1000)
 L = LS.fit_transform(diags)
-plt.plot(L[0][:998])
-plt.plot(L[0][998:2*998])
-plt.plot(L[0][2*998:3*998])
+plt.plot(L[0][:1000])
+plt.plot(L[0][1000:2000])
+plt.plot(L[0][2000:3000])
 plt.title("Landscape")
 plt.show()
 
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 083551a4..cd532275 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -129,19 +129,19 @@ class Landscape(BaseEstimator, TransformerMixin):
 
             diagram, num_pts_in_diag = X[i], X[i].shape[0]
 
-            ls = np.zeros([self.num_landscapes, self.resolution])
+            ls = np.zeros([self.num_landscapes, self.resolution + self.nan_in_range.sum()])
 
             events = []
-            for j in range(self.resolution):
+            for j in range(self.resolution + self.nan_in_range.sum()):
                 events.append([])
 
             for j in range(num_pts_in_diag):
                 [px,py] = diagram[j,:2]
-                min_idx = np.clip(np.ceil((px          - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
-                mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
-                max_idx = np.clip(np.ceil((py          - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
+                min_idx = np.clip(np.ceil((px          - self.sample_range[0]) / step_x).astype(int), 0, self.resolution + self.nan_in_range.sum())
+                mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.resolution + self.nan_in_range.sum())
+                max_idx = np.clip(np.ceil((py          - self.sample_range[0]) / step_x).astype(int), 0, self.resolution + self.nan_in_range.sum())
 
-                if min_idx < self.resolution and max_idx > 0:
+                if min_idx < self.resolution + self.nan_in_range.sum() and max_idx > 0:
 
                     landscape_value = self.sample_range[0] + min_idx * step_x - px
                     for k in range(min_idx, mid_idx):
@@ -153,7 +153,7 @@ class Landscape(BaseEstimator, TransformerMixin):
                         events[k].append(landscape_value)
                         landscape_value -= step_x
 
-            for j in range(self.resolution):
+            for j in range(self.resolution + self.nan_in_range.sum()):
                 events[j].sort(reverse=True)
                 for k in range( min(self.num_landscapes, len(events[j])) ):
                     ls[k,j] = events[j][k]
-- 
cgit v1.2.3


From 9e75cc1832403f8ffec38fc3a4f6b1081fe4770e Mon Sep 17 00:00:00 2001
From: mathieu <mathieu.carriere3@gmail.com>
Date: Wed, 11 Dec 2019 13:57:15 -0500
Subject: update example

---
 src/python/doc/representations.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst
index 470b57bf..11dcbcf9 100644
--- a/src/python/doc/representations.rst
+++ b/src/python/doc/representations.rst
@@ -67,5 +67,6 @@ The output is:
 .. testoutput::
 
     [[1.02851895 2.05703791 2.57129739 1.54277843 0.89995409 1.92847304
-      2.95699199 3.08555686 0.         0.64282435 0.         0.
-      0.51425948 0.         0.         0.        ]]
+      2.95699199 3.08555686 2.05703791 1.02851895 0.         0.64282435
+      0.         0.         0.51425948 0.         0.         0.
+      0.77138922 1.02851895]]
-- 
cgit v1.2.3


From 2886885ff4cf1f134863de0fa97b64f824d67622 Mon Sep 17 00:00:00 2001
From: mathieu <mathieu.carriere3@gmail.com>
Date: Wed, 11 Dec 2019 15:30:45 -0500
Subject: cleanup

---
 src/python/gudhi/representations/vector_methods.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index cd532275..9b280f68 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -95,6 +95,7 @@ class Landscape(BaseEstimator, TransformerMixin):
             sample_range ([double, double]): minimum and maximum of all piecewise-linear function domains, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
         """
         self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range
+        self.nan_in_range = np.isnan(np.array(self.sample_range))
 
     def fit(self, X, y=None):
         """
@@ -104,8 +105,7 @@ class Landscape(BaseEstimator, TransformerMixin):
             X (list of n x 2 numpy arrays): input persistence diagrams.
             y (n x 1 array): persistence diagram labels (unused).
         """
-        self.nan_in_range = np.isnan(np.array(self.sample_range))
-        if np.isnan(np.array(self.sample_range)).any():
+        if self.nan_in_range.any():
             pre = DiagramScaler(use=True, scalers=[([0], MinMaxScaler()), ([1], MinMaxScaler())]).fit(X,y)
             [mx,my],[Mx,My] = [pre.scalers[0][1].data_min_[0], pre.scalers[1][1].data_min_[0]], [pre.scalers[0][1].data_max_[0], pre.scalers[1][1].data_max_[0]]
             self.sample_range = np.where(self.nan_in_range, np.array([mx, My]), np.array(self.sample_range))
-- 
cgit v1.2.3


From 7bd6907e577e22803fec179f652ecf0ec64dcb4a Mon Sep 17 00:00:00 2001
From: mathieu <mathieu.carriere3@gmail.com>
Date: Wed, 11 Dec 2019 15:38:00 -0500
Subject: cleanup for landscape resolution

---
 src/python/gudhi/representations/vector_methods.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 9b280f68..fe26dbe2 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -96,6 +96,7 @@ class Landscape(BaseEstimator, TransformerMixin):
         """
         self.num_landscapes, self.resolution, self.sample_range = num_landscapes, resolution, sample_range
         self.nan_in_range = np.isnan(np.array(self.sample_range))
+        self.new_resolution = self.resolution + self.nan_in_range.sum()
 
     def fit(self, X, y=None):
         """
@@ -122,26 +123,26 @@ class Landscape(BaseEstimator, TransformerMixin):
             numpy array with shape (number of diagrams) x (number of samples = **num_landscapes** x **resolution**): output persistence landscapes.
         """
         num_diag, Xfit = len(X), []
-        x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution + self.nan_in_range.sum())
+        x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
         step_x = x_values[1] - x_values[0]
 
         for i in range(num_diag):
 
             diagram, num_pts_in_diag = X[i], X[i].shape[0]
 
-            ls = np.zeros([self.num_landscapes, self.resolution + self.nan_in_range.sum()])
+            ls = np.zeros([self.num_landscapes, self.new_resolution])
 
             events = []
-            for j in range(self.resolution + self.nan_in_range.sum()):
+            for j in range(self.new_resolution):
                 events.append([])
 
             for j in range(num_pts_in_diag):
                 [px,py] = diagram[j,:2]
-                min_idx = np.clip(np.ceil((px          - self.sample_range[0]) / step_x).astype(int), 0, self.resolution + self.nan_in_range.sum())
-                mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.resolution + self.nan_in_range.sum())
-                max_idx = np.clip(np.ceil((py          - self.sample_range[0]) / step_x).astype(int), 0, self.resolution + self.nan_in_range.sum())
+                min_idx = np.clip(np.ceil((px          - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
+                mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
+                max_idx = np.clip(np.ceil((py          - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
 
-                if min_idx < self.resolution + self.nan_in_range.sum() and max_idx > 0:
+                if min_idx < self.new_resolution and max_idx > 0:
 
                     landscape_value = self.sample_range[0] + min_idx * step_x - px
                     for k in range(min_idx, mid_idx):
@@ -153,7 +154,7 @@ class Landscape(BaseEstimator, TransformerMixin):
                         events[k].append(landscape_value)
                         landscape_value -= step_x
 
-            for j in range(self.resolution + self.nan_in_range.sum()):
+            for j in range(self.new_resolution):
                 events[j].sort(reverse=True)
                 for k in range( min(self.num_landscapes, len(events[j])) ):
                     ls[k,j] = events[j][k]
-- 
cgit v1.2.3