5 files changed, 59 insertions, 32 deletions
diff --git a/src/python/example/ex_diagrams.py b/src/python/example/ex_diagrams.py
index a3efbac9..f12304bd 100755
--- a/src/python/example/ex_diagrams.py
+++ b/src/python/example/ex_diagrams.py
@@ -3,25 +3,33 @@
 import matplotlib.pyplot as plt
 import numpy as np
 from sklearn.kernel_approximation import RBFSampler
+from sklearn.preprocessing import MinMaxScaler
 
-from gudhi.sktda import Landscape, Silhouette, BettiCurve, ComplexPolynomial,\
+from gudhi.sktda import DiagramSelector, Clamping, Landscape, Silhouette, BettiCurve, ComplexPolynomial,\
   TopologicalVector, DiagramScaler, BirthPersistenceTransform,\
-  PersistenceImage, PersistenceWeightedGaussianKernel,\
+  PersistenceImage, PersistenceWeightedGaussianKernel, Entropy, \
   PersistenceScaleSpaceKernel, SlicedWassersteinDistance,\
   SlicedWassersteinKernel, BottleneckDistance, PersistenceFisherKernel
 
-D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])
+D = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.], [0., np.inf], [5., np.inf]])
+diags = [D]
+
+diags = DiagramSelector(use=True, point_type="finite").fit_transform(diags)
+diags = DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diags)
+diags = DiagramScaler(use=True, scalers=[([1], Clamping(limit=.9))]).fit_transform(diags)
+
+D = diags[0]
 plt.scatter(D[:,0],D[:,1])
-plt.plot([0.,10.],[0.,10.])
+plt.plot([0.,1.],[0.,1.])
+plt.title("Test Persistence Diagram for vector methods")
 plt.show()
 
-diags = [D]
-
-LS = Landscape(resolution = 1000)
+LS = Landscape(resolution=1000)
 L = LS.fit_transform(diags)
 plt.plot(L[0][:1000])
 plt.plot(L[0][1000:2000])
 plt.plot(L[0][2000:3000])
+plt.title("Landscape")
 plt.show()
 
 def pow(n):
@@ -30,11 +38,13 @@ def pow(n):
 SH = Silhouette(resolution=1000, weight=pow(2))
 sh = SH.fit_transform(diags)
 plt.plot(sh[0])
+plt.title("Silhouette")
 plt.show()
 
 BC = BettiCurve(resolution=1000)
 bc = BC.fit_transform(diags)
 plt.plot(bc[0])
+plt.title("Betti Curve")
 plt.show()
 
 CP = ComplexPolynomial(threshold=-1, F="T")
@@ -45,20 +55,35 @@ TV = TopologicalVector(threshold=-1)
 tv = TV.fit_transform(diags)
 print("Topological vector is " + str(tv[0,:]))
 
-#diagsT = DiagramPreprocessor(use=True, scalers=[([0,1], BirthPersistenceTransform())]).fit_transform(diags)
-#PI = PersistenceImage(bandwidth=1., weight=lambda x: x[1], im_range=[0,10,0,10], resolution=[100,100])
-#pi = PI.fit_transform(diagsT)
-#plt.imshow(np.flip(np.reshape(pi[0], [100,100]), 0))
-#plt.show()
+PI = PersistenceImage(bandwidth=.1, weight=lambda x: x[1], im_range=[0,1,0,1], resolution=[100,100])
+pi = PI.fit_transform(diags)
+plt.imshow(np.flip(np.reshape(pi[0], [100,100]), 0))
+plt.title("Persistence Image")
+plt.show()
 
-plt.scatter(D[:,0],D[:,1])
-D = np.array([[1.,5.],[3.,6.],[2.,7.]])
-plt.scatter(D[:,0],D[:,1])
-plt.plot([0.,10.],[0.,10.])
+ET = Entropy(mode="scalar")
+et = ET.fit_transform(diags)
+print("Entropy statistic is " + str(et[0,:]))
+
+ET = Entropy(mode="vector", normalized=False)
+et = ET.fit_transform(diags)
+plt.plot(et[0])
+plt.title("Entropy function")
 plt.show()
 
+D = np.array([[1.,5.],[3.,6.],[2.,7.]])
 diags2 = [D]
 
+diags2 = DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diags2)
+
+D = diags[0]
+plt.scatter(D[:,0],D[:,1])
+D = diags2[0]
+plt.scatter(D[:,0],D[:,1])
+plt.plot([0.,1.],[0.,1.])
+plt.title("Test Persistence Diagrams for kernel methods")
+plt.show()
+
 def arctan(C,p):
   return lambda x: C*np.arctan(np.power(x[1], p))
 
diff --git a/src/python/gudhi/sktda/kernel_methods.py b/src/python/gudhi/sktda/kernel_methods.py
index d90bf164..b49bdf60 100644
--- a/src/python/gudhi/sktda/kernel_methods.py
+++ b/src/python/gudhi/sktda/kernel_methods.py
@@ -18,11 +18,11 @@ class SlicedWassersteinKernel(BaseEstimator, TransformerMixin):
     """
     def __init__(self, num_directions=10, bandwidth=1.0):
         """
-        Constructor for the SlicedWassersteinDistance class.
+        Constructor for the SlicedWassersteinKernel class.
 
         Attributes:
             bandwidth (double): bandwidth of the Gaussian kernel applied to the sliced Wasserstein distance (default 1.).
-            num_directions (int): number of lines to sample uniformly from [-pi,pi] in order to approximate and speed up the kernel computation (default 10). If -1, the exact kernel is computed.
+            num_directions (int): number of lines evenly sampled on [-pi,pi] in order to approximate and speed up the kernel computation (default 10). If -1, the exact kernel is computed.
         """
         self.bandwidth = bandwidth
         self.sw_ = SlicedWassersteinDistance(num_directions=num_directions)
@@ -82,7 +82,7 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin):
 
     def transform(self, X):
         """
-        Compute all sliced Wasserstein kernel values between the persistence diagrams that were stored after calling the fit() method, and a given list of (possibly different) persistence diagrams.
+        Compute all persistence weighted Gaussian kernel values between the persistence diagrams that were stored after calling the fit() method, and a given list of (possibly different) persistence diagrams.
 
         Parameters:
             X (list of n x 2 numpy arrays): input persistence diagrams.
@@ -118,7 +118,7 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin):
 
 class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin):
     """
-    This is a class for computing the persistence scale space kernel matrix from a list of persistence diagrams. The persistence scale space kernel is computed by adding the symmetric to the diagonal of each point in each persistence diagram, and then convolving the points with a Gaussian kernel. See https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Reininghaus_A_Stable_Multi-Scale_2015_CVPR_paper.pdf for more details. 
+    This is a class for computing the persistence scale space kernel matrix from a list of persistence diagrams. The persistence scale space kernel is computed by adding to each persistence diagram, with negative weight, the reflection across the diagonal of each of its points, and then convolving the points with a Gaussian kernel. See https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Reininghaus_A_Stable_Multi-Scale_2015_CVPR_paper.pdf for more details.
     """
     def __init__(self, bandwidth=1., kernel_approx=None):
         """
diff --git a/src/python/gudhi/sktda/metrics.py b/src/python/gudhi/sktda/metrics.py
index 8092f7af..816441b6 100644
--- a/src/python/gudhi/sktda/metrics.py
+++ b/src/python/gudhi/sktda/metrics.py
@@ -34,7 +34,7 @@ class SlicedWassersteinDistance(BaseEstimator, TransformerMixin):
 
     def fit(self, X, y=None):
         """
-        Fit the SlicedWassersteinDistance class on a list of persistence diagrams: persistence diagrams are projected onto the different lines. The diagrams themselves and their projections are then stored in a numpy array called **diagrams**.
+        Fit the SlicedWassersteinDistance class on a list of persistence diagrams: persistence diagrams are projected onto the different lines. The diagrams themselves and their projections are then stored in numpy arrays, called **diagrams_** and **approx_diag_**.
 
         Parameters:
             X (list of n x 2 numpy arrays): input persistence diagrams.
diff --git a/src/python/gudhi/sktda/preprocessing.py b/src/python/gudhi/sktda/preprocessing.py
index 3c625053..784e300f 100644
--- a/src/python/gudhi/sktda/preprocessing.py
+++ b/src/python/gudhi/sktda/preprocessing.py
@@ -43,8 +43,9 @@ class BirthPersistenceTransform(BaseEstimator, TransformerMixin):
         """
         Xfit = []
         for diag in X:
-            new_diag = np.empty(diag.shape)
-            np.copyto(new_diag, diag)
+            #new_diag = np.empty(diag.shape)
+            #np.copyto(new_diag, diag)
+            new_diag = np.copy(diag)
             new_diag[:,1] = new_diag[:,1] - new_diag[:,0]
             Xfit.append(new_diag)
         return Xfit
@@ -82,7 +83,8 @@ class Clamping(BaseEstimator, TransformerMixin):
         Returns:
             Xfit (numpy array of size n): output list of values.
         """
-        Xfit = np.where(X >= self.limit, self.limit * np.ones(X.shape), X)
+        Xfit = np.minimum(X, self.limit)
+        #Xfit = np.where(X >= self.limit, self.limit * np.ones(X.shape), X)
         return Xfit
 
 class DiagramScaler(BaseEstimator, TransformerMixin):
@@ -91,7 +93,7 @@ class DiagramScaler(BaseEstimator, TransformerMixin):
     """
     def __init__(self, use=False, scalers=[]):
         """
-        Constructor for the DiagramPreprocessor class.
+        Constructor for the DiagramScaler class.
 
         Attributes:
             use (bool): whether to use the class or not (default False).
@@ -102,7 +104,7 @@ class DiagramScaler(BaseEstimator, TransformerMixin):
 
     def fit(self, X, y=None):
         """
-        Fit the DiagramPreprocessor class on a list of persistence diagrams: persistence diagrams are concatenated in a big numpy array, and scalers are fit (by calling their fit() method) on their corresponding coordinates in this big array.
+        Fit the DiagramScaler class on a list of persistence diagrams: persistence diagrams are concatenated in a big numpy array, and scalers are fit (by calling their fit() method) on their corresponding coordinates in this big array.
 
         Parameters:
             X (list of n x 2 or n x 1 numpy arrays): input persistence diagrams.
@@ -119,7 +121,7 @@ class DiagramScaler(BaseEstimator, TransformerMixin):
 
     def transform(self, X):
         """
-        Apply the DiagramPreprocessor function on the persistence diagrams. The fitted scalers are applied (by calling their transform() method) to their corresponding coordinates in each persistence diagram individually.  
+        Apply the DiagramScaler function on the persistence diagrams. The fitted scalers are applied (by calling their transform() method) to their corresponding coordinates in each persistence diagram individually.  
 
         Parameters:
             X (list of n x 2 or n x 1 numpy arrays): input persistence diagrams.
@@ -293,7 +295,7 @@ class DiagramSelector(BaseEstimator, TransformerMixin):
             if self.point_type == "finite":
                 Xfit = [ diag[diag[:,1] < self.limit] if diag.shape[0] != 0 else diag for diag in X]
             else:
-                Xfit = [ diag[diag[:,1] == self.limit, 0:1] if diag.shape[0] != 0 else diag for diag in X]
+                Xfit = [ diag[diag[:,1] >= self.limit, 0:1] if diag.shape[0] != 0 else diag for diag in X]
         else:
             Xfit = X
         return Xfit
diff --git a/src/python/gudhi/sktda/vector_methods.py b/src/python/gudhi/sktda/vector_methods.py
index 3862f815..d767a952 100644
--- a/src/python/gudhi/sktda/vector_methods.py
+++ b/src/python/gudhi/sktda/vector_methods.py
@@ -69,7 +69,7 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
 
             x_values, y_values = np.linspace(self.im_range[0], self.im_range[1], self.resolution[0]), np.linspace(self.im_range[2], self.im_range[3], self.resolution[1])
             Xs, Ys = np.tile((diagram[:,0][:,np.newaxis,np.newaxis]-x_values[np.newaxis,np.newaxis,:]),[1,self.resolution[1],1]), np.tile(diagram[:,1][:,np.newaxis,np.newaxis]-y_values[np.newaxis,:,np.newaxis],[1,1,self.resolution[0]])
-            image = np.tensordot(w, np.exp((-np.square(Xs)-np.square(Ys))/(2*np.square(self.bandwidth)))/(self.bandwidth*np.sqrt(2*np.pi)), 1)
+            image = np.tensordot(w, np.exp((-np.square(Xs)-np.square(Ys))/(2*np.square(self.bandwidth)))/(np.square(self.bandwidth)*2*np.pi), 1)
 
             Xfit.append(image.flatten()[np.newaxis,:])
 
@@ -299,7 +299,7 @@ class BettiCurve(BaseEstimator, TransformerMixin):
 
 class Entropy(BaseEstimator, TransformerMixin):
     """
-    This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired from Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details.
+    This is a class for computing persistence entropy. Persistence entropy is a statistic for persistence diagrams inspired by Shannon entropy. This statistic can also be used to compute a feature vector, called the entropy summary function. See https://arxiv.org/pdf/1803.08304.pdf for more details. Note that a previous implementation was contributed by Manuel Soriano-Trigueros.
     """
     def __init__(self, mode="scalar", normalized=True, resolution=100, sample_range=[np.nan, np.nan]):
         """
@@ -376,7 +376,7 @@ class TopologicalVector(BaseEstimator, TransformerMixin):
         Constructor for the TopologicalVector class.
 
         Attributes:
-            threshold (int): number of distances to keep (default 10). This is the dimension of the topological vector. If -1, this threshold is computed from the list of persistence diagrams by considering the one with the largest number of points and using the dimension of its corresponding topological vector as threshold. 
+            threshold (int): number of distances to keep (default 10). This is the dimension of the topological vector. If -1, this threshold is computed from the list of persistence diagrams by considering the one with the largest number of points and using the dimension of its corresponding topological vector as threshold.
         """
         self.threshold = threshold
 
@@ -430,7 +430,7 @@ class ComplexPolynomial(BaseEstimator, TransformerMixin):
 
         Attributes:
            F (char): either "R", "S" or "T" (default "R"). Type of complex polynomial that is going to be computed (explained in https://link.springer.com/chapter/10.1007%2F978-3-319-23231-7_27).
-           threshold (int): number of coefficients (default 10). This is the dimension of the complex vector of coefficients. If -1, this threshold is computed from the list of persistence diagrams by considering the one with the largest number of points and using the dimension of its corresponding complex vector of coefficients as threshold. 
+           threshold (int): number of coefficients (default 10). This is the dimension of the complex vector of coefficients, i.e. the number of coefficients corresponding to the largest degree terms of the polynomial. If -1, this threshold is computed from the list of persistence diagrams by considering the one with the largest number of points and using the dimension of its corresponding complex vector of coefficients as threshold. 
         """
         self.threshold, self.F = threshold, F