summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: MathieuCarriere <mathieu.carriere3@gmail.com> 2020-04-29 19:16:50 -0400
committer: MathieuCarriere <mathieu.carriere3@gmail.com> 2020-04-29 19:16:50 -0400
commit: b2177e897b575e0c8d17b8ae5ed3259541a06bea (patch)
tree: e51227c643de04061a27043fab86fe93b7a31e4a /src
parent: e7e885f707acde12bfbf632e4275048534a2023f (diff)
small modifs
Diffstat (limited to 'src')
-rw-r--r-- src/python/doc/representations.rst | 2
-rwxr-xr-x src/python/example/diagram_vectorizations_distances_kernels.py | 4
-rw-r--r-- src/python/gudhi/representations/kernel_methods.py | 3
-rw-r--r-- src/python/gudhi/representations/metrics.py | 9
4 files changed, 10 insertions, 8 deletions
diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst
index 11dcbcf9..041e3247 100644
--- a/src/python/doc/representations.rst
+++ b/src/python/doc/representations.rst
@@ -10,7 +10,7 @@ Representations manual
This module, originally available at https://github.com/MathieuCarriere/sklearn-tda and named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning, by providing implementations of most of the vector representations for persistence diagrams in the literature, in a scikit-learn format. More specifically, it provides tools, using the scikit-learn standard interface, to compute distances and kernels on persistence diagrams, and to convert these diagrams into vectors in Euclidean space.
-A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time.
+A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time. The classes in this module can handle several persistence diagrams at once. In that case, the diagrams are provided as a list of numpy arrays. Note that it is not necessary for the diagrams to have the same number of points, i.e., for the corresponding arrays to have the same number of rows: all classes can handle arrays with different shapes.
A small example is provided
diff --git a/src/python/example/diagram_vectorizations_distances_kernels.py b/src/python/example/diagram_vectorizations_distances_kernels.py
index ab7d8a16..c4a71a7a 100755
--- a/src/python/example/diagram_vectorizations_distances_kernels.py
+++ b/src/python/example/diagram_vectorizations_distances_kernels.py
@@ -13,7 +13,9 @@ from gudhi.representations import DiagramSelector, Clamping, Landscape, Silhouet
D1 = np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.], [0., np.inf], [5., np.inf]])
-proc1, proc2, proc3 = DiagramSelector(use=True, point_type="finite"), DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]), DiagramScaler(use=True, scalers=[([1], Clamping(maximum=.9))])
+proc1 = DiagramSelector(use=True, point_type="finite")
+proc2 = DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())])
+proc3 = DiagramScaler(use=True, scalers=[([1], Clamping(maximum=.9))])
D1 = proc3(proc2(proc1(D1)))
plt.scatter(D1[:,0], D1[:,1])
diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py
index edd1382a..596f4f07 100644
--- a/src/python/gudhi/representations/kernel_methods.py
+++ b/src/python/gudhi/representations/kernel_methods.py
@@ -67,7 +67,8 @@ def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein",
Parameters:
X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams.
Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise kernel values are computed from the first list only.
- kernel: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs.
+ kernel: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. If it is a function, make sure that it is symmetric.
+ **kwargs: optional keyword parameters. Any further parameters are passed directly to the kernel function. See the docs of the various kernel classes in this module.
Returns:
numpy array of shape (nxm): kernel matrix.
diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py
index a4bf19a6..ce416fb1 100644
--- a/src/python/gudhi/representations/metrics.py
+++ b/src/python/gudhi/representations/metrics.py
@@ -32,11 +32,9 @@ def _sliced_wasserstein_distance(D1, D2, num_directions):
thetas = np.linspace(-np.pi/2, np.pi/2, num=num_directions+1)[np.newaxis,:-1]
lines = np.concatenate([np.cos(thetas), np.sin(thetas)], axis=0)
approx1 = np.matmul(D1, lines)
- diag_proj1 = (1./2) * np.ones((2,2))
- approx_diag1 = np.matmul(np.matmul(D1, diag_proj1), lines)
+ approx_diag1 = np.matmul(np.broadcast_to(D1.sum(-1,keepdims=True)/2,(len(D1),2)), lines)
approx2 = np.matmul(D2, lines)
- diag_proj2 = (1./2) * np.ones((2,2))
- approx_diag2 = np.matmul(np.matmul(D2, diag_proj2), lines)
+ approx_diag2 = np.matmul(np.broadcast_to(D2.sum(-1,keepdims=True)/2,(len(D2),2)), lines)
A = np.sort(np.concatenate([approx1, approx_diag2], axis=0), axis=0)
B = np.sort(np.concatenate([approx2, approx_diag1], axis=0), axis=0)
L1 = np.sum(np.abs(A-B), axis=0)
@@ -143,7 +141,8 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwa
Parameters:
X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams.
Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise distances are computed from the first list only.
- metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a symmetric function taking two numpy arrays of shape (nx2) and (mx2) as inputs.
+ metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. If it is a function, make sure that it is symmetric and that it outputs 0 if called on the same two arrays.
+ **kwargs: optional keyword parameters. Any further parameters are passed directly to the distance function. See the docs of the various distance classes in this module.
Returns:
numpy array of shape (nxm): distance matrix