summaryrefslogtreecommitdiff
path: root/src/python
diff options
context:
space:
mode:
Diffstat (limited to 'src/python')
-rw-r--r--src/python/CMakeLists.txt4
-rw-r--r--src/python/doc/installation.rst11
-rw-r--r--src/python/doc/perslay_params.md95
-rw-r--r--src/python/doc/representations.rst77
-rw-r--r--src/python/doc/representations_sum.inc24
-rw-r--r--src/python/gudhi/tensorflow/__init__.py3
-rw-r--r--src/python/gudhi/tensorflow/perslay.py284
-rw-r--r--src/python/test/test_perslay.py147
8 files changed, 623 insertions, 22 deletions
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index 5f323935..643c3c91 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -290,8 +290,8 @@ if(PYTHONINTERP_FOUND)
# Other .py files
file(COPY "gudhi/persistence_graphical_tools.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
file(COPY "gudhi/representations" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/")
- file(COPY "gudhi/wasserstein" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
- file(COPY "gudhi/tensorflow" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
+ file(COPY "gudhi/tensorflow" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/")
+ file(COPY "gudhi/wasserstein" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
file(COPY "gudhi/point_cloud" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
file(COPY "gudhi/clustering" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi" FILES_MATCHING PATTERN "*.py")
file(COPY "gudhi/weighted_rips_complex.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst
index 4eefd415..639378ed 100644
--- a/src/python/doc/installation.rst
+++ b/src/python/doc/installation.rst
@@ -371,6 +371,14 @@ PyTorch
`PyTorch <https://pytorch.org/>`_ is currently only used as a dependency of
`PyKeOps`_, and in some tests.
+TensorFlow
+----------
+
+:class:`~gudhi.tensorflow.perslay` from the :doc:`persistence representations </representations>` module
+requires `TensorFlow <https://https://www.tensorflow.org/install/>`_.
+
+`TensorFlow <https://www.tensorflow.org>`_ is also used in some automatic differentiation tests.
+
Scikit-learn
------------
@@ -393,6 +401,8 @@ mathematics, science, and engineering.
:class:`~gudhi.point_cloud.knn.KNearestNeighbors` can use the Python package
`SciPy <http://scipy.org>`_ as a backend if explicitly requested.
+<<<<<<< HEAD
+=======
TensorFlow
----------
@@ -401,6 +411,7 @@ and :doc:`Rips complex </rips_complex_tflow_itf_ref>` modules require `TensorFlo
for incorporating them in neural nets.
`TensorFlow <https://www.tensorflow.org>`_ is also used in some automatic differentiation tests.
+>>>>>>> 3e0e47b81ba488f6893933d8685fc1e7eec0e501
Bug reports and contributions
*****************************
diff --git a/src/python/doc/perslay_params.md b/src/python/doc/perslay_params.md
new file mode 100644
index 00000000..58537939
--- /dev/null
+++ b/src/python/doc/perslay_params.md
@@ -0,0 +1,95 @@
+PersLay parameters
+------------------
+
+In the following description of PersLay parameters, each parameter, or dictionary key, that contains `_init` in its name is optimized and learned by PersLay during training. If you do not want to optimize the vectorization, set the keys **train_vect** and **train_weight** to False.
+
+* The following keys are mandatory:
+ + layer
+ Either "PermutationEquivariant", "Image", "Landscape", "BettiCurve", "Entropy", "Exponential", "Rational" or "RationalHat". Type of the PersLay layer. "Image" is for `persistence images <https://arxiv.org/abs/1507.06217>`_, "Landscape" is for `persistence landscapes <http://www.jmlr.org/papers/volume16/bubenik15a/bubenik15a.pdf>`_, "Exponential", "Rational" and "RationalHat" are for `structure elements <http://jmlr.org/beta/papers/v20/18-358.html>`_, "PermutationEquivariant" is for the original DeepSet layer, defined in `this article <https://arxiv.org/abs/1703.06114>`_, "BettiCurve" is for `Betti curves <https://www.jstage.jst.go.jp/article/tjsai/32/3/32_D-G72/_pdf>`_ and "Entropy" is for `entropy <https://arxiv.org/abs/1803.08304>`_.
+ + perm_op
+ Either "sum", "mean", "max", "topk". Permutation invariant operation.
+ + keep
+ Number of top values to keep. Used only if **perm_op** is "topk".
+ + pweight
+ Either "power", "grid", "gmix" or None. Weight function to be applied on persistence diagram points. If "power", this function is a (trainable) coefficient times the distances to the diagonal of the points to a certain power. If "grid", this function is piecewise-constant and defined with pixel values of a grid. If "gmix", this function is defined as a mixture of Gaussians. If None, no weighting is applied.
+ + final_model
+ A Tensorflow / Keras model used to postprocess the persistence diagrams in each channel. Use "identity" if you don't want to postprocess.
+* Depending on what **pweight** is, the following additional keys are requested:
+ + if **pweight** is "power":
+ - pweight_init
+ Initializer of the coefficient of the power weight function. It can be either a single value, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+ - pweight_power
+ Integer used for exponentiating the distances to the diagonal of the persistence diagram points.
+ + if **pweight** is "grid":
+ - pweight_size
+ Grid size of the grid weight function. It is a tuple of integer values, such as (10,10).
+ - pweight_bnds
+ Grid boundaries of the grid weight function. It is a tuple containing two tuples, each containing the minimum and maximum values of each axis of the plane. Example: ((-0.01, 1.01), (-0.01, 1.01)).
+ - pweight_init
+ Initializer for the pixel values of the grid weight function. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+ + if **pweight** is "gmix":
+ - pweight_num
+ Number of Gaussian functions of the mixture of Gaussians weight function.
+ - pweight_init
+ Initializer of the means and variances of the mixture of Gaussians weight function. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+* Depending on what **layer** is, the following additional keys are requested:
+ + if **layer** is "PermutationEquivariant":
+ - lpeq
+ Sequence of permutation equivariant operations, as defined in [the DeepSet article](). It is a list of tuples of the form (*dim*, *operation*). Each tuple defines a permutation equivariant function of dimension *dim* and second permutation operation *operation* (string, either "max", "min", "sum" or None). Second permutation operation is optional and is not applied if *operation* is set to None. Example: [(150, "max"), (75, None)].
+ - lweight_init
+ Initializer for the weight matrices of the permutation equivariant operations. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+ - lbias_init
+ Initializer for the biases of the permutation equivariant operations. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+ - lgamma_init
+ Initializer for the Gamma matrices of the permutation equivariant operations. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+ + if **layer** is "Image":
+ - image_size
+ Persistence image size. It is a tuple of integer values, such as (10,10).
+ - image_bnds
+ Persistence image boundaries. It is a tuple containing two tuples, each containing the minimum and maximum values of each axis of the plane. Example: ((-0.01, 1.01), (-0.01, 1.01)).
+ - lvariance_init
+ Initializer for the bandwidths of the Gaussian functions centered on the persistence image pixels. It can be either a single value, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 3.).
+ + if **layer** is "Landscape":
+ - lsample_num
+ Number of samples of the diagonal that will be evaluated on the persistence landscapes.
+ - lsample_init
+ Initializer of the samples of the diagonal. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+ + if **layer** is "BettiCurve":
+ - lsample_num
+ Number of samples of the diagonal that will be evaluated on the Betti curves.
+ - lsample_init
+ Initializer of the samples of the diagonal. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+ - theta
+ Sigmoid parameter used for approximating the piecewise constant functions associated to the persistence diagram points.
+ + if **layer** is "Entropy":
+ - lsample_num
+ Number of samples on the diagonal that will be evaluated on the persistence entropies.
+ - lsample_init
+ Initializer of the samples of the diagonal. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+ - theta
+ Sigmoid parameter used for approximating the piecewise constant functions associated to the persistence diagram points.
+ + if **layer** is "Exponential":
+ - lnum
+ Number of exponential structure elements that will be evaluated on the persistence diagram points.
+ - lmean_init
+ Initializer of the means of the exponential structure elements. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+ - lvariance_init
+ Initializer of the bandwidths of the exponential structure elements. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(3., 3.).
+ + if **layer** is "Rational":
+ - lnum
+ Number of rational structure elements that will be evaluated on the persistence diagram points.
+ - lmean_init
+ Initializer of the means of the rational structure elements. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+ - lvariance_init
+ Initializer of the bandwidths of the rational structure elements. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(3., 3.).
+ - lalpha_init
+ Initializer of the exponents of the rational structure elements. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(3., 3.).
+ + if **layer** is "RationalHat":
+ - lnum
+ Number of rational hat structure elements that will be evaluated on the persistence diagram points.
+ - lmean_init
+ Initializer of the means of the rational hat structure elements. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(0., 1.).
+ - lr_init
+ Initializer of the threshold of the rational hat structure elements. It can be either a numpy array of values, or a random initializer from tensorflow, such as tensorflow.random_uniform_initializer(3., 3.).
+ - q
+ Norm parameter.
diff --git a/src/python/doc/representations.rst b/src/python/doc/representations.rst
index b0477197..2d66fa68 100644
--- a/src/python/doc/representations.rst
+++ b/src/python/doc/representations.rst
@@ -8,10 +8,16 @@ Representations manual
.. include:: representations_sum.inc
-This module, originally available at https://github.com/MathieuCarriere/sklearn-tda and named sklearn_tda, aims at bridging the gap between persistence diagrams and machine learning, by providing implementations of most of the vector representations for persistence diagrams in the literature, in a scikit-learn format. More specifically, it provides tools, using the scikit-learn standard interface, to compute distances and kernels on persistence diagrams, and to convert these diagrams into vectors in Euclidean space.
+This module aims at bridging the gap between persistence diagrams and machine learning, by providing implementations of most of the vector representations for persistence diagrams in the literature, in a scikit-learn format. More specifically, it provides tools, using the scikit-learn standard interface, to compute distances and kernels on persistence diagrams, and to convert these diagrams into vectors in Euclidean space. Moreover, this module also contains `PersLay <http://proceedings.mlr.press/v108/carriere20a.html>`_, which is a general neural network layer for performing deep learning with persistence diagrams, implemented in TensorFlow.
A diagram is represented as a numpy array of shape (n,2), as can be obtained from :func:`~gudhi.SimplexTree.persistence_intervals_in_dimension` for instance. Points at infinity are represented as a numpy array of shape (n,1), storing only the birth time. The classes in this module can handle several persistence diagrams at once. In that case, the diagrams are provided as a list of numpy arrays. Note that it is not necessary for the diagrams to have the same number of points, i.e., for the corresponding arrays to have the same number of rows: all classes can handle arrays with different shapes.
+This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-representations.ipynb>`_ explains how to
+efficiently combine machine learning and topological data analysis with the
+:doc:`representations module<representations>` in a scikit-learn fashion. This `notebook <https://github.com/MathieuCarriere/tda-tutorials/blob/perslay/Tuto-GUDHI-perslay-expe.ipynb>`_
+and `this one <https://github.com/MathieuCarriere/tda-tutorials/blob/perslay/Tuto-GUDHI-perslay-visu.ipynb>`_ explain how to use PersLay.
+
+
Examples
--------
@@ -30,8 +36,6 @@ This example computes the first two Landscapes associated to a persistence diagr
l=Landscape(num_landscapes=2,resolution=10).fit_transform(diags)
print(l)
-The output is:
-
.. testoutput::
[[1.02851895 2.05703791 2.57129739 1.54277843 0.89995409 1.92847304
@@ -45,13 +49,62 @@ Various kernels
This small example is also provided
:download:`diagram_vectorizations_distances_kernels.py <../example/diagram_vectorizations_distances_kernels.py>`
-Machine Learning and Topological Data Analysis
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+PersLay
+^^^^^^^
-This `notebook <https://github.com/GUDHI/TDA-tutorial/blob/master/Tuto-GUDHI-representations.ipynb>`_ explains how to
-efficiently combine machine learning and topological data analysis with the
-:doc:`representations module<representations>`.
+.. testcode::
+ import numpy as np
+ import tensorflow as tf
+ from sklearn.preprocessing import MinMaxScaler
+ import gudhi.representations as gdr
+ import gudhi.tensorflow as gdtf
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = gdtf.GaussianPerslayPhi((100, 100), ((-.5, 1.5), (-.5, 1.5)), .1)
+ weight = gdtf.PowerPerslayWeight(1.,0.)
+ perm_op = tf.math.reduce_sum
+
+ perslay = gdtf.Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+ print(vectors)
+
+.. testoutput::
+
+ tf.Tensor(
+ [[[[1.7266072e-16]
+ [4.1706043e-09]
+ [1.1336876e-08]
+ [8.5738821e-12]
+ [2.1243891e-14]]
+
+ [[4.1715076e-09]
+ [1.0074080e-01]
+ [2.7384272e-01]
+ [3.0724244e-02]
+ [7.6157507e-05]]
+
+ [[8.0382870e-06]
+ [1.5802664e+00]
+ [8.2997030e-01]
+ [1.2395413e+01]
+ [3.0724116e-02]]
+
+ [[8.0269419e-06]
+ [1.3065740e+00]
+ [9.0923014e+00]
+ [6.1664842e-02]
+ [1.3949171e-06]]
+
+ [[9.0331329e-13]
+ [1.4954816e-07]
+ [1.5145997e-04]
+ [1.0205092e-06]
+ [7.8093526e-16]]]], shape=(1, 5, 5, 1), dtype=float32)
Preprocessing
-------------
@@ -80,3 +133,11 @@ Metrics
:members:
:special-members:
:show-inheritance:
+
+PersLay
+-------
+.. automodule:: gudhi.tensorflow.perslay
+ :members:
+ :special-members:
+ :show-inheritance:
+
diff --git a/src/python/doc/representations_sum.inc b/src/python/doc/representations_sum.inc
index 4298aea9..cce91975 100644
--- a/src/python/doc/representations_sum.inc
+++ b/src/python/doc/representations_sum.inc
@@ -1,14 +1,16 @@
.. table::
:widths: 30 40 30
- +------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------+
- | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière, Martin Royer |
- | img/sklearn-tda.png | diagrams, compatible with scikit-learn. | |
- | | | :Since: GUDHI 3.1.0 |
- | | | |
- | | | :License: MIT |
- | | | |
- | | | :Requires: `Scikit-learn <installation.html#scikit-learn>`_ |
- +------------------------------------------------------------------+----------------------------------------------------------------+-------------------------------------------------------------+
- | * :doc:`representations` |
- +------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------+
+ +------------------------------------------------------------------+----------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+
+ | .. figure:: | Vectorizations, distances and kernels that work on persistence | :Author: Mathieu Carrière, Martin Royer |
+ | img/sklearn-tda.png | diagrams, compatible with scikit-learn and tensorflow. | |
+ | | | :Since: GUDHI 3.1.0 |
+ | | | |
+ | | | :License: MIT |
+ | | | |
+ | | | :Requires: `Scikit-learn <installation.html#scikit-learn>`_, `TensorFlow <installation.html#tensorflow>`_ |
+ | | | |
+ +------------------------------------------------------------------+----------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+
+ | * :doc:`representations` |
+ +------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
diff --git a/src/python/gudhi/tensorflow/__init__.py b/src/python/gudhi/tensorflow/__init__.py
index 1599cf52..fe01b9cc 100644
--- a/src/python/gudhi/tensorflow/__init__.py
+++ b/src/python/gudhi/tensorflow/__init__.py
@@ -1,5 +1,6 @@
from .cubical_layer import CubicalLayer
from .lower_star_simplex_tree_layer import LowerStarSimplexTreeLayer
from .rips_layer import RipsLayer
+from .perslay import *
-__all__ = ["LowerStarSimplexTreeLayer", "RipsLayer", "CubicalLayer"]
+__all__ = ["Perslay", "GridPerslayWeight", "GaussianMixturePerslayWeight", "PowerPerslayWeight", "GaussianPerslayPhi", "TentPerslayPhi", "FlatPerslayPhi", "LowerStarSimplexTreeLayer", "RipsLayer", "CubicalLayer"]
diff --git a/src/python/gudhi/tensorflow/perslay.py b/src/python/gudhi/tensorflow/perslay.py
new file mode 100644
index 00000000..69acc529
--- /dev/null
+++ b/src/python/gudhi/tensorflow/perslay.py
@@ -0,0 +1,284 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s): Mathieu Carrière
+#
+# Copyright (C) 2021 Inria
+#
+# Modification(s):
+# - YYYY/MM Author: Description of the modification
+
+import tensorflow as tf
+import numpy as np
+
+class GridPerslayWeight(tf.keras.layers.Layer):
+ """
+ This is a class for computing a differentiable weight function for persistence diagram points. This function is defined from an array that contains its values on a 2D grid.
+ """
+ def __init__(self, grid, grid_bnds, **kwargs):
+ """
+ Constructor for the GridPerslayWeight class.
+
+ Parameters:
+ grid (n x n numpy array): grid of values.
+ grid_bnds (2 x 2 numpy array): boundaries of the grid, of the form [[min_x, max_x], [min_y, max_y]].
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.grid = tf.Variable(initial_value=grid, trainable=True)
+ self.grid_bnds = grid_bnds
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply GridPerslayWeight on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ weight (n x None): ragged tensor containing the weights of the points in the n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ """
+ grid_shape = self.grid.shape
+ indices = []
+ for dim in range(2):
+ [m,M] = self.grid_bnds[dim]
+ coords = tf.expand_dims(diagrams[:,:,dim],-1)
+ ids = grid_shape[dim]*(coords-m)/(M-m)
+ indices.append(tf.cast(ids, tf.int32))
+ weight = tf.gather_nd(params=self.grid, indices=tf.concat(indices, axis=2))
+ return weight
+
+class GaussianMixturePerslayWeight(tf.keras.layers.Layer):
+ """
+ This is a class for computing a differentiable weight function for persistence diagram points. This function is defined from a mixture of Gaussian functions.
+ """
+ def __init__(self, gaussians, **kwargs):
+ """
+ Constructor for the GridPerslayWeight class.
+
+ Parameters:
+ gaussians (4 x n numpy array): parameters of the n Gaussian functions, of the form transpose([[mu_x^1, mu_y^1, sigma_x^1, sigma_y^1], ..., [mu_x^n, mu_y^n, sigma_x^n, sigma_y^n]]).
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.W = tf.Variable(initial_value=gaussians, trainable=True)
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply GaussianMixturePerslayWeight on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ weight (n x None): ragged tensor containing the weights of the points in the n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ """
+ means = tf.expand_dims(tf.expand_dims(self.W[:2,:],0),0)
+ variances = tf.expand_dims(tf.expand_dims(self.W[2:,:],0),0)
+ diagrams = tf.expand_dims(diagrams, -1)
+ dists = tf.math.multiply(tf.math.square(diagrams-means), 1/tf.math.square(variances))
+ weight = tf.math.reduce_sum(tf.math.exp(tf.math.reduce_sum(-dists, axis=2)), axis=2)
+ return weight
+
+class PowerPerslayWeight(tf.keras.layers.Layer):
+ """
+ This is a class for computing a differentiable weight function for persistence diagram points. This function is defined as a constant multiplied by the distance to the diagonal of the persistence diagram point raised to some power.
+ """
+ def __init__(self, constant, power, **kwargs):
+ """
+ Constructor for the PowerPerslayWeight class.
+
+ Parameters:
+ constant (float): constant value.
+ power (float): power applied to the distance to the diagonal.
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.constant = tf.Variable(initial_value=constant, trainable=True)
+ self.power = power
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply PowerPerslayWeight on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ weight (n x None): ragged tensor containing the weights of the points in the n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ """
+ weight = self.constant * tf.math.pow(tf.math.abs(diagrams[:,:,1]-diagrams[:,:,0]), self.power)
+ return weight
+
+
+class GaussianPerslayPhi(tf.keras.layers.Layer):
+ """
+ This is a class for computing a transformation function for persistence diagram points. This function turns persistence diagram points into 2D Gaussian functions centered on the points, that are then evaluated on a regular 2D grid.
+ """
+ def __init__(self, image_size, image_bnds, variance, **kwargs):
+ """
+ Constructor for the GaussianPerslayPhi class.
+
+ Parameters:
+ image_size (int numpy array): number of grid elements on each grid axis, of the form [n_x, n_y].
+ image_bnds (2 x 2 numpy array): boundaries of the grid, of the form [[min_x, max_x], [min_y, max_y]].
+ variance (float): variance of the Gaussian functions.
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.image_size = image_size
+ self.image_bnds = image_bnds
+ self.variance = tf.Variable(initial_value=variance, trainable=True)
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply GaussianPerslayPhi on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ output (n x None x image_size x image_size x 1): ragged tensor containing the evaluations on the 2D grid of the 2D Gaussian functions corresponding to the persistence diagram points, in the form of a 2D image with 1 channel that can be processed with, e.g., convolutional layers. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ output_shape (int numpy array): shape of the output tensor.
+ """
+ diagrams_d = tf.concat([diagrams[:,:,0:1], diagrams[:,:,1:2]-diagrams[:,:,0:1]], axis=2)
+ step = [(self.image_bnds[i][1]-self.image_bnds[i][0])/self.image_size[i] for i in range(2)]
+ coords = [tf.range(self.image_bnds[i][0], self.image_bnds[i][1], step[i]) for i in range(2)]
+ M = tf.meshgrid(*coords)
+ mu = tf.concat([tf.expand_dims(tens, 0) for tens in M], axis=0)
+ for _ in range(2):
+ diagrams_d = tf.expand_dims(diagrams_d,-1)
+ dists = tf.math.square(diagrams_d-mu) / (2*tf.math.square(self.variance))
+ gauss = tf.math.exp(tf.math.reduce_sum(-dists, axis=2)) / (2*np.pi*tf.math.square(self.variance))
+ output = tf.expand_dims(gauss,-1)
+ output_shape = M[0].shape + tuple([1])
+ return output, output_shape
+
+class TentPerslayPhi(tf.keras.layers.Layer):
+ """
+ This is a class for computing a transformation function for persistence diagram points. This function turns persistence diagram points into 1D tent functions (linearly increasing on the first half of the bar corresponding to the point from zero to half of the bar length, linearly decreasing on the second half and zero elsewhere) centered on the points, that are then evaluated on a regular 1D grid.
+ """
+ def __init__(self, samples, **kwargs):
+ """
+ Constructor for the GaussianPerslayPhi class.
+
+ Parameters:
+ samples (float numpy array): grid elements on which to evaluate the tent functions, of the form [x_1, ..., x_n].
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.samples = tf.Variable(initial_value=samples, trainable=True)
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply TentPerslayPhi on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ output (n x None x num_samples): ragged tensor containing the evaluations on the 1D grid of the 1D tent functions corresponding to the persistence diagram points. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ output_shape (int numpy array): shape of the output tensor.
+ """
+ samples_d = tf.expand_dims(tf.expand_dims(self.samples,0),0)
+ xs, ys = diagrams[:,:,0:1], diagrams[:,:,1:2]
+ output = tf.math.maximum(.5*(ys-xs) - tf.math.abs(samples_d-.5*(ys+xs)), np.array([0.]))
+ output_shape = self.samples.shape
+ return output, output_shape
+
+class FlatPerslayPhi(tf.keras.layers.Layer):
+ """
+ This is a class for computing a transformation function for persistence diagram points. This function turns persistence diagram points into 1D constant functions (that evaluate to half of the bar length on the bar corresponding to the point and zero elsewhere), that are then evaluated on a regular 1D grid.
+ """
+ def __init__(self, samples, theta, **kwargs):
+ """
+ Constructor for the FlatPerslayPhi class.
+
+ Parameters:
+ samples (float numpy array): grid elements on which to evaluate the constant functions, of the form [x_1, ..., x_n].
+ theta (float): sigmoid parameter used to approximate the constant function with a differentiable sigmoid function. The bigger the theta, the closer to a constant function the output will be.
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.samples = tf.Variable(initial_value=samples, trainable=True)
+ self.theta = tf.Variable(initial_value=theta, trainable=True)
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply FlatPerslayPhi on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ output (n x None x num_samples): ragged tensor containing the evaluations on the 1D grid of the 1D constant functions corresponding to the persistence diagram points. The second dimension is ragged since persistence diagrams can have different numbers of points.
+ output_shape (int numpy array): shape of the output tensor.
+ """
+ samples_d = tf.expand_dims(tf.expand_dims(self.samples,0),0)
+ xs, ys = diagrams[:,:,0:1], diagrams[:,:,1:2]
+ output = 1./(1.+tf.math.exp(-self.theta*(.5*(ys-xs)-tf.math.abs(samples_d-.5*(ys+xs)))))
+ output_shape = self.samples.shape
+ return output, output_shape
+
+class Perslay(tf.keras.layers.Layer):
+ """
+ This is a TensorFlow layer for vectorizing persistence diagrams in a differentiable way within a neural network. This function implements the PersLay equation, see `the corresponding article <http://proceedings.mlr.press/v108/carriere20a.html>`_.
+ """
+ def __init__(self, weight, phi, perm_op, rho, **kwargs):
+ """
+ Constructor for the Perslay class.
+
+ Parameters:
+ weight (function): weight function for the persistence diagram points. Can be either :class:`~gudhi.tensorflow.GridPerslayWeight`, :class:`~gudhi.tensorflow.GaussianMixturePerslayWeight`, :class:`~gudhi.tensorflow.PowerPerslayWeight`, or a custom function.
+ phi (function): transformation function for the persistence diagram points. Can be either :class:`~gudhi.tensorflow.GaussianPerslayPhi`, :class:`~gudhi.tensorflow.TentPerslayPhi`, :class:`~gudhi.tensorflow.FlatPerslayPhi`, or a custom function.
+ perm_op (function): permutation invariant function, such as `tf.math.reduce_sum`, `tf.math.reduce_mean`, `tf.math.reduce_max`, `tf.math.reduce_min`, or a custom function. If perm_op is the string "topk" (where k is a number), this function will be computed as `tf.math.top_k` with parameter `int(k)`.
+ rho (function): postprocessing function that is applied after the permutation invariant operation. Can be any TensorFlow layer.
+ """
+ super().__init__(dynamic=True, **kwargs)
+ self.weight = weight
+ self.phi = phi
+ self.pop = perm_op
+ self.rho = rho
+
+ def build(self, input_shape):
+ return self
+
+ def call(self, diagrams):
+ """
+ Apply Perslay on a ragged tensor containing a list of persistence diagrams.
+
+ Parameters:
+ diagrams (n x None x 2): ragged tensor containing n persistence diagrams. The second dimension is ragged since persistence diagrams can have different numbers of points.
+
+ Returns:
+ vector (n x output_shape): tensor containing the vectorizations of the persistence diagrams.
+ """
+ vector, dim = self.phi(diagrams)
+ weight = self.weight(diagrams)
+ for _ in range(len(dim)):
+ weight = tf.expand_dims(weight, -1)
+ vector = tf.math.multiply(vector, weight)
+
+ permop = self.pop
+ if type(permop) == str and permop[:3] == 'top':
+ k = int(permop[3:])
+ vector = vector.to_tensor(default_value=-1e10)
+ vector = tf.math.top_k(tf.transpose(vector, perm=[0, 2, 1]), k=k).values
+ vector = tf.reshape(vector, [-1,k*dim[0]])
+ else:
+ vector = permop(vector, axis=1)
+
+ vector = self.rho(vector)
+
+ return vector
diff --git a/src/python/test/test_perslay.py b/src/python/test/test_perslay.py
new file mode 100644
index 00000000..bf0d54af
--- /dev/null
+++ b/src/python/test/test_perslay.py
@@ -0,0 +1,147 @@
+import numpy as np
+import tensorflow as tf
+from sklearn.preprocessing import MinMaxScaler
+from gudhi.tensorflow import *
+import gudhi.representations as gdr
+
+def test_gaussian_perslay():
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = GaussianPerslayPhi((5, 5), ((-.5, 1.5), (-.5, 1.5)), .1)
+ weight = PowerPerslayWeight(1.,0.)
+ perm_op = tf.math.reduce_sum
+
+ perslay = Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+
+ print(vectors.shape)
+
+ assert np.linalg.norm(vectors.numpy() - np.array(
+[[[[1.7266072e-16],
+ [4.1706043e-09],
+ [1.1336876e-08],
+ [8.5738821e-12],
+ [2.1243891e-14]],
+
+ [[4.1715076e-09],
+ [1.0074080e-01],
+ [2.7384272e-01],
+ [3.0724244e-02],
+ [7.6157507e-05]],
+
+ [[8.0382870e-06],
+ [1.5802664e+00],
+ [8.2997030e-01],
+ [1.2395413e+01],
+ [3.0724116e-02]],
+
+ [[8.0269419e-06],
+ [1.3065740e+00],
+ [9.0923014e+00],
+ [6.1664842e-02],
+ [1.3949171e-06]],
+
+ [[9.0331329e-13],
+ [1.4954816e-07],
+ [1.5145997e-04],
+ [1.0205092e-06],
+ [7.8093526e-16]]]]) <= 1e-7)
+
+test_gaussian_perslay()
+
+def test_tent_perslay():
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = TentPerslayPhi(np.array(np.arange(-1.,2.,.1), dtype=np.float32))
+ weight = PowerPerslayWeight(1.,0.)
+ perm_op = 'top3'
+
+ perslay = Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+
+ assert np.linalg.norm(vectors-np.array([[0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0.09999999, 0., 0.,
+ 0.2, 0.05, 0., 0.19999999, 0., 0.,
+ 0.09999999, 0.02500001, 0., 0.125, 0., 0.,
+ 0.22500002, 0., 0., 0.3, 0., 0.,
+ 0.19999999, 0.05000001, 0., 0.10000002, 0.10000002, 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0. ]])) <= 1e-7
+
+def test_flat_perslay():
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = FlatPerslayPhi(np.array(np.arange(-1.,2.,.1), dtype=np.float32), 100.)
+ weight = PowerPerslayWeight(1.,0.)
+ perm_op = tf.math.reduce_sum
+
+ perslay = Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+
+ assert np.linalg.norm(vectors-np.array([[0.0000000e+00, 0.0000000e+00, 1.8048651e-35, 3.9754645e-31, 8.7565101e-27,
+ 1.9287571e-22, 4.2483860e-18, 9.3576392e-14, 2.0611652e-09, 4.5398087e-05,
+ 5.0000376e-01, 1.0758128e+00, 1.9933071e+00, 1.0072457e+00, 1.9240967e+00,
+ 1.4999963e+00, 1.0000458e+00, 1.0066929e+00, 1.9933071e+00, 1.9999092e+00,
+ 1.0000000e+00, 9.0795562e-05, 4.1222914e-09, 1.8715316e-13, 8.4967405e-18,
+ 3.8574998e-22, 1.7512956e-26, 7.9508388e-31, 3.6097302e-35, 0.0000000e+00]]) <= 1e-7)
+
+def test_gmix_weight():
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = FlatPerslayPhi(np.array(np.arange(-1.,2.,.1), dtype=np.float32), 100.)
+ weight = GaussianMixturePerslayWeight(np.array([[.5],[.5],[5],[5]], dtype=np.float32))
+ perm_op = tf.math.reduce_sum
+
+ perslay = Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+
+ assert np.linalg.norm(vectors-np.array([[0.0000000e+00, 0.0000000e+00, 1.7869064e-35, 3.9359080e-31, 8.6693818e-27,
+ 1.9095656e-22, 4.2061142e-18, 9.2645292e-14, 2.0406561e-09, 4.4946366e-05,
+ 4.9502861e-01, 1.0652492e+00, 1.9753191e+00, 9.9723548e-01, 1.9043801e+00,
+ 1.4844525e+00, 9.8947650e-01, 9.9604094e-01, 1.9703994e+00, 1.9769192e+00,
+ 9.8850453e-01, 8.9751818e-05, 4.0749040e-09, 1.8500175e-13, 8.3990662e-18,
+ 3.8131562e-22, 1.7311636e-26, 7.8594399e-31, 3.5682349e-35, 0.0000000e+00]]) <= 1e-7)
+
+def test_grid_weight():
+
+ diagrams = [np.array([[0.,4.],[1.,2.],[3.,8.],[6.,8.]])]
+ diagrams = gdr.DiagramScaler(use=True, scalers=[([0,1], MinMaxScaler())]).fit_transform(diagrams)
+ diagrams = tf.RaggedTensor.from_tensor(tf.constant(diagrams, dtype=tf.float32))
+
+ rho = tf.identity
+ phi = FlatPerslayPhi(np.array(np.arange(-1.,2.,.1), dtype=np.float32), 100.)
+ weight = GridPerslayWeight(np.array(np.random.uniform(size=[100,100]),dtype=np.float32),((-0.01, 1.01),(-0.01, 1.01)))
+ perm_op = tf.math.reduce_sum
+
+ perslay = Perslay(phi=phi, weight=weight, perm_op=perm_op, rho=rho)
+ vectors = perslay(diagrams)
+
+ assert np.linalg.norm(vectors-np.array([[0.0000000e+00, 0.0000000e+00, 1.5124093e-37, 3.3314498e-33, 7.3379791e-29,
+ 1.6163036e-24, 3.5601592e-20, 7.8417273e-16, 1.7272621e-11, 3.8043717e-07,
+ 4.1902456e-03, 1.7198652e-02, 1.2386327e-01, 9.2694648e-03, 1.9515079e-01,
+ 2.0629172e-01, 2.0210314e-01, 2.0442720e-01, 5.4709727e-01, 5.4939687e-01,
+ 2.7471092e-01, 2.4942532e-05, 1.1324385e-09, 5.1413016e-14, 2.3341474e-18,
+ 1.0596973e-22, 4.8110000e-27, 2.1841823e-31, 9.9163230e-36, 0.0000000e+00]]) <= 1e-7)