From 4922f305b7601d9e5d7eb39c73a88ee53bf1ca87 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 26 Dec 2019 18:31:47 +0100 Subject: Update doc --- src/python/doc/wasserstein_distance_user.rst | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/python/doc/wasserstein_distance_user.rst') diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index a049cfb5..13f6f1af 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -9,12 +9,20 @@ Definition .. include:: wasserstein_distance_sum.inc -This implementation is based on ideas from "Large Scale Computation of Means and Cluster for Persistence Diagrams via Optimal Transport". +Functions +--------- +This implementation is based on ideas from "Large Scale Computation of Means +and Cluster for Persistence Diagrams via Optimal Transport". -Function --------- .. autofunction:: gudhi.wasserstein.wasserstein_distance +This other implementation comes from `Hera +`_ and is based on `"Geometry +Helps to Compare Persistence Diagrams." +`_ by Michael Kerber, Dmitriy +Morozov, and Arnur Nigmetov, at ALENEX 2016. + +.. 
autofunction:: gudhi.hera.wasserstein_distance Basic example ------------- -- cgit v1.2.3 From b8701d847db37b80a58770e00b91494889df00e8 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 27 Dec 2019 00:56:08 +0100 Subject: Expose more options --- src/python/doc/wasserstein_distance_user.rst | 4 ++-- src/python/gudhi/hera.cc | 31 +++++++++++++++++++++------- 2 files changed, 26 insertions(+), 9 deletions(-) (limited to 'src/python/doc/wasserstein_distance_user.rst') diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index 13f6f1af..6cd7f3a0 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -17,8 +17,8 @@ and Cluster for Persistence Diagrams via Optimal Transport". .. autofunction:: gudhi.wasserstein.wasserstein_distance This other implementation comes from `Hera -`_ and is based on `"Geometry -Helps to Compare Persistence Diagrams." +`_ (BSD-3-Clause) and is +based on `"Geometry Helps to Compare Persistence Diagrams." `_ by Michael Kerber, Dmitriy Morozov, and Arnur Nigmetov, at ALENEX 2016. 
diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera.cc index 04f5990f..898040fb 100644 --- a/src/python/gudhi/hera.cc +++ b/src/python/gudhi/hera.cc @@ -12,7 +12,6 @@ typedef py::array_t Dgm; namespace hera { template <> struct DiagramTraits{ - //using Container = void; using PointType = std::array; using RealType = double; @@ -22,15 +21,17 @@ template <> struct DiagramTraits{ } double wasserstein_distance( - Dgm d1, - Dgm d2) + Dgm d1, Dgm d2, + double wasserstein_power, double internal_p, + double delta) { py::buffer_info buf1 = d1.request(); py::buffer_info buf2 = d2.request(); - if(buf1.ndim!=2 || buf1.shape[1]!=2) - throw std::runtime_error("Diagram 1 must be an array of size n x 2"); - if(buf2.ndim!=2 || buf2.shape[1]!=2) + // shape (n,2) or (0) for empty + if((buf1.ndim!=2 || buf1.shape[1]!=2) && (buf1.ndim!=1 || buf1.shape[0]!=0)) throw std::runtime_error("Diagram 1 must be an array of size n x 2"); + if((buf2.ndim!=2 || buf2.shape[1]!=2) && (buf2.ndim!=1 || buf2.shape[0]!=0)) + throw std::runtime_error("Diagram 2 must be an array of size n x 2"); typedef hera::DiagramTraits::PointType Point; auto p1 = (Point*)buf1.ptr; auto p2 = (Point*)buf2.ptr; @@ -38,17 +39,33 @@ double wasserstein_distance( auto diag2 = boost::make_iterator_range(p2, p2+buf2.shape[0]); hera::AuctionParams params; + params.wasserstein_power = wasserstein_power; + // hera encodes infinity as -1... + if(std::isinf(internal_p)) internal_p = hera::get_infinity(); + params.internal_p = internal_p; + params.delta = delta; + // The extra parameters are purposely not exposed for now. return hera::wasserstein_dist(diag1, diag2, params); } PYBIND11_MODULE(hera, m) { m.def("wasserstein_distance", &wasserstein_distance, py::arg("X"), py::arg("Y"), + // Should we name those q, p and d instead? 
+ py::arg("wasserstein_power") = 1, + py::arg("internal_p") = std::numeric_limits::infinity(), + py::arg("delta") = .01, R"pbdoc( - Compute the Wasserstein distance between two diagrams + Compute the Wasserstein distance between two diagrams. Points at infinity are supported. Parameters: X (n x 2 numpy array): First diagram Y (n x 2 numpy array): Second diagram + wasserstein_power (float): Wasserstein degree W_q + internal_p (float): Internal Minkowski norm L^p in R^2 + delta (float): Relative error 1+delta + + Returns: + float: Approximate Wasserstein distance W_q(X,Y) )pbdoc"); } -- cgit v1.2.3 From a7f3167ffb465bd6d1e3b9e40bc6f1c35daf87fc Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 20 Jan 2020 16:43:37 +0100 Subject: Simplify the pybind11 code --- src/python/doc/wasserstein_distance_user.rst | 5 +++-- src/python/gudhi/hera.cc | 19 +++++-------------- 2 files changed, 8 insertions(+), 16 deletions(-) (limited to 'src/python/doc/wasserstein_distance_user.rst') diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index 6cd7f3a0..355ad247 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -11,8 +11,9 @@ Definition Functions --------- -This implementation is based on ideas from "Large Scale Computation of Means -and Cluster for Persistence Diagrams via Optimal Transport". +This implementation uses the Python Optimal Transport library and is based on +ideas from "Large Scale Computation of Means and Cluster for Persistence +Diagrams via Optimal Transport". .. 
autofunction:: gudhi.wasserstein.wasserstein_distance diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera.cc index 898040fb..61f0da10 100644 --- a/src/python/gudhi/hera.cc +++ b/src/python/gudhi/hera.cc @@ -10,16 +10,6 @@ namespace py = pybind11; typedef py::array_t Dgm; -namespace hera { -template <> struct DiagramTraits{ - using PointType = std::array; - using RealType = double; - - static RealType get_x(const PointType& p) { return std::get<0>(p); } - static RealType get_y(const PointType& p) { return std::get<1>(p); } -}; -} - double wasserstein_distance( Dgm d1, Dgm d2, double wasserstein_power, double internal_p, @@ -32,7 +22,7 @@ double wasserstein_distance( throw std::runtime_error("Diagram 1 must be an array of size n x 2"); if((buf2.ndim!=2 || buf2.shape[1]!=2) && (buf2.ndim!=1 || buf2.shape[0]!=0)) throw std::runtime_error("Diagram 2 must be an array of size n x 2"); - typedef hera::DiagramTraits::PointType Point; + typedef std::array Point; auto p1 = (Point*)buf1.ptr; auto p2 = (Point*)buf2.ptr; auto diag1 = boost::make_iterator_range(p1, p1+buf1.shape[0]); @@ -52,16 +42,17 @@ PYBIND11_MODULE(hera, m) { m.def("wasserstein_distance", &wasserstein_distance, py::arg("X"), py::arg("Y"), // Should we name those q, p and d instead? - py::arg("wasserstein_power") = 1, + py::arg("order") = 1, py::arg("internal_p") = std::numeric_limits::infinity(), py::arg("delta") = .01, R"pbdoc( - Compute the Wasserstein distance between two diagrams. Points at infinity are supported. + Compute the Wasserstein distance between two diagrams. + Points at infinity are supported. 
Parameters: X (n x 2 numpy array): First diagram Y (n x 2 numpy array): Second diagram - wasserstein_power (float): Wasserstein degree W_q + order (float): Wasserstein exponent W_q internal_p (float): Internal Minkowski norm L^p in R^2 delta (float): Relative error 1+delta -- cgit v1.2.3 From 518c619d578dc6f168b6369417f15872e3cd0056 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 6 Feb 2020 21:54:44 +0100 Subject: use bibtex --- biblio/bibliography.bib | 12 ++++++++++++ src/python/doc/wasserstein_distance_user.rst | 10 +++++----- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'src/python/doc/wasserstein_distance_user.rst') diff --git a/biblio/bibliography.bib b/biblio/bibliography.bib index a1b951e0..3bbe7960 100644 --- a/biblio/bibliography.bib +++ b/biblio/bibliography.bib @@ -1180,3 +1180,15 @@ language={English} booktitle = {In Neural Information Processing Systems}, year = {2007} } +@inproceedings{10.5555/3327546.3327645, +author = {Lacombe, Th\'{e}o and Cuturi, Marco and Oudot, Steve}, +title = {Large Scale Computation of Means and Clusters for Persistence Diagrams Using Optimal Transport}, +year = {2018}, +publisher = {Curran Associates Inc.}, +address = {Red Hook, NY, USA}, +booktitle = {Proceedings of the 32nd International Conference on Neural Information Processing Systems}, +pages = {9792–9802}, +numpages = {11}, +location = {Montr\'{e}al, Canada}, +series = {NIPS’18} +} diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index 648cc568..99445b99 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -13,15 +13,15 @@ Functions --------- This implementation uses the Python Optimal Transport library and is based on ideas from "Large Scale Computation of Means and Cluster for Persistence -Diagrams via Optimal Transport". +Diagrams via Optimal Transport" :cite:`10.5555/3327546.3327645`. .. 
autofunction:: gudhi.wasserstein.wasserstein_distance This other implementation comes from `Hera -`_ (BSD-3-Clause) and is -based on `"Geometry Helps to Compare Persistence Diagrams." -`_ by Michael Kerber, Dmitriy -Morozov, and Arnur Nigmetov, at ALENEX 2016. +`_ (BSD-3-Clause) which is +based on "Geometry Helps to Compare Persistence Diagrams" +:cite:`Kerber:2017:GHC:3047249.3064175` by Michael Kerber, Dmitriy +Morozov, and Arnur Nigmetov. .. autofunction:: gudhi.hera.wasserstein_distance -- cgit v1.2.3 From b75123eeda446e7f778d4939da67a78e4c8c6abc Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sat, 8 Feb 2020 17:39:05 +0100 Subject: Euclidean with a capital E --- src/Bottleneck_distance/include/gudhi/Persistence_graph.h | 2 +- src/python/doc/wasserstein_distance_user.rst | 2 +- src/python/gudhi/wasserstein.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/python/doc/wasserstein_distance_user.rst') diff --git a/src/Bottleneck_distance/include/gudhi/Persistence_graph.h b/src/Bottleneck_distance/include/gudhi/Persistence_graph.h index f791e37c..e1e3522e 100644 --- a/src/Bottleneck_distance/include/gudhi/Persistence_graph.h +++ b/src/Bottleneck_distance/include/gudhi/Persistence_graph.h @@ -25,7 +25,7 @@ namespace Gudhi { namespace persistence_diagram { -/** \internal \brief Structure representing an euclidean bipartite graph containing +/** \internal \brief Structure representing a Euclidean bipartite graph containing * the points from the two persistence diagrams (including the projections). * * \ingroup bottleneck_distance diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst index 99445b99..94b454e2 100644 --- a/src/python/doc/wasserstein_distance_user.rst +++ b/src/python/doc/wasserstein_distance_user.rst @@ -28,7 +28,7 @@ Morozov, and Arnur Nigmetov. 
Basic example ------------- -This example computes the 1-Wasserstein distance from 2 persistence diagrams with euclidean ground metric. +This example computes the 1-Wasserstein distance from 2 persistence diagrams with Euclidean ground metric. Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values. .. testcode:: diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein.py index b1cfd588..13102094 100644 --- a/src/python/gudhi/wasserstein.py +++ b/src/python/gudhi/wasserstein.py @@ -67,7 +67,7 @@ def wasserstein_distance(X, Y, order=2., internal_p=2.): :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points (i.e. with infinite coordinate). :param Y: (m x 2) numpy.array encoding the second diagram. :param order: exponent for Wasserstein; Default value is 2. - :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (euclidean norm). + :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm). :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with respect to the internal_p-norm as ground metric. :rtype: float ''' -- cgit v1.2.3