Merge remote-tracking branch 'origin/master' into filtration

author: Marc Glisse <marc.glisse@inria.fr> 2020-04-17 11:22:42 +0200
committer: Marc Glisse <marc.glisse@inria.fr> 2020-04-17 11:22:42 +0200
commit: 6dcedaeb83e7aef1e1a5e20a72b6ae00651e186f (patch)
tree: 908186725f317b867dd1ae00a1f6bc656e329d1f
parent: c5c565dfd92ce1ad5b318dca40edf9429d6334c2 (diff)
parent: 44996b632685eb077e61f79b1dd07d172776acb9 (diff)
28 files changed, 469 insertions, 96 deletions
diff --git a/.github/next_release.md b/.github/next_release.md
index 3166b0a8..83b98a1c 100644
--- a/.github/next_release.md
+++ b/.github/next_release.md
@@ -9,6 +9,7 @@ Below is a list of changes made since GUDHI 3.1.1:
 
 - [Wassertein distance](https://gudhi.inria.fr/python/latest/wasserstein_distance_user.html)
      - An another implementation comes from Hera (BSD-3-Clause) which is based on [Geometry Helps to Compare Persistence Diagrams](http://doi.acm.org/10.1145/3064175) by Michael Kerber, Dmitriy Morozov, and Arnur Nigmetov.
+     - `gudhi.wasserstein.wasserstein_distance` has now an option to return the optimal matching that achieves the distance between the two diagrams.
 
 - [Module](link)
      - ...
diff --git a/.github/test-requirements.txt b/.github/test-requirements.txt
index 4f9dcefb..fb1df134 100644
--- a/.github/test-requirements.txt
+++ b/.github/test-requirements.txt
@@ -10,3 +10,4 @@ tensorflow
 torch
 pykeops
 hnswlib
+eagerpy
diff --git a/Dockerfile_for_circleci_image b/Dockerfile_for_circleci_image
index 20754e2a..c2e8a8f5 100644
--- a/Dockerfile_for_circleci_image
+++ b/Dockerfile_for_circleci_image
@@ -43,6 +43,7 @@ RUN apt-get install -y make \
     python3 \
     python3-pip \
     python3-tk \
+    python3-grpcio \
     libfreetype6-dev \
     pkg-config \
     curl
diff --git a/biblio/bibliography.bib b/biblio/bibliography.bib
index b017a07e..a57be224 100644
--- a/biblio/bibliography.bib
+++ b/biblio/bibliography.bib
@@ -1208,3 +1208,14 @@ numpages = {11},
 location = {Montr\'{e}al, Canada},
 series = {NIPS’18}
 }
+
+@article{turner2014frechet,
+  title={Fr{\'e}chet means for distributions of persistence diagrams},
+  author={Turner, Katharine and Mileyko, Yuriy and Mukherjee, Sayan and Harer, John},
+  journal={Discrete \& Computational Geometry},
+  volume={52},
+  number={1},
+  pages={44--70},
+  year={2014},
+  publisher={Springer}
+}
diff --git a/src/Alpha_complex/concept/SimplicialComplexForAlpha.h b/src/Alpha_complex/concept/SimplicialComplexForAlpha.h
index 1c6c3b0c..c20c3201 100644
--- a/src/Alpha_complex/concept/SimplicialComplexForAlpha.h
+++ b/src/Alpha_complex/concept/SimplicialComplexForAlpha.h
@@ -72,6 +72,24 @@ struct SimplicialComplexForAlpha {
   /** \brief Return type of an insertion of a simplex
    */
   typedef unspecified Insertion_result_type;
+
+  /** \name Map interface
+   * Conceptually a `std::unordered_map<Simplex_handle,std::size_t>`.
+   *  @{ */
+  /** \brief Data stored for each simplex.
+   *
+   * Must be an integer type. */
+  typedef unspecified      Simplex_key;
+  /** \brief Returns a constant dummy number that is either negative,
+   * or at least as large as the number of simplices.  Suggested value: -1.  */
+  Simplex_key              null_key ();
+  /** \brief Returns the number stored for a simplex by `assign_key()`.
+   *
+   * If `assign_key()` has not been called, it must return `null_key()`.  */
+  Simplex_key              key      ( Simplex_handle sh );
+  /** \brief Store a number for a simplex, which can later be retrieved with `key()`.  */
+  void                     assign_key(Simplex_handle sh, Simplex_key n);
+  /** @} */
 };
 
 }  // namespace alpha_complex
diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h
index 1b5d6997..ba91998d 100644
--- a/src/Alpha_complex/include/gudhi/Alpha_complex.h
+++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h
@@ -112,9 +112,6 @@ class Alpha_complex {
   typedef typename Kernel::Side_of_bounded_sphere_d Is_Gabriel;
   typedef typename Kernel::Point_dimension_d        Point_Dimension;
 
-  // Type required to compute squared radius, or side of bounded sphere on a vector of points.
-  typedef typename std::vector<Point_d> Vector_of_CGAL_points;
-
   // Vertex_iterator type from CGAL.
   typedef typename Delaunay_triangulation::Vertex_iterator CGAL_vertex_iterator;
 
@@ -124,6 +121,9 @@ class Alpha_complex {
   // Structure to switch from simplex tree vertex handle to CGAL vertex iterator.
   typedef typename std::vector< CGAL_vertex_iterator > Vector_vertex_iterator;
 
+  // Numeric type of coordinates in the kernel
+  typedef typename Kernel::FT FT;
+
  private:
   /** \brief Vertex iterator vector to switch from simplex tree vertex handle to CGAL vertex iterator.
    * Vertex handles are inserted sequentially, starting at 0.*/
@@ -132,6 +132,8 @@ class Alpha_complex {
   Delaunay_triangulation* triangulation_;
   /** \brief Kernel for triangulation_ functions access.*/
   Kernel kernel_;
+  /** \brief Cache for geometric constructions: circumcenter and squared radius of a simplex.*/
+  std::vector<std::pair<Point_d, FT>> cache_, old_cache_;
 
  public:
   /** \brief Alpha_complex constructor from an OFF file name.
@@ -246,6 +248,49 @@ class Alpha_complex {
     }
   }
 
+  /** \brief get_point_ returns the point corresponding to the vertex given as parameter.
+   * Only for internal use for faster access.
+   *
+   * @param[in] vertex Vertex handle of the point to retrieve.
+   * @return The point found.
+   */
+  const Point_d& get_point_(std::size_t vertex) const {
+    return vertex_handle_to_iterator_[vertex]->point();
+  }
+
+  /// Return a reference to the circumcenter and circumradius, writing them in the cache if necessary.
+  template<class SimplicialComplexForAlpha>
+  auto& get_cache(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) {
+    auto k = cplx.key(s);
+    if(k==cplx.null_key()){
+      k = cache_.size();
+      cplx.assign_key(s, k);
+      // Using a transform_range is slower, currently.
+      thread_local std::vector<Point_d> v;
+      v.clear();
+      for (auto vertex : cplx.simplex_vertex_range(s))
+        v.push_back(get_point_(vertex));
+      Point_d c = kernel_.construct_circumcenter_d_object()(v.cbegin(), v.cend());
+      FT r = kernel_.squared_distance_d_object()(c, v[0]);
+      cache_.emplace_back(std::move(c), std::move(r));
+    }
+    return cache_[k];
+  }
+
+  /// Return the circumradius, either from the old cache or computed, without writing to the cache.
+  template<class SimplicialComplexForAlpha>
+  auto radius(SimplicialComplexForAlpha& cplx, typename SimplicialComplexForAlpha::Simplex_handle s) {
+    auto k = cplx.key(s);
+    if(k!=cplx.null_key())
+      return old_cache_[k].second;
+    // Using a transform_range is slower, currently.
+    thread_local std::vector<Point_d> v;
+    v.clear();
+    for (auto vertex : cplx.simplex_vertex_range(s))
+      v.push_back(get_point_(vertex));
+    return kernel_.compute_squared_radius_d_object()(v.cbegin(), v.cend());
+  }
+
  public:
   /** \brief Inserts all Delaunay triangulation into the simplicial complex.
    * It also computes the filtration values accordingly to the \ref createcomplexalgorithm if default_filtration_value
@@ -324,52 +369,36 @@ class Alpha_complex {
 
     if (!default_filtration_value) {
       // --------------------------------------------------------------------------------------------
-      // Will be re-used many times
-      Vector_of_CGAL_points pointVector;
       // ### For i : d -> 0
       for (int decr_dim = triangulation_->maximal_dimension(); decr_dim >= 0; decr_dim--) {
         // ### Foreach Sigma of dim i
         for (Simplex_handle f_simplex : complex.skeleton_simplex_range(decr_dim)) {
           int f_simplex_dim = complex.dimension(f_simplex);
           if (decr_dim == f_simplex_dim) {
-            pointVector.clear();
-  #ifdef DEBUG_TRACES
-            std::clog << "Sigma of dim " << decr_dim << " is";
-  #endif  // DEBUG_TRACES
-            for (auto vertex : complex.simplex_vertex_range(f_simplex)) {
-              pointVector.push_back(get_point(vertex));
-  #ifdef DEBUG_TRACES
-              std::clog << " " << vertex;
-  #endif  // DEBUG_TRACES
-            }
-  #ifdef DEBUG_TRACES
-            std::clog << std::endl;
-  #endif  // DEBUG_TRACES
             // ### If filt(Sigma) is NaN : filt(Sigma) = alpha(Sigma)
             if (std::isnan(complex.filtration(f_simplex))) {
               Filtration_value alpha_complex_filtration = 0.0;
               // No need to compute squared_radius on a single point - alpha is 0.0
               if (f_simplex_dim > 0) {
-                // squared_radius function initialization
-                Squared_Radius squared_radius = kernel_.compute_squared_radius_d_object();
-  
-                CGAL::NT_converter<typename Geom_traits::FT, Filtration_value> cv;
-                auto sqrad = squared_radius(pointVector.begin(), pointVector.end());
-  #if CGAL_VERSION_NR >= 1050000000
+                auto const& sqrad = radius(complex, f_simplex);
+#if CGAL_VERSION_NR >= 1050000000
                 if(exact) CGAL::exact(sqrad);
-  #endif
+#endif
+                CGAL::NT_converter<FT, Filtration_value> cv;
                 alpha_complex_filtration = cv(sqrad);
               }
               complex.assign_filtration(f_simplex, alpha_complex_filtration);
-  #ifdef DEBUG_TRACES
+#ifdef DEBUG_TRACES
               std::clog << "filt(Sigma) is NaN : filt(Sigma) =" << complex.filtration(f_simplex) << std::endl;
-  #endif  // DEBUG_TRACES
+#endif  // DEBUG_TRACES
             }
             // No need to propagate further, unweighted points all have value 0
             if (decr_dim > 1)
               propagate_alpha_filtration(complex, f_simplex);
           }
         }
+        old_cache_ = std::move(cache_);
+        cache_.clear();
       }
       // --------------------------------------------------------------------------------------------
   
@@ -388,9 +417,7 @@ class Alpha_complex {
   void propagate_alpha_filtration(SimplicialComplexForAlpha& complex, Simplex_handle f_simplex) {
     // From SimplicialComplexForAlpha type required to assign filtration values.
     typedef typename SimplicialComplexForAlpha::Filtration_value Filtration_value;
-#ifdef DEBUG_TRACES
     typedef typename SimplicialComplexForAlpha::Vertex_handle Vertex_handle;
-#endif  // DEBUG_TRACES
 
     // ### Foreach Tau face of Sigma
     for (auto f_boundary : complex.boundary_simplex_range(f_simplex)) {
@@ -414,33 +441,18 @@ class Alpha_complex {
 #endif  // DEBUG_TRACES
         // ### Else
       } else {
-        // insert the Tau points in a vector for is_gabriel function
-        Vector_of_CGAL_points pointVector;
-#ifdef DEBUG_TRACES
-        Vertex_handle vertexForGabriel = Vertex_handle();
-#endif  // DEBUG_TRACES
-        for (auto vertex : complex.simplex_vertex_range(f_boundary)) {
-          pointVector.push_back(get_point(vertex));
-        }
-        // Retrieve the Sigma point that is not part of Tau - parameter for is_gabriel function
-        Point_d point_for_gabriel;
-        for (auto vertex : complex.simplex_vertex_range(f_simplex)) {
-          point_for_gabriel = get_point(vertex);
-          if (std::find(pointVector.begin(), pointVector.end(), point_for_gabriel) == pointVector.end()) {
-#ifdef DEBUG_TRACES
-            // vertex is not found in Tau
-            vertexForGabriel = vertex;
-#endif  // DEBUG_TRACES
-            // No need to continue loop
-            break;
-          }
-        }
-        // is_gabriel function initialization
-        Is_Gabriel is_gabriel = kernel_.side_of_bounded_sphere_d_object();
-        bool is_gab = is_gabriel(pointVector.begin(), pointVector.end(), point_for_gabriel)
-          != CGAL::ON_BOUNDED_SIDE;
+        // Find which vertex of f_simplex is missing in f_boundary. We could actually write a variant of boundary_simplex_range that gives pairs (f_boundary, vertex). We rely on the fact that simplex_vertex_range is sorted.
+        auto longlist = complex.simplex_vertex_range(f_simplex);
+        auto shortlist = complex.simplex_vertex_range(f_boundary);
+        auto longiter = std::begin(longlist);
+        auto shortiter = std::begin(shortlist);
+        auto enditer = std::end(shortlist);
+        while(shortiter != enditer && *longiter == *shortiter) { ++longiter; ++shortiter; }
+        Vertex_handle extra = *longiter;
+        auto const& cache=get_cache(complex, f_boundary);
+        bool is_gab = kernel_.squared_distance_d_object()(cache.first, get_point_(extra)) >= cache.second;
 #ifdef DEBUG_TRACES
-        std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << vertexForGabriel << std::endl;
+        std::clog << " | Tau is_gabriel(Sigma)=" << is_gab << " - vertexForGabriel=" << extra << std::endl;
 #endif  // DEBUG_TRACES
         // ### If Tau is not Gabriel of Sigma
         if (false == is_gab) {
diff --git a/src/Simplex_tree/include/gudhi/Simplex_tree.h b/src/Simplex_tree/include/gudhi/Simplex_tree.h
index 43250795..889dbd00 100644
--- a/src/Simplex_tree/include/gudhi/Simplex_tree.h
+++ b/src/Simplex_tree/include/gudhi/Simplex_tree.h
@@ -486,7 +486,7 @@ class Simplex_tree {
  public:
   /** \brief Returns the key associated to a simplex.
    *
-   * The filtration must be initialized.
+   * If no key has been assigned, returns `null_key()`.
    * \pre SimplexTreeOptions::store_key
    */
   static Simplex_key key(Simplex_handle sh) {
@@ -496,7 +496,6 @@ class Simplex_tree {
   /** \brief Returns the simplex that has index idx in the filtration.
    *
    * The filtration must be initialized.
-   * \pre SimplexTreeOptions::store_key
    */
   Simplex_handle simplex(Simplex_key idx) const {
     return filtration_vect_[idx];
@@ -532,8 +531,7 @@ class Simplex_tree {
     return Dictionary_it(nullptr);
   }
 
-  /** \brief Returns a key different for all keys associated to the
-   * simplices of the simplicial complex. */
+  /** \brief Returns a fixed number not in the interval [0, `num_simplices()`).  */
   static Simplex_key null_key() {
     return -1;
   }
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index f00966a5..a91ca30a 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -216,7 +216,7 @@ if(PYTHONINTERP_FOUND)
     # Other .py files
     file(COPY "gudhi/persistence_graphical_tools.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
     file(COPY "gudhi/representations" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/")
-    file(COPY "gudhi/wasserstein.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
+    file(COPY "gudhi/wasserstein" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
     file(COPY "gudhi/point_cloud" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi")
 
     add_custom_command(
@@ -389,6 +389,7 @@ if(PYTHONINTERP_FOUND)
     # Wasserstein
     if(OT_FOUND AND PYBIND11_FOUND)
       add_gudhi_py_test(test_wasserstein_distance)
+      add_gudhi_py_test(test_wasserstein_barycenter)
     endif()
 
     # Representations
diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst
index 60319e84..265a82d2 100644
--- a/src/python/doc/alpha_complex_user.rst
+++ b/src/python/doc/alpha_complex_user.rst
@@ -204,8 +204,8 @@ the program output is:
    [3, 6] -> 30.25
 
 CGAL citations
-==============
+--------------
 
 .. bibliography:: ../../biblio/how_to_cite_cgal.bib
-   :filter: docnames
+   :filter: docname in docnames
    :style: unsrt
diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst
index 9435c7f1..206fcb63 100644
--- a/src/python/doc/bottleneck_distance_user.rst
+++ b/src/python/doc/bottleneck_distance_user.rst
@@ -65,3 +65,10 @@ The output is:
 
     Bottleneck distance approximation = 0.81
     Bottleneck distance value = 0.75
+
+Bibliography
+------------
+
+.. bibliography:: ../../biblio/bibliography.bib
+   :filter: docname in docnames
+   :style: unsrt
diff --git a/src/python/doc/cubical_complex_user.rst b/src/python/doc/cubical_complex_user.rst
index 93ca6b24..e8c94bf6 100644
--- a/src/python/doc/cubical_complex_user.rst
+++ b/src/python/doc/cubical_complex_user.rst
@@ -160,8 +160,8 @@ Examples.
 End user programs are available in python/example/ folder.
 
 Bibliography
-============
+------------
 
 .. bibliography:: ../../biblio/bibliography.bib
-   :filter: docnames
+   :filter: docname in docnames
    :style: unsrt
diff --git a/src/python/doc/img/barycenter.png b/src/python/doc/img/barycenter.png
new file mode 100644
index 00000000..cad6af70
--- /dev/null
+++ b/src/python/doc/img/barycenter.png
diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst
index 3387a64f..c153cdfc 100644
--- a/src/python/doc/index.rst
+++ b/src/python/doc/index.rst
@@ -71,6 +71,7 @@ Wasserstein distance
 
 .. include:: wasserstein_distance_sum.inc
 
+
 Persistence representations
 ===========================
 
@@ -90,5 +91,5 @@ Bibliography
 ************
 
 .. bibliography:: ../../biblio/bibliography.bib
-   :filter: docnames
+   :filter: docname in docnames
    :style: unsrt
diff --git a/src/python/doc/installation.rst b/src/python/doc/installation.rst
index d459145b..48425d5e 100644
--- a/src/python/doc/installation.rst
+++ b/src/python/doc/installation.rst
@@ -175,8 +175,8 @@ Documentation
 To build the documentation, `sphinx-doc <http://www.sphinx-doc.org>`_ and
 `sphinxcontrib-bibtex <https://sphinxcontrib-bibtex.readthedocs.io>`_ are
 required. As the documentation is auto-tested, `CGAL`_, `Eigen`_,
-`Matplotlib`_, `NumPy`_ and `SciPy`_ are also mandatory to build the
-documentation.
+`Matplotlib`_, `NumPy`_, `POT`_, `Scikit-learn`_ and `SciPy`_ are
+also mandatory to build the documentation.
 
 Run the following commands in a terminal:
 
@@ -192,8 +192,8 @@ CGAL
 ====
 
 Some GUDHI modules (cf. :doc:`modules list </index>`), and few examples
-require CGAL, a C++ library that provides easy access to efficient and
-reliable geometric algorithms.
+require `CGAL <https://www.cgal.org/>`_, a C++ library that provides easy
+access to efficient and reliable geometric algorithms.
 
 
 The procedure to install this library
diff --git a/src/python/doc/nerve_gic_complex_ref.rst b/src/python/doc/nerve_gic_complex_ref.rst
index abde2e8c..6a81b7af 100644
--- a/src/python/doc/nerve_gic_complex_ref.rst
+++ b/src/python/doc/nerve_gic_complex_ref.rst
@@ -12,3 +12,10 @@ Cover complexes reference manual
    :show-inheritance:
 
    .. automethod:: gudhi.CoverComplex.__init__
+
+Bibliography
+------------
+
+.. bibliography:: ../../biblio/bibliography.bib
+   :filter: docname in docnames
+   :style: unsrt
diff --git a/src/python/doc/nerve_gic_complex_user.rst b/src/python/doc/nerve_gic_complex_user.rst
index 9101f45d..f709ce91 100644
--- a/src/python/doc/nerve_gic_complex_user.rst
+++ b/src/python/doc/nerve_gic_complex_user.rst
@@ -313,3 +313,10 @@ the program outputs again SC.dot which gives the following visualization after u
     :alt: Visualization with neato
 
     Visualization with neato
+
+Bibliography
+------------
+
+.. bibliography:: ../../biblio/bibliography.bib
+   :filter: docname in docnames
+   :style: unsrt
diff --git a/src/python/doc/persistent_cohomology_user.rst b/src/python/doc/persistent_cohomology_user.rst
index 5f931b3a..506fa3a7 100644
--- a/src/python/doc/persistent_cohomology_user.rst
+++ b/src/python/doc/persistent_cohomology_user.rst
@@ -113,8 +113,8 @@ We provide several example files: run these examples with -h for details on thei
     * :download:`tangential_complex_plain_homology_from_off_file_example.py <../example/tangential_complex_plain_homology_from_off_file_example.py>`
 
 Bibliography
-============
+------------
 
 .. bibliography:: ../../biblio/bibliography.bib
-   :filter: docnames
+   :filter: docname in docnames
    :style: unsrt
diff --git a/src/python/doc/rips_complex_user.rst b/src/python/doc/rips_complex_user.rst
index 8efb12e6..c4bbcfb6 100644
--- a/src/python/doc/rips_complex_user.rst
+++ b/src/python/doc/rips_complex_user.rst
@@ -347,3 +347,10 @@ until dimension 1 - one skeleton graph in other words), the output is:
     points in the persistence diagram will be under the diagonal, and
     bottleneck distance and persistence graphical tool will not work properly,
     this is a known issue.
+
+Bibliography
+------------
+
+.. bibliography:: ../../biblio/bibliography.bib
+   :filter: docname in docnames
+   :style: unsrt
diff --git a/src/python/doc/simplex_tree_user.rst b/src/python/doc/simplex_tree_user.rst
index 3df7617f..1b272c35 100644
--- a/src/python/doc/simplex_tree_user.rst
+++ b/src/python/doc/simplex_tree_user.rst
@@ -66,3 +66,10 @@ The output is:
     ([1, 2], 4.0)
     ([1], 0.0)
     ([2], 4.0)
+
+Bibliography
+------------
+
+.. bibliography:: ../../biblio/bibliography.bib
+   :filter: docname in docnames
+   :style: unsrt
diff --git a/src/python/doc/tangential_complex_user.rst b/src/python/doc/tangential_complex_user.rst
index 852cf5b6..cf8199cc 100644
--- a/src/python/doc/tangential_complex_user.rst
+++ b/src/python/doc/tangential_complex_user.rst
@@ -197,8 +197,8 @@ The output is:
 
 
 Bibliography
-============
+------------
 
 .. bibliography:: ../../biblio/bibliography.bib
-   :filter: docnames
+   :filter: docname in docnames
    :style: unsrt
diff --git a/src/python/doc/wasserstein_distance_sum.inc b/src/python/doc/wasserstein_distance_sum.inc
index 0ff22035..f9308e5e 100644
--- a/src/python/doc/wasserstein_distance_sum.inc
+++ b/src/python/doc/wasserstein_distance_sum.inc
@@ -3,11 +3,11 @@
 
    +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+
    | .. figure::                                                     | The q-Wasserstein distance measures the similarity between two       | :Author: Theo Lacombe                                            |
-   |      ../../doc/Bottleneck_distance/perturb_pd.png               | persistence diagrams. It's the minimum value c that can be achieved  |                                                                  |
-   |      :figclass: align-center                                    | by a perfect matching between the points of the two diagrams (+ all  | :Since: GUDHI 3.1.0                                              |
-   |                                                                 | diagonal points), where the value of a matching is defined as the    |                                                                  |
-   |      Wasserstein distance is the q-th root of the sum of the    | q-th root of the sum of all edge lengths to the power q. Edge lengths| :License: MIT                                                    |
-   |      edge lengths to the power q.                               | are measured in norm p, for :math:`1 \leq p \leq \infty`.            |                                                                  |
+   |      ../../doc/Bottleneck_distance/perturb_pd.png               | persistence diagrams using the sum of all edges lengths (instead of  |                                                                  |
+   |      :figclass: align-center                                    | the maximum). It allows to define sophisticated objects such as      | :Since: GUDHI 3.1.0                                              |
+   |                                                                 | barycenters of a family of persistence diagrams.                     |                                                                  |
+   |                                                                 |                                                                      | :License: MIT                                                    |
+   |                                                                 |                                                                      |                                                                  |
    |                                                                 |                                                                      | :Requires: Python Optimal Transport (POT) :math:`\geq` 0.5.1     |
    +-----------------------------------------------------------------+----------------------------------------------------------------------+------------------------------------------------------------------+
    | * :doc:`wasserstein_distance_user`                              |                                                                                                                                         |
diff --git a/src/python/doc/wasserstein_distance_user.rst b/src/python/doc/wasserstein_distance_user.rst
index a9b21fa5..c24da74d 100644
--- a/src/python/doc/wasserstein_distance_user.rst
+++ b/src/python/doc/wasserstein_distance_user.rst
@@ -9,10 +9,16 @@ Definition
 
 .. include:: wasserstein_distance_sum.inc
 
-Functions
----------
-This implementation uses the Python Optimal Transport library and is based on
-ideas from "Large Scale Computation of Means and Cluster for Persistence
+The q-Wasserstein distance is defined as the minimal value achieved
+by a perfect matching between the points of the two diagrams (+ all
+diagonal points), where the value of a matching is defined as the
+q-th root of the sum of all edge lengths to the power q. Edge lengths
+are measured in norm p, for :math:`1 \leq p \leq \infty`.
+
+Distance Functions
+------------------
+This first implementation uses the Python Optimal Transport library and is based
+on ideas from "Large Scale Computation of Means and Cluster for Persistence
 Diagrams via Optimal Transport" :cite:`10.5555/3327546.3327645`.
 
 .. autofunction:: gudhi.wasserstein.wasserstein_distance
@@ -26,7 +32,7 @@ Morozov, and Arnur Nigmetov.
 .. autofunction:: gudhi.hera.wasserstein_distance
 
 Basic example
--------------
+*************
 
 This example computes the 1-Wasserstein distance from 2 persistence diagrams with Euclidean ground metric.
 Note that persistence diagrams must be submitted as (n x 2) numpy arrays and must not contain inf values.
@@ -48,9 +54,9 @@ The output is:
 
     Wasserstein distance value = 1.45
 
-We can also have access to the optimal matching by letting `matching=True`. 
+We can also have access to the optimal matching by letting `matching=True`.
 It is encoded as a list of indices (i,j), meaning that the i-th point in X
-is mapped to the j-th point in Y. 
+is mapped to the j-th point in Y.
 An index of -1 represents the diagonal.
 
 .. testcode::
@@ -78,9 +84,90 @@ An index of -1 represents the diagonal.
 The output is:
 
 .. testoutput::
-    
+
     Wasserstein distance value = 2.15
     point 0 in dgm1 is matched to point 0 in dgm2
     point 1 in dgm1 is matched to point 2 in dgm2
     point 2 in dgm1 is matched to the diagonal
     point 1 in dgm2 is matched to the diagonal
+
+Barycenters
+-----------
+
+A Frechet mean (or barycenter) is a generalization of the arithmetic
+mean in a non linear space such as the one of persistence diagrams.
+Given a set of persistence diagrams :math:`\mu_1 \dots \mu_n`, it is
+defined as a minimizer of the variance functional, that is of
+:math:`\mu \mapsto \sum_{i=1}^n d_2(\mu,\mu_i)^2`.
+where :math:`d_2` denotes the Wasserstein-2 distance between
+persistence diagrams.
+It is known to exist and is generically unique. However, an exact
+computation is in general untractable. Current implementation
+available is based on (Turner et al., 2014),
+:cite:`turner2014frechet`
+and uses an EM-scheme to
+provide a local minimum of the variance functional (somewhat similar
+to the Lloyd algorithm to estimate a solution to the k-means
+problem). The local minimum returned depends on the initialization of
+the barycenter.
+The combinatorial structure of the algorithm limits its
+performances on large scale problems (thousands of diagrams and of points
+per diagram).
+
+.. figure::
+     ./img/barycenter.png
+     :figclass: align-center
+
+     Illustration of Frechet mean between persistence
+     diagrams.
+
+
+.. autofunction:: gudhi.wasserstein.barycenter.lagrangian_barycenter
+
+Basic example
+*************
+
+This example estimates the Frechet mean (aka Wasserstein barycenter) between
+four persistence diagrams.
+It is initialized on the 4th diagram.
+As the algorithm is not convex, its output depends on the initialization and
+is only a local minimum of the objective function.
+Initialization can be either given as an integer (in which case the i-th
+diagram of the list is used as initial estimate) or as a diagram.
+If None, it will randomly select one of the diagrams of the list
+as initial estimate.
+Note that persistence diagrams must be submitted as
+(n x 2) numpy arrays and must not contain inf values.
+
+
+.. testcode::
+
+    from gudhi.wasserstein.barycenter import lagrangian_barycenter
+    import numpy as np
+
+    dg1 = np.array([[0.2, 0.5]])
+    dg2 = np.array([[0.2, 0.7]])
+    dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]])
+    dg4 = np.array([])
+    pdiagset = [dg1, dg2, dg3, dg4]
+    bary = lagrangian_barycenter(pdiagset=pdiagset,init=3)
+
+    message = "Wasserstein barycenter estimated:"
+    print(message)
+    print(bary)
+
+The output is:
+
+.. testoutput::
+
+    Wasserstein barycenter estimated:
+    [[0.27916667 0.55416667]
+     [0.7375     0.7625    ]
+     [0.2375     0.2625    ]]
+
+Bibliography
+------------
+
+.. bibliography:: ../../biblio/bibliography.bib
+   :filter: docname in docnames
+   :style: unsrt
diff --git a/src/python/doc/witness_complex_user.rst b/src/python/doc/witness_complex_user.rst
index 7087fa98..799f5444 100644
--- a/src/python/doc/witness_complex_user.rst
+++ b/src/python/doc/witness_complex_user.rst
@@ -128,8 +128,8 @@ Here is an example of constructing a strong witness complex filtration and compu
 * :download:`euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py <../example/euclidean_strong_witness_complex_diagram_persistence_from_off_file_example.py>`
 
 Bibliography
-============
+------------
 
 .. bibliography:: ../../biblio/bibliography.bib
-   :filter: docnames
+   :filter: docname in docnames
    :style: unsrt
diff --git a/src/python/gudhi/wasserstein/__init__.py b/src/python/gudhi/wasserstein/__init__.py
new file mode 100644
index 00000000..ed225ba4
--- /dev/null
+++ b/src/python/gudhi/wasserstein/__init__.py
@@ -0,0 +1 @@
+from .wasserstein import wasserstein_distance
diff --git a/src/python/gudhi/wasserstein/barycenter.py b/src/python/gudhi/wasserstein/barycenter.py
new file mode 100644
index 00000000..de7aea81
--- /dev/null
+++ b/src/python/gudhi/wasserstein/barycenter.py
@@ -0,0 +1,159 @@
+# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+# Author(s):       Theo Lacombe
+#
+# Copyright (C) 2019 Inria
+#
+# Modification(s):
+#   - YYYY/MM Author: Description of the modification
+
+
+import ot
+import numpy as np
+import scipy.spatial.distance as sc
+
+from gudhi.wasserstein import wasserstein_distance
+
+
+def _mean(x, m):
+    '''
+    :param x: a list of 2D-points, off diagonal, x_0... x_{k-1}
+    :param m: total amount of points taken into account,
+                that is we have (m-k) copies of diagonal
+    :returns: the weighted mean of x with (m-k) copies of the diagonal
+    '''
+    k = len(x)
+    if k > 0:
+        w = np.mean(x, axis=0)
+        w_delta = (w[0] + w[1]) / 2 * np.ones(2)
+        return (k * w + (m-k) * w_delta) / m
+    else:
+        return np.array([0, 0])
+
+
+def lagrangian_barycenter(pdiagset, init=None, verbose=False):
+    '''
+    :param pdiagset: a list of ``numpy.array`` of shape `(n x 2)`
+                    (`n` can variate), encoding a set of
+                    persistence diagrams with only finite coordinates.
+    :param init: The initial value for barycenter estimate.
+                    If ``None``, init is made on a random diagram from the dataset.
+                    Otherwise, it can be an ``int``
+                    (then initialization is made on ``pdiagset[init]``)
+                    or a `(n x 2)` ``numpy.array`` enconding
+                    a persistence diagram with `n` points.
+    :type init: ``int``, or (n x 2) ``np.array``
+    :param verbose: if ``True``, returns additional information about the
+                    barycenter.
+    :type verbose: boolean
+    :returns: If not verbose (default), a ``numpy.array`` encoding
+              the barycenter estimate of pdiagset
+              (local minimum of the energy function).
+              If ``pdiagset`` is empty, returns ``None``.
+              If verbose, returns a couple ``(Y, log)``
+              where ``Y`` is the barycenter estimate,
+              and ``log`` is a ``dict`` that contains additional informations:
+
+              - `"groupings"`, a list of list of pairs ``(i,j)``.
+              Namely, ``G[k] = [...(i, j)...]``, where ``(i,j)`` indicates
+              that ``pdiagset[k][i]`` is matched to ``Y[j]``
+              if ``i = -1`` or ``j = -1``, it means they
+              represent the diagonal.
+
+              - `"energy"`, ``float`` representing the Frechet energy value obtained.
+              It is the mean of squared distances of observations to the output.
+
+              - `"nb_iter"`, ``int`` number of iterations performed before convergence of the algorithm.
+    '''
+    X = pdiagset  # to shorten notations, not a copy
+    m = len(X)  # number of diagrams we are averaging
+    if m == 0:
+        print("Warning: computing barycenter of empty diag set. Returns None")
+        return None
+
+    # store the number of off-diagonal point for each of the X_i
+    nb_off_diag = np.array([len(X_i) for X_i in X])
+    # Initialisation of barycenter
+    if init is None:
+        i0 = np.random.randint(m)  # Index of first state for the barycenter
+        Y = X[i0].copy()
+    else:
+        if type(init)==int:
+            Y = X[init].copy()
+        else:
+            Y = init.copy()
+
+    nb_iter = 0
+
+    converged = False  # stoping criterion
+    while not converged:
+        nb_iter += 1
+        K = len(Y)  # current nb of points in Y (some might be on diagonal)
+        G = np.full((K, m), -1, dtype=int)  # will store for each j, the (index)
+                              # point matched in each other diagram
+                              #(might be the diagonal).
+                              # that is G[j, i] = k <=> y_j is matched to
+                              # x_k in the diagram i-th diagram X[i]
+        updated_points = np.zeros((K, 2))  # will store the new positions of
+                                           # the points of Y.
+                                           # If points disappear, there thrown
+                                           # on [0,0] by default.
+        new_created_points = []  # will store potential new points.
+
+        # Step 1 : compute optimal matching (Y, X_i) for each X_i
+        #          and create new points in Y if needed
+        for i in range(m):
+            _, indices = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.)
+            for y_j, x_i_j in indices:
+                if y_j >= 0:  # we matched an off diagonal point to x_i_j...
+                    if x_i_j >= 0:  # ...which is also an off-diagonal point.
+                        G[y_j, i] = x_i_j
+                    else:  # ...which is a diagonal point
+                        G[y_j, i] = -1  # -1 stands for the diagonal (mask)
+                else:  # We matched a diagonal point to x_i_j...
+                    if x_i_j >= 0:  # which is a off-diag point !
+                                                # need to create new point in Y
+                        new_y = _mean(np.array([X[i][x_i_j]]), m)
+                        # Average this point with (m-1) copies of Delta
+                        new_created_points.append(new_y)
+
+        # Step 2 : Update current point position thanks to groupings computed
+        to_delete = []
+        for j in range(K):
+            matched_points = [X[i][G[j, i]] for i in range(m) if G[j, i] > -1]
+            new_y_j = _mean(matched_points, m)
+            if not np.array_equal(new_y_j, np.array([0,0])):
+                updated_points[j] = new_y_j
+            else: # this points is no longer of any use.
+                to_delete.append(j)
+        # we remove the point to be deleted now.
+        updated_points = np.delete(updated_points, to_delete, axis=0)
+
+        # we cannot converge if there have been new created points.
+        if new_created_points:
+            Y = np.concatenate((updated_points, new_created_points))
+        else:
+            # Step 3 : we check convergence
+            if np.array_equal(updated_points, Y):
+                converged = True
+            Y = updated_points
+
+
+    if verbose:
+        groupings = []
+        energy = 0
+        log = {}
+        n_y = len(Y)
+        for i in range(m):
+            cost, edges = wasserstein_distance(Y, X[i], matching=True, order=2., internal_p=2.)
+            groupings.append(edges)
+            energy += cost
+            log["groupings"] = groupings
+        energy = energy/m
+        log["energy"] = energy
+        log["nb_iter"] = nb_iter
+
+        return Y, log
+    else:
+        return Y
+
diff --git a/src/python/gudhi/wasserstein.py b/src/python/gudhi/wasserstein/wasserstein.py
index 3dd993f9..35315939 100644
--- a/src/python/gudhi/wasserstein.py
+++ b/src/python/gudhi/wasserstein/wasserstein.py
@@ -9,6 +9,7 @@
 
 import numpy as np
 import scipy.spatial.distance as sc
+
 try:
     import ot
 except ImportError:
@@ -29,9 +30,9 @@ def _build_dist_matrix(X, Y, order=2., internal_p=2.):
     :param Y: (m x 2) numpy.array encoding the second diagram.
     :param order: exponent for the Wasserstein metric.
     :param internal_p: Ground metric (i.e. norm L^p).
-    :returns: (n+1) x (m+1) np.array encoding the cost matrix C. 
-                For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j], 
-                while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j]) 
+    :returns: (n+1) x (m+1) np.array encoding the cost matrix C.
+                For 0 <= i < n, 0 <= j < m, C[i,j] encodes the distance between X[i] and Y[j],
+                while C[i, m] (resp. C[n, j]) encodes the distance (to the p) between X[i] (resp Y[j])
                 and its orthogonal projection onto the diagonal.
                 note also that C[n, m] = 0  (it costs nothing to move from the diagonal to the diagonal).
     '''
@@ -58,7 +59,7 @@ def _perstot(X, order, internal_p):
     :param X: (n x 2) numpy.array (points of a given diagram).
     :param order: exponent for Wasserstein. Default value is 2.
     :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); Default value is 2 (Euclidean norm).
-    :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).    
+    :returns: float, the total persistence of the diagram (that is, its distance to the empty diagram).
     '''
     Xdiag = _proj_on_diag(X)
     return (np.sum(np.linalg.norm(X - Xdiag, ord=internal_p, axis=1)**order))**(1./order)
@@ -66,16 +67,16 @@ def _perstot(X, order, internal_p):
 
 def wasserstein_distance(X, Y, matching=False, order=2., internal_p=2.):
     '''
-    :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points 
+    :param X: (n x 2) numpy.array encoding the (finite points of the) first diagram. Must not contain essential points
                 (i.e. with infinite coordinate).
     :param Y: (m x 2) numpy.array encoding the second diagram.
     :param matching: if True, computes and returns the optimal matching between X and Y, encoded as
                      a (n x 2) np.array  [...[i,j]...], meaning the i-th point in X is matched to
                      the j-th point in Y, with the convention (-1) represents the diagonal.
     :param order: exponent for Wasserstein; Default value is 2.
-    :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2); 
+    :param internal_p: Ground metric on the (upper-half) plane (i.e. norm L^p in R^2);
                        Default value is 2 (Euclidean norm).
-    :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with 
+    :returns: the Wasserstein distance of order q (1 <= q < infinity) between persistence diagrams with
               respect to the internal_p-norm as ground metric.
               If matching is set to True, also returns the optimal matching between X and Y.
     '''
diff --git a/src/python/test/test_wasserstein_barycenter.py b/src/python/test/test_wasserstein_barycenter.py
new file mode 100755
index 00000000..f68c748e
--- /dev/null
+++ b/src/python/test/test_wasserstein_barycenter.py
@@ -0,0 +1,46 @@
+from gudhi.wasserstein.barycenter import lagrangian_barycenter
+import numpy as np
+
+""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
+    See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
+    Author(s):       Theo Lacombe
+
+    Copyright (C) 2019 Inria
+
+    Modification(s):
+      - YYYY/MM Author: Description of the modification
+"""
+
+__author__ = "Theo Lacombe"
+__copyright__ = "Copyright (C) 2019 Inria"
+__license__ = "MIT"
+
+
+def test_lagrangian_barycenter():
+
+    dg1 = np.array([[0.2, 0.5]])
+    dg2 = np.array([[0.2, 0.7]])
+    dg3 = np.array([[0.3, 0.6], [0.7, 0.8], [0.2, 0.3]])
+    dg4 = np.array([])
+    dg5 = np.array([])
+    dg6 = np.array([])
+    res = np.array([[0.27916667, 0.55416667], [0.7375,  0.7625], [0.2375, 0.2625]])
+
+    dg7 = np.array([[0.1, 0.15], [0.1, 0.7], [0.2, 0.22], [0.55, 0.84], [0.11, 0.91], [0.61, 0.75], [0.33, 0.46], [0.12, 0.41], [0.32, 0.48]])
+    dg8 = np.array([[0., 4.], [4, 8]])
+
+    # error crit.
+    eps = 1e-7
+
+
+    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg1, dg2, dg3, dg4],init=3, verbose=False) - res) < eps
+    assert np.array_equal(lagrangian_barycenter(pdiagset=[dg4, dg5, dg6], verbose=False), np.empty(shape=(0,2)))
+    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg7], verbose=False) - dg7) < eps
+    Y, log = lagrangian_barycenter(pdiagset=[dg4, dg8], verbose=True)
+    assert np.linalg.norm(Y - np.array([[1,3], [5, 7]])) < eps
+    assert np.abs(log["energy"] - 2) < eps
+    assert np.array_equal(log["groupings"][0] , np.array([[0, -1], [1, -1]]))
+    assert np.array_equal(log["groupings"][1] , np.array([[0, 0], [1, 1]]))
+    assert np.linalg.norm(lagrangian_barycenter(pdiagset=[dg8, dg4], init=np.array([[0.2, 0.6], [0.5, 0.7]]), verbose=False) - np.array([[1, 3], [5, 7]])) < eps
+    assert lagrangian_barycenter(pdiagset = []) is None
+
diff --git a/src/python/test/test_wasserstein_distance.py b/src/python/test/test_wasserstein_distance.py
index 0d70e11a..7e0d0f5f 100755
--- a/src/python/test/test_wasserstein_distance.py
+++ b/src/python/test/test_wasserstein_distance.py
@@ -70,7 +70,7 @@ def _basic_wasserstein(wasserstein_distance, delta, test_infinity=True, test_mat
         assert np.array_equal(match , [[0, -1], [1, -1]])
         match = wasserstein_distance(diag1, diag2, matching=True, internal_p=2., order=2.)[1]
         assert np.array_equal(match, [[0, 0], [1, 1], [2, -1]])
-        
+
 
 
 def hera_wrap(delta):
author	Marc Glisse <marc.glisse@inria.fr>	2020-04-17 11:22:42 +0200
committer	Marc Glisse <marc.glisse@inria.fr>	2020-04-17 11:22:42 +0200
commit	6dcedaeb83e7aef1e1a5e20a72b6ae00651e186f (patch)
tree	908186725f317b867dd1ae00a1f6bc656e329d1f
parent	c5c565dfd92ce1ad5b318dca40edf9429d6334c2 (diff)
parent	44996b632685eb077e61f79b1dd07d172776acb9 (diff)