summaryrefslogtreecommitdiff
path: root/src/Persistence_representations/include/gudhi/Persistence_weighted_gaussian.h
blob: 76c43e65392694298bf0dae5698cb45196c80723 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
/*    This file is part of the Gudhi Library. The Gudhi library
 *    (Geometric Understanding in Higher Dimensions) is a generic C++
 *    library for computational topology.
 *
 *    Author(s):       Mathieu Carriere
 *
 *    Copyright (C) 2018  INRIA (France)
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef PERSISTENCE_WEIGHTED_GAUSSIAN_H_
#define PERSISTENCE_WEIGHTED_GAUSSIAN_H_

// gudhi include
#include <gudhi/read_persistence_from_file.h>
#include <gudhi/common_persistence_representations.h>
#include <gudhi/Weight_functions.h>

// standard include
#include <cmath>
#include <iostream>
#include <vector>
#include <limits>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <string>
#include <utility>
#include <functional>

namespace Gudhi {
namespace Persistence_representations {
/**
 * \class Persistence_weighted_gaussian gudhi/Persistence_weighted_gaussian.h
 * \brief A class implementing the Persistence Weighted Gaussian kernel and a specific case thereof called the Persistence Scale Space kernel.
 *
 * \ingroup Persistence_representations
 *
 * \details
 * The Persistence Weighted Gaussian kernel is built with Gaussian Kernel Mean Embedding, meaning that each persistence diagram is first
 * sent to the Hilbert space of a Gaussian kernel with bandwidth parameter \f$\sigma >0\f$ using a weighted mean embedding \f$\Phi\f$:
 *
 * \f$ \Phi\,:\,D\,\rightarrow\,\sum_{p\in D}\,w(p)\,{\rm exp}\left(-\frac{\|p-\cdot\|_2^2}{2\sigma^2}\right) \f$,
 *
 * Usually, the weight function is chosen to be an arctan function of the distance of the point to the diagonal:
 * \f$w(p) = {\rm arctan}(C\,|y-x|^\alpha)\f$, for some parameters \f$C,\alpha >0\f$.
 * Then, their scalar product in this space is computed:
 *
 * \f$ k(D_1,D_2)=\langle\Phi(D_1),\Phi(D_2)\rangle
 * \,=\,\sum_{p\in D_1}\,\sum_{q\in D_2}\,w(p)\,w(q)\,{\rm exp}\left(-\frac{\|p-q\|_2^2}{2\sigma^2}\right).\f$
 *
 * Note that one may apply a second Gaussian kernel to their distance in this space and still get a kernel.
 *
 * It follows that the computation time is \f$O(n^2)\f$ where \f$n\f$ is the number of points
 * in the diagrams. This time can be improved by computing approximations of the kernel
 * with \f$m\f$ Fourier features \cite Rahimi07randomfeatures. In that case, the computation time becomes \f$O(mn)\f$.
 *
 * The Persistence Scale Space kernel is a Persistence Weighted Gaussian kernel between modified diagrams:
 * the symmetric of each point with respect to the diagonal is first added in each diagram, and then the weight function
 * is set to be +1 if the point is above the diagonal and -1 otherwise.
 * 
 * For more details, please see \cite Kusano_Fukumizu_Hiraoka_PWGK
 * and \cite Reininghaus_Huber_ALL_PSSK .
 *
**/
class Persistence_weighted_gaussian{

 protected:
    Persistence_diagram diagram;
    Weight weight;
    double sigma;
    int approx;

 public:

  /** \brief Persistence Weighted Gaussian kernel constructor.
   * \ingroup Persistence_weighted_gaussian
   *
   * @param[in] _diagram       persistence diagram.
   * @param[in] _sigma         bandwidth parameter of the Gaussian kernel used for the Kernel Mean Embedding of the diagrams.
   * @param[in] _approx        number of random Fourier features in case of approximate computation, set to -1 for exact computation.
   * @param[in] _weight        weight function for the points in the diagrams.
   *
   */
  Persistence_weighted_gaussian(const Persistence_diagram & _diagram, double _sigma = 1.0, int _approx = 1000, const Weight & _weight = arctan_weight(1,1)){diagram = _diagram; sigma = _sigma; approx = _approx; weight = _weight;}


  // **********************************
  // Utils.
  // **********************************

  std::vector<std::pair<double,double> > Fourier_feat(const Persistence_diagram & diag, const std::vector<std::pair<double,double> > & z, const Weight & weight = arctan_weight(1,1)) const {
    int md = diag.size(); std::vector<std::pair<double,double> > b; int mz = z.size();
    for(int i = 0; i < mz; i++){
      double d1 = 0; double d2 = 0; double zx = z[i].first; double zy = z[i].second;
      for(int j = 0; j < md; j++){
        double x = diag[j].first; double y = diag[j].second;
        d1 += weight(diag[j])*cos(x*zx + y*zy);
        d2 += weight(diag[j])*sin(x*zx + y*zy);
      }
      b.emplace_back(d1,d2);
    }
    return b;
  }

  std::vector<std::pair<double,double> > random_Fourier(double sigma, int m = 1000) const {
    std::normal_distribution<double> distrib(0,1); std::vector<std::pair<double,double> > z; std::random_device rd;
    for(int i = 0; i < m; i++){
      std::mt19937 e1(rd()); std::mt19937 e2(rd());
      double zx = distrib(e1); double zy = distrib(e2);
      z.emplace_back(zx/sigma,zy/sigma);
    }
    return z;
  }



  // **********************************
  // Scalar product + distance.
  // **********************************

  /** \brief Evaluation of the kernel on a pair of diagrams.
   * \ingroup Persistence_weighted_gaussian
   *
   * @pre       sigma, approx and weight attributes need to be the same for both instances.
   * @param[in] second other instance of class Persistence_weighted_gaussian.
   *
   */
  double compute_scalar_product(const Persistence_weighted_gaussian & second) const {

    GUDHI_CHECK(this->sigma != second.sigma || this->approx != second.approx || this->weight != second.weight, std::invalid_argument("Error: different values for representations"));
    Persistence_diagram diagram1 = this->diagram; Persistence_diagram diagram2 = second.diagram;

    if(this->approx == -1){
      int num_pts1 = diagram1.size(); int num_pts2 = diagram2.size(); double k = 0;
      for(int i = 0; i < num_pts1; i++)
        for(int j = 0; j < num_pts2; j++)
          k += this->weight(diagram1[i])*this->weight(diagram2[j])*exp(-((diagram1[i].first  - diagram2[j].first)  *  (diagram1[i].first  - diagram2[j].first) +
                                                                         (diagram1[i].second - diagram2[j].second) *  (diagram1[i].second - diagram2[j].second))
                                                                        /(2*this->sigma*this->sigma));
      return k;
    }
    else{
      std::vector<std::pair<double,double> > z  = random_Fourier(this->sigma, this->approx);
      std::vector<std::pair<double,double> > b1 = Fourier_feat(diagram1,z,this->weight);
      std::vector<std::pair<double,double> > b2 = Fourier_feat(diagram2,z,this->weight);
      double d = 0; for(int i = 0; i < this->approx; i++) d += b1[i].first*b2[i].first + b1[i].second*b2[i].second;
      return d/this->approx;
    }
  }

  /** \brief Evaluation of the distance between images of diagrams in the Hilbert space of the kernel.
   * \ingroup Persistence_weighted_gaussian
   *
   * @pre       sigma, approx and weight attributes need to be the same for both instances.
   * @param[in] second other instance of class Persistence_weighted_gaussian.
   *
   */
  double distance(const Persistence_weighted_gaussian & second) const {
    GUDHI_CHECK(this->sigma != second.sigma || this->approx != second.approx || this->weight != second.weight, std::invalid_argument("Error: different values for representations"));
    return std::pow(this->compute_scalar_product(*this) + second.compute_scalar_product(second)-2*this->compute_scalar_product(second), 0.5);
  }


}; // class Persistence_weighted_gaussian
}  // namespace Persistence_representations
}  // namespace Gudhi

#endif  // PERSISTENCE_WEIGHTED_GAUSSIAN_H_