-rw-r--r--  src/Alpha_complex/include/gudhi/Alpha_complex.h      39
-rw-r--r--  src/python/gudhi/representations/vector_methods.py  123
-rwxr-xr-x  src/python/test/test_representations.py              64
3 files changed, 136 insertions, 90 deletions
diff --git a/src/Alpha_complex/include/gudhi/Alpha_complex.h b/src/Alpha_complex/include/gudhi/Alpha_complex.h
index aec8c1b1..a7372f19 100644
--- a/src/Alpha_complex/include/gudhi/Alpha_complex.h
+++ b/src/Alpha_complex/include/gudhi/Alpha_complex.h
@@ -17,8 +17,7 @@
// to construct Alpha_complex from an OFF file of points
#include <gudhi/Points_off_io.h>
-#include <stdlib.h>
-#include <math.h> // isnan, fmax
+#include <cmath> // isnan, fmax
#include <memory> // for std::unique_ptr
#include <cstddef> // for std::size_t
@@ -45,6 +44,7 @@
#include <utility> // std::pair
#include <stdexcept>
#include <numeric> // for std::iota
+#include <algorithm> // for std::sort
// Make compilation fail - required for external projects - https://github.com/GUDHI/gudhi-devel/issues/10
#if CGAL_VERSION_NR < 1041101000
@@ -101,13 +101,17 @@ template<typename D> struct Is_Epeck_D<CGAL::Epeck_d<D>> { static const bool val
*/
template<class Kernel = CGAL::Epeck_d<CGAL::Dynamic_dimension_tag>, bool Weighted = false>
class Alpha_complex {
+ private:
+ // Vertex_handle internal type (required by triangulation_ and vertices_).
+ using Internal_vertex_handle = std::ptrdiff_t;
+
public:
/** \brief Geometric traits class that provides the geometric types and predicates needed by the triangulations.*/
using Geom_traits = std::conditional_t<Weighted, CGAL::Regular_triangulation_traits_adapter<Kernel>, Kernel>;
// Add an int in TDS to save point index in the structure
using TDS = CGAL::Triangulation_data_structure<typename Geom_traits::Dimension,
- CGAL::Triangulation_vertex<Geom_traits, std::ptrdiff_t>,
+ CGAL::Triangulation_vertex<Geom_traits, Internal_vertex_handle>,
CGAL::Triangulation_full_cell<Geom_traits> >;
/** \brief A (Weighted or not) Delaunay triangulation of a set of points in \f$ \mathbb{R}^D\f$.*/
@@ -132,9 +136,6 @@ class Alpha_complex {
// Vertex_iterator type from CGAL.
using CGAL_vertex_iterator = typename Triangulation::Vertex_iterator;
- // size_type type from CGAL.
- using size_type = typename Triangulation::size_type;
-
// Structure to switch from simplex tree vertex handle to CGAL vertex iterator.
using Vector_vertex_iterator = std::vector< CGAL_vertex_iterator >;
@@ -146,6 +147,10 @@ class Alpha_complex {
std::unique_ptr<Triangulation> triangulation_;
/** \brief Kernel for triangulation_ functions access.*/
A_kernel_d kernel_;
+ /** \brief Vertices to be inserted first by the create_complex method to avoid quadratic complexity.
+ * It isn't just [0, n) if some points have multiplicity (only one copy appears in the complex).
+ */
+ std::vector<Internal_vertex_handle> vertices_;
/** \brief Cache for geometric constructions: circumcenter and squared radius of a simplex.*/
std::vector<Sphere> cache_, old_cache_;
@@ -257,11 +262,11 @@ class Alpha_complex {
std::vector<Point_d> point_cloud(first, last);
// Creates a vector {0, 1, ..., N-1}
- std::vector<std::ptrdiff_t> indices(boost::counting_iterator<std::ptrdiff_t>(0),
- boost::counting_iterator<std::ptrdiff_t>(point_cloud.size()));
+ std::vector<Internal_vertex_handle> indices(boost::counting_iterator<Internal_vertex_handle>(0),
+ boost::counting_iterator<Internal_vertex_handle>(point_cloud.size()));
using Point_property_map = boost::iterator_property_map<typename std::vector<Point_d>::iterator,
- CGAL::Identity_property_map<std::ptrdiff_t>>;
+ CGAL::Identity_property_map<Internal_vertex_handle>>;
using Search_traits_d = CGAL::Spatial_sort_traits_adapter_d<Geom_traits, Point_property_map>;
CGAL::spatial_sort(indices.begin(), indices.end(), Search_traits_d(std::begin(point_cloud)));
@@ -279,6 +284,9 @@ class Alpha_complex {
// structure to retrieve CGAL points from vertex handle - one vertex handle per point.
// Needs to be constructed before, as vertex handles arrive in no particular order.
vertex_handle_to_iterator_.resize(point_cloud.size());
+ // List of sorted unique vertices in the triangulation. We take advantage of the existing loop to construct it.
+ // The vertices list avoids quadratic complexity with the Simplex_tree; it should not be filled when using, e.g., a Toplex_map.
+ vertices_.reserve(triangulation_->number_of_vertices());
// Loop on triangulation vertices list
for (CGAL_vertex_iterator vit = triangulation_->vertices_begin(); vit != triangulation_->vertices_end(); ++vit) {
if (!triangulation_->is_infinite(*vit)) {
@@ -286,8 +294,10 @@ class Alpha_complex {
std::clog << "Vertex insertion - " << vit->data() << " -> " << vit->point() << std::endl;
#endif // DEBUG_TRACES
vertex_handle_to_iterator_[vit->data()] = vit;
+ vertices_.push_back(vit->data());
}
}
+ std::sort(vertices_.begin(), vertices_.end());
// --------------------------------------------------------------------------------------------
}
}
@@ -384,12 +394,21 @@ class Alpha_complex {
// --------------------------------------------------------------------------------------------
// Simplex_tree construction from loop on triangulation finite full cells list
if (num_vertices() > 0) {
+ std::vector<Vertex_handle> one_vertex(1);
+ for (auto vertex : vertices_) {
+#ifdef DEBUG_TRACES
+ std::clog << "SimplicialComplex insertion " << vertex << std::endl;
+#endif // DEBUG_TRACES
+ one_vertex[0] = vertex;
+ complex.insert_simplex_and_subfaces(one_vertex, std::numeric_limits<double>::quiet_NaN());
+ }
+
for (auto cit = triangulation_->finite_full_cells_begin();
cit != triangulation_->finite_full_cells_end();
++cit) {
Vector_vertex vertexVector;
#ifdef DEBUG_TRACES
- std::clog << "Simplex_tree insertion ";
+ std::clog << "SimplicialComplex insertion ";
#endif // DEBUG_TRACES
for (auto vit = cit->vertices_begin(); vit != cit->vertices_end(); ++vit) {
if (*vit != nullptr) {
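
The two hunks above implement one idea: every finite vertex is inserted into the simplicial complex first, in sorted order and with a NaN placeholder filtration, before any full cell, which keeps Simplex_tree insertions from growing the root layer out of order — the source of the quadratic behavior. A minimal sketch of that pattern through GUDHI's public Python SimplexTree API (the simplices and filtration values below are illustrative, not the patch itself):

    import math
    import gudhi

    st = gudhi.SimplexTree()

    # 1. Sorted, de-duplicated vertices first, with a NaN placeholder
    #    filtration, mirroring the quiet_NaN() in the hunk above.
    for v in sorted({0, 1, 2, 3}):
        st.insert([v], filtration=math.nan)

    # 2. Only then the full cells from the triangulation; the root layer of
    #    the tree is already complete, so insertions stay cheap.
    st.insert([0, 1, 2], filtration=0.5)
    st.insert([1, 2, 3], filtration=0.7)

    # 3. The placeholder vertex filtrations are overwritten later, as the
    #    patch does while assigning alpha values.
    for v in range(4):
        st.assign_filtration([v], 0.0)
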
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py
index 69ff5e1e..a169aee8 100644
--- a/src/python/gudhi/representations/vector_methods.py
+++ b/src/python/gudhi/representations/vector_methods.py
@@ -85,7 +85,7 @@ class PersistenceImage(BaseEstimator, TransformerMixin):
Xfit.append(image.flatten()[np.newaxis,:])
- Xfit = np.concatenate(Xfit,0)
+ Xfit = np.concatenate(Xfit, 0)
return Xfit
@@ -123,6 +123,15 @@ def _automatic_sample_range(sample_range, X, y):
pass
return sample_range
+
+def _trim_on_edges(x, are_endpoints_nan):
+ if are_endpoints_nan[0]:
+ x = x[1:]
+ if are_endpoints_nan[1]:
+ x = x[:-1]
+ return x
+
+
class Landscape(BaseEstimator, TransformerMixin):
"""
This is a class for computing persistence landscapes from a list of persistence diagrams. A persistence landscape is a collection of 1D piecewise-linear functions computed from the rank function associated to the persistence diagram. These piecewise-linear functions are then sampled evenly on a given range and the corresponding vectors of samples are concatenated and returned. See http://jmlr.org/papers/v16/bubenik15a.html for more details.
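
The _trim_on_edges helper pairs with the nan_in_range / new_resolution bookkeeping this patch adds to the estimators' constructors: when an endpoint of sample_range is NaN, the sampling grid gets one extra point, and the data-derived endpoint is then dropped so that exactly resolution samples remain. A standalone illustration (the helper is copied verbatim from the hunk above; the numbers are made up):

    import numpy as np

    def _trim_on_edges(x, are_endpoints_nan):
        if are_endpoints_nan[0]:
            x = x[1:]
        if are_endpoints_nan[1]:
            x = x[:-1]
        return x

    resolution = 9
    sample_range = [np.nan, 6.0]                      # left endpoint inferred in fit()
    nan_in_range = np.isnan(np.array(sample_range))   # [True, False]
    new_resolution = resolution + nan_in_range.sum()  # 10: one padding sample on the left

    grid = np.linspace(2.0, 6.0, new_resolution)      # 2.0 stands for the inferred minimum
    print(_trim_on_edges(grid, nan_in_range).shape)   # (9,): back to resolution samples
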
@@ -149,6 +158,8 @@ class Landscape(BaseEstimator, TransformerMixin):
y (n x 1 array): persistence diagram labels (unused).
"""
self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
+ self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
+ self.im_range = _trim_on_edges(self.im_range, self.nan_in_range)
return self
def transform(self, X):
@@ -161,53 +172,26 @@ class Landscape(BaseEstimator, TransformerMixin):
Returns:
numpy array with shape (number of diagrams) x (number of samples = **num_landscapes** x **resolution**): output persistence landscapes.
"""
- num_diag, Xfit = len(X), []
- x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
- step_x = x_values[1] - x_values[0]
-
- for i in range(num_diag):
-
- diagram, num_pts_in_diag = X[i], X[i].shape[0]
- ls = np.zeros([self.num_landscapes, self.new_resolution])
+ Xfit = []
+ x_values = self.im_range
+ for diag in X:
+ midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2.
+ tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0)
+ n_points = diag.shape[0]
+ # Complete the array with zeros to get the right number of landscapes
+ if self.num_landscapes > n_points:
+ tent_functions = np.concatenate(
+ [tent_functions, np.zeros((tent_functions.shape[0], self.num_landscapes-n_points))],
+ axis=1
+ )
+ tent_functions.partition(tent_functions.shape[1]-self.num_landscapes, axis=1)
+ landscapes = np.sort(tent_functions[:, -self.num_landscapes:], axis=1)[:, ::-1].T
- events = []
- for j in range(self.new_resolution):
- events.append([])
+ landscapes = np.sqrt(2) * np.ravel(landscapes)
+ Xfit.append(landscapes)
- for j in range(num_pts_in_diag):
- [px,py] = diagram[j,:2]
- min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
- mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
- max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.new_resolution)
-
- if min_idx < self.new_resolution and max_idx > 0:
-
- landscape_value = self.sample_range[0] + min_idx * step_x - px
- for k in range(min_idx, mid_idx):
- events[k].append(landscape_value)
- landscape_value += step_x
-
- landscape_value = py - self.sample_range[0] - mid_idx * step_x
- for k in range(mid_idx, max_idx):
- events[k].append(landscape_value)
- landscape_value -= step_x
-
- for j in range(self.new_resolution):
- events[j].sort(reverse=True)
- for k in range( min(self.num_landscapes, len(events[j])) ):
- ls[k,j] = events[j][k]
-
- if self.nan_in_range[0]:
- ls = ls[:,1:]
- if self.nan_in_range[1]:
- ls = ls[:,:-1]
- ls = np.sqrt(2)*np.reshape(ls,[1,-1])
- Xfit.append(ls)
-
- Xfit = np.concatenate(Xfit,0)
-
- return Xfit
+ return np.stack(Xfit, axis=0)
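
The partition/sort pair above selects the num_landscapes largest tent values at each grid point without fully sorting every column. A tiny self-contained demonstration of the same top-k pattern on made-up data:

    import numpy as np

    a = np.array([[3., 1., 4., 1., 5.],
                  [9., 2., 6., 5., 3.]])
    k = 2
    a.partition(a.shape[1] - k, axis=1)          # the k largest values end up in the last k columns
    top_k = np.sort(a[:, -k:], axis=1)[:, ::-1]  # descending per row: [[5., 4.], [9., 6.]]
    print(top_k)
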
def __call__(self, diag):
"""
@@ -219,7 +203,7 @@ class Landscape(BaseEstimator, TransformerMixin):
Returns:
numpy array with shape (number of samples = **num_landscapes** x **resolution**): output persistence landscape.
"""
- return self.fit_transform([diag])[0,:]
+ return self.fit_transform([diag])[0, :]
class Silhouette(BaseEstimator, TransformerMixin):
"""
@@ -235,6 +219,8 @@ class Silhouette(BaseEstimator, TransformerMixin):
sample_range ([double, double]): minimum and maximum for the weighted average domain, of the form [x_min, x_max] (default [numpy.nan, numpy.nan]). It is the interval on which samples will be drawn evenly. If one of the values is numpy.nan, it can be computed from the persistence diagrams with the fit() method.
"""
self.weight, self.resolution, self.sample_range = weight, resolution, sample_range
+ self.nan_in_range = np.isnan(np.array(self.sample_range))
+ self.new_resolution = self.resolution + self.nan_in_range.sum()
def fit(self, X, y=None):
"""
@@ -245,6 +231,8 @@ class Silhouette(BaseEstimator, TransformerMixin):
y (n x 1 array): persistence diagram labels (unused).
"""
self.sample_range = _automatic_sample_range(np.array(self.sample_range), X, y)
+ self.im_range = np.linspace(self.sample_range[0], self.sample_range[1], self.new_resolution)
+ self.im_range = _trim_on_edges(self.im_range, self.nan_in_range)
return self
def transform(self, X):
@@ -257,44 +245,19 @@ class Silhouette(BaseEstimator, TransformerMixin):
Returns:
numpy array with shape (number of diagrams) x (**resolution**): output persistence silhouettes.
"""
- num_diag, Xfit = len(X), []
- x_values = np.linspace(self.sample_range[0], self.sample_range[1], self.resolution)
- step_x = x_values[1] - x_values[0]
-
- for i in range(num_diag):
-
- diagram, num_pts_in_diag = X[i], X[i].shape[0]
+ Xfit = []
+ x_values = self.im_range
- sh, weights = np.zeros(self.resolution), np.zeros(num_pts_in_diag)
- for j in range(num_pts_in_diag):
- weights[j] = self.weight(diagram[j,:])
+ for diag in X:
+ midpoints, heights = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2.
+ weights = np.array([self.weight(pt) for pt in diag])
total_weight = np.sum(weights)
- for j in range(num_pts_in_diag):
-
- [px,py] = diagram[j,:2]
- weight = weights[j] / total_weight
- min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- mid_idx = np.clip(np.ceil((0.5*(py+px) - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
- max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution)
-
- if min_idx < self.resolution and max_idx > 0:
-
- silhouette_value = self.sample_range[0] + min_idx * step_x - px
- for k in range(min_idx, mid_idx):
- sh[k] += weight * silhouette_value
- silhouette_value += step_x
-
- silhouette_value = py - self.sample_range[0] - mid_idx * step_x
- for k in range(mid_idx, max_idx):
- sh[k] += weight * silhouette_value
- silhouette_value -= step_x
-
- Xfit.append(np.reshape(np.sqrt(2) * sh, [1,-1]))
-
- Xfit = np.concatenate(Xfit, 0)
+ tent_functions = np.maximum(heights[None, :] - np.abs(x_values[:, None] - midpoints[None, :]), 0)
+ silhouette = np.sum(weights[None, :] / total_weight * tent_functions, axis=1)
+ Xfit.append(silhouette * np.sqrt(2))
- return Xfit
+ return np.stack(Xfit, axis=0)
def __call__(self, diag):
"""
diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py
index 4a455bb6..58caab21 100755
--- a/src/python/test/test_representations.py
+++ b/src/python/test/test_representations.py
@@ -187,3 +187,67 @@ def test_kernel_empty_diagrams():
# PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1.)(empty_diag, empty_diag)
# PersistenceFisherKernel(bandwidth_fisher=1., bandwidth=1., kernel_approx=RBFSampler(gamma=1./2, n_components=100000).fit(np.ones([1,2])))(empty_diag, empty_diag)
+
+def test_silhouette_permutation_invariance():
+ dgm = _n_diags(1)[0]
+ dgm_permuted = dgm[np.random.permutation(dgm.shape[0]).astype(int)]
+ random_resolution = random.randint(50, 100) * 10
+ slt = Silhouette(resolution=random_resolution, weight=pow(2))
+
+ assert np.all(np.isclose(slt(dgm), slt(dgm_permuted)))
+
+
+def test_silhouette_multiplication_invariance():
+ dgm = _n_diags(1)[0]
+ n_repetitions = np.random.randint(2, high=10)
+ dgm_augmented = np.repeat(dgm, repeats=n_repetitions, axis=0)
+
+ random_resolution = random.randint(50, 100) * 10
+ slt = Silhouette(resolution=random_resolution, weight=pow(2))
+ assert np.all(np.isclose(slt(dgm), slt(dgm_augmented)))
+
+
+def test_silhouette_numeric():
+ dgm = np.array([[2., 3.], [5., 6.]])
+ slt = Silhouette(resolution=9, weight=pow(1), sample_range=[2., 6.])
+ #slt.fit([dgm])
+ # x_values = array([2., 2.5, 3., 3.5, 4., 4.5, 5., 5.5, 6.])
+
+ expected_silhouette = np.array([0., 0.5, 0., 0., 0., 0., 0., 0.5, 0.])/np.sqrt(2)
+ output_silhouette = slt(dgm)
+ assert np.all(np.isclose(output_silhouette, expected_silhouette))
+
+
+def test_landscape_small_persistence_invariance():
+ dgm = np.array([[2., 6.], [2., 5.], [3., 7.]])
+ small_persistence_pts = np.random.rand(10, 2)
+ small_persistence_pts[:, 1] += small_persistence_pts[:, 0]
+ small_persistence_pts += np.min(dgm)
+ dgm_augmented = np.concatenate([dgm, small_persistence_pts], axis=0)
+
+ lds = Landscape(num_landscapes=2, resolution=5)
+ lds_dgm, lds_dgm_augmented = lds(dgm), lds(dgm_augmented)
+
+ assert np.all(np.isclose(lds_dgm, lds_dgm_augmented))
+
+
+def test_landscape_numeric():
+ dgm = np.array([[2., 6.], [3., 5.]])
+ lds_ref = np.array([
+ 0., 0.5, 1., 1.5, 2., 1.5, 1., 0.5, 0., # tent of [2, 6]
+ 0., 0., 0., 0.5, 1., 0.5, 0., 0., 0., # tent of [3, 5]
+ 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ ])
+ lds_ref *= np.sqrt(2)
+ lds = Landscape(num_landscapes=4, resolution=9, sample_range=[2., 6.])
+ lds_dgm = lds(dgm)
+ assert np.all(np.isclose(lds_dgm, lds_ref))
+
+
+def test_landscape_nan_range():
+ dgm = np.array([[2., 6.], [3., 5.]])
+ lds = Landscape(num_landscapes=2, resolution=9, sample_range=[np.nan, 6.])
+ lds_dgm = lds(dgm)
+ assert (lds.sample_range[0] == 2) & (lds.sample_range[1] == 6)
+ assert lds.new_resolution == 10
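
The numeric expectations in these tests can be reproduced by hand. For test_silhouette_numeric, for instance (this assumes pow is the weight factory defined earlier in test_representations.py, outside this hunk, roughly pow(n) == lambda pt: (pt[1] - pt[0]) ** n):

    import numpy as np

    diag = np.array([[2., 3.], [5., 6.]])
    x = np.linspace(2., 6., 9)
    mid, h = (diag[:, 0] + diag[:, 1]) / 2., (diag[:, 1] - diag[:, 0]) / 2.
    tents = np.maximum(h[None, :] - np.abs(x[:, None] - mid[None, :]), 0)
    w = (diag[:, 1] - diag[:, 0]) ** 1           # pow(1) weights: [1., 1.]
    silhouette = np.sqrt(2) * np.sum(w[None, :] / w.sum() * tents, axis=1)
    expected = np.array([0., .5, 0., 0., 0., 0., 0., .5, 0.]) / np.sqrt(2)
    print(np.allclose(silhouette, expected))     # True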