src/python/gudhi/point_cloud/dtm.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136

# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
# Author(s):       Marc Glisse
#
# Copyright (C) 2020 Inria
#
# Modification(s):
#   - YYYY/MM Author: Description of the modification

from .knn import KNearestNeighbors
import numpy as np

# Module-level metadata: author, copyright holder and license identifier for this file.
__author__ = "Marc Glisse"
__copyright__ = "Copyright (C) 2020 Inria"
__license__ = "MIT"


class DistanceToMeasure:
    """
    Estimator of the distance to the empirical measure carried by a point set, as introduced in :cite:`dtm`.

    For each query point, the result is the order-q mean of the distances to its k nearest neighbors,
    i.e. ``(sum_i d_i**q / k) ** (1/q)``.
    """

    def __init__(self, k, q=2, **kwargs):
        """
        Args:
            k (int): number of neighbors (possibly including the point itself).
            q (float): order used to compute the distance to measure. Defaults to 2.
            kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that
                metric="neighbors" means that :func:`transform` expects an array with the distances
                to the k nearest neighbors.
        """
        self.k, self.q, self.params = k, q, kwargs

    def fit_transform(self, X, y=None):
        """
        Fit on X, then evaluate the distance to measure at the same X.

        Args:
            X (numpy.array): passed both to :func:`fit` and to :func:`transform`.
            y: unused.

        Returns:
            numpy.array: the result of :func:`transform` on X.
        """
        fitted = self.fit(X)
        return fitted.transform(X)

    def fit(self, X, y=None):
        """
        Args:
            X (numpy.array): coordinates for mass points.
            y: unused.

        Returns:
            DistanceToMeasure: this object, to allow chaining.
        """
        # A missing metric is recorded as "euclidean" so that transform() can rely on the key being present.
        metric = self.params.setdefault("metric", "euclidean")
        if metric == "neighbors":
            # Neighbor distances are supplied directly to transform(), there is no search structure to build.
            return self
        self.knn = KNearestNeighbors(
            self.k, return_index=False, return_distance=True, sort_results=False, **self.params
        )
        self.knn.fit(X)
        return self

    def transform(self, X):
        """
        Args:
            X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed",
                or distances to the k nearest neighbors if metric is "neighbors" (if the array has more
                than k columns, the remaining ones are ignored).

        Returns:
            numpy.array: a 1-d array with, for each point of X, its distance to the measure defined
            by the argument of :func:`fit`.
        """
        distances_given = self.params["metric"] == "neighbors"
        neighbor_dists = X[:, : self.k] if distances_given else self.knn.transform(X)
        mean_of_powers = (neighbor_dists ** self.q).sum(-1) / self.k
        # Many powers are chained here: 1/p inside knn, then q and 1/q in this method, and the caller
        # often applies q or a log right after. An option to skip the final root could avoid some of them.
        return mean_of_powers ** (1.0 / self.q)


class DTMDensity:
    """
    Density estimator based on the distance to the empirical measure defined by a point set, as defined
    in :cite:`dtmdensity`. Note that this implementation does not renormalize so the total measure is
    not 1, see the reference for suitable normalization factors in the Euclidean case.
    """

    def __init__(self, k=None, weights=None, q=None, dim=None, **kwargs):
        """
        Args:
            k (int): number of neighbors (possibly including the point itself).
            weights (numpy.array): weights of each of the k neighbors, optional. When omitted, every
                neighbor gets the weight 1/k.
            q (float): order used to compute the distance to measure. Defaults to dim.
            dim (float): final exponent representing the dimension. Defaults to the dimension, and must be
                specified when the dimension cannot be read from the input (metric="neighbors" or
                metric="precomputed").
            kwargs: same parameters as :class:`~gudhi.point_cloud.knn.KNearestNeighbors`, except that
                metric="neighbors" means that :func:`transform` expects an array with the distances to
                the k nearest neighbors.

        Raises:
            AssertionError: if neither k nor weights is given, or if k differs from len(weights).
        """
        if weights is None:
            assert k is not None, "Must specify k or weights"
            self.k = k
            self.weights = np.full(k, 1.0 / k)
        else:
            self.weights = weights
            self.k = len(weights)
            assert k is None or k == self.k, "k differs from the length of weights"
        self.q = q
        self.dim = dim
        self.params = kwargs

    def fit_transform(self, X, y=None):
        """
        Fit on X, then evaluate the density estimate at the same X.

        Args:
            X (numpy.array): passed both to :func:`fit` and to :func:`transform`.
            y: unused.

        Returns:
            numpy.array: the result of :func:`transform` on X.
        """
        return self.fit(X).transform(X)

    def fit(self, X, y=None):
        """
        Args:
            X (numpy.array): coordinates for mass points.
            y: unused.

        Returns:
            DTMDensity: this object, to allow chaining.
        """
        # A missing metric is recorded as "euclidean" so that transform() can rely on the key being present.
        if self.params.setdefault("metric", "euclidean") != "neighbors":
            # Use the class imported at the top of the file; the name `KNN` does not exist in this module.
            self.knn = KNearestNeighbors(
                self.k, return_index=False, return_distance=True, sort_results=False, **self.params
            )
            self.knn.fit(X)
        return self

    def transform(self, X):
        """
        Args:
            X (numpy.array): coordinates for query points, or distance matrix if metric is "precomputed",
                or distances to the k nearest neighbors if metric is "neighbors" (if the array has more
                than k columns, the remaining ones are ignored).

        Returns:
            numpy.array: for each point of X, the unnormalized density estimate
            ``(sum_i weights[i] * d_i**q) ** (-dim / q)`` where the d_i are the distances to its
            k nearest neighbors.

        Raises:
            AssertionError: if dim was not specified and the metric is "neighbors" or "precomputed",
                in which case the dimension cannot be read from X.
        """
        q = self.q
        dim = self.dim
        if dim is None:
            assert self.params["metric"] not in {
                "neighbors",
                "precomputed",
            }, "dim not specified and cannot guess the dimension"
            # X holds coordinates here, so the length of one row is the ambient dimension.
            dim = len(X[0])
        if q is None:
            q = dim
        if self.params["metric"] == "neighbors":
            distances = X[:, : self.k]
        else:
            distances = self.knn.transform(X)
        distances = distances ** q
        # The weights live on the instance; they broadcast along the last (neighbor) axis.
        dtm = (distances * self.weights).sum(-1)
        # We compute too many powers, 1/p in knn then q in dtm, d/q in dtm then whatever in the caller.
        # Add option to skip the final root?
        return dtm ** (-dim / q)