Diffstat (limited to 'src/python/test/test_representations.py')
-rwxr-xr-x | src/python/test/test_representations.py | 259 |
1 file changed, 258 insertions, 1 deletion
diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py
index dba7f952..f4ffbdc1 100755
--- a/src/python/test/test_representations.py
+++ b/src/python/test/test_representations.py
@@ -1,12 +1,269 @@
 import os
 import sys
 import matplotlib.pyplot as plt
+import numpy as np
+import pytest
+import random
+
+from sklearn.cluster import KMeans
+
+# Vectorization
+from gudhi.representations import (Landscape, Silhouette, BettiCurve, ComplexPolynomial,\
+                                   TopologicalVector, PersistenceImage, Entropy)
+
+# Preprocessing
+from gudhi.representations import (BirthPersistenceTransform, Clamping, DiagramScaler, Padding, ProminentPoints, \
+                                   DiagramSelector)
+
+# Kernel
+from gudhi.representations import (PersistenceWeightedGaussianKernel, \
+                                   PersistenceScaleSpaceKernel, SlicedWassersteinDistance,\
+                                   SlicedWassersteinKernel, PersistenceFisherKernel, WassersteinDistance)
+
 
 def test_representations_examples():
     # Disable graphics for testing purposes
-    plt.show = lambda:None
+    plt.show = lambda: None
     here = os.path.dirname(os.path.realpath(__file__))
     sys.path.append(here + "/../example")
     import diagram_vectorizations_distances_kernels
 
     return None
+
+
+from gudhi.representations.vector_methods import Atol
+from gudhi.representations.metrics import *
+from gudhi.representations.kernel_methods import *
+
+
+def _n_diags(n):
+    l = []
+    for _ in range(n):
+        a = np.random.rand(50, 2)
+        a[:, 1] += a[:, 0]  # So that y >= x
+        l.append(a)
+    return l
+
+
+def test_multiple():
+    l1 = _n_diags(9)
+    l2 = _n_diags(11)
+    l1b = l1.copy()
+    d1 = pairwise_persistence_diagram_distances(l1, e=0.00001, n_jobs=4)
+    d2 = BottleneckDistance(epsilon=0.00001).fit_transform(l1)
+    d3 = pairwise_persistence_diagram_distances(l1, l1b, e=0.00001, n_jobs=4)
+    assert d1 == pytest.approx(d2)
+    assert d3 == pytest.approx(d2, abs=1e-5)  # Because of 0 entries (on the diagonal)
+    d1 = pairwise_persistence_diagram_distances(l1, l2, metric="wasserstein", order=2, internal_p=2)
+    d2 = WassersteinDistance(order=2, internal_p=2, n_jobs=4).fit(l2).transform(l1)
+    print(d1.shape, d2.shape)
+    assert d1 == pytest.approx(d2, rel=0.02)
+
+
+# Test sorted values as points order can be inverted, and sorted test is not documentation-friendly
+# Note the test below must be up to date with the Atol class documentation
+def test_atol_doc():
+    a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
+    b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
+    c = np.array([[3, 2, -1], [1, 2, -1]])
+
+    atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
+    # Atol will do
+    # X = np.concatenate([a,b,c])
+    # kmeans = KMeans(n_clusters=2, random_state=202006).fit(X)
+    # kmeans.labels_ will be : array([1, 0, 1, 0, 0, 1, 0, 0])
+    first_cluster = np.asarray([a[0], a[2], b[2]])
+    second_cluster = np.asarray([a[1], b[0], b[1], c[0], c[1]])
+
+    # Check the centers of first_cluster and second_cluster are among the Atol centers
+    centers = atol_vectoriser.fit(X=[a, b, c]).centers
+    assert np.isclose(centers, first_cluster.mean(axis=0)).all(1).any()
+    assert np.isclose(centers, second_cluster.mean(axis=0)).all(1).any()
+
+    vectorization = atol_vectoriser.transform(X=[a, b, c])
+    assert np.allclose(vectorization[0], atol_vectoriser(a))
+    assert np.allclose(vectorization[1], atol_vectoriser(b))
+    assert np.allclose(vectorization[2], atol_vectoriser(c))
+
+
+def test_dummy_atol():
+    a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
+    b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
+    c = np.array([[3, 2, -1], [1, 2, -1]])
+
+    for weighting_method in ["cloud", "iidproba"]:
+        for contrast in ["gaussian", "laplacian", "indicator"]:
+            atol_vectoriser = Atol(
+                quantiser=KMeans(n_clusters=1, random_state=202006),
+                weighting_method=weighting_method,
+                contrast=contrast,
+            )
+            atol_vectoriser.fit([a, b, c])
+            atol_vectoriser(a)
+            atol_vectoriser.transform(X=[a, b, c])
+
+
+from gudhi.representations.vector_methods import BettiCurve
+
+
+def test_infinity():
+    a = np.array([[1.0, 8.0], [2.0, np.inf], [3.0, 4.0]])
+    c = BettiCurve(20, [0.0, 10.0])(a)
+    assert c[1] == 0
+    assert c[7] == 3
+    assert c[9] == 2
+
+
+def test_preprocessing_empty_diagrams():
+    empty_diag = np.empty(shape=[0, 2])
+    assert not np.any(BirthPersistenceTransform()(empty_diag))
+    assert not np.any(Clamping().fit_transform(empty_diag))
+    assert not np.any(DiagramScaler()(empty_diag))
+    assert not np.any(Padding()(empty_diag))
+    assert not np.any(ProminentPoints()(empty_diag))
+    assert not np.any(DiagramSelector()(empty_diag))
+
+
+def pow(n):
+    return lambda x: np.power(x[1] - x[0], n)
+
+
+def test_vectorization_empty_diagrams():
+    empty_diag = np.empty(shape=[0, 2])
+    random_resolution = random.randint(50, 100) * 10  # between 500 and 1000
+    print("resolution = ", random_resolution)
+    lsc = Landscape(resolution=random_resolution)(empty_diag)
+    assert not np.any(lsc)
+    assert lsc.shape[0] % random_resolution == 0
+    slt = Silhouette(resolution=random_resolution, weight=pow(2))(empty_diag)
+    assert not np.any(slt)
+    assert slt.shape[0] == random_resolution
+    btc = BettiCurve(resolution=random_resolution)(empty_diag)
+    assert not np.any(btc)
+    assert btc.shape[0] == random_resolution
+    cpp = ComplexPolynomial(threshold=random_resolution, polynomial_type="T")(empty_diag)
+    assert not np.any(cpp)
+    assert cpp.shape[0] == random_resolution
+    tpv = TopologicalVector(threshold=random_resolution)(empty_diag)
+    assert tpv.shape[0] == random_resolution
+    assert not np.any(tpv)
+    prmg = PersistenceImage(resolution=[random_resolution, random_resolution])(empty_diag)
+    assert not np.any(prmg)
+    assert prmg.shape[0] == random_resolution * random_resolution
+    sce = Entropy(mode="scalar", resolution=random_resolution)(empty_diag)
+    assert not np.any(sce)
+    assert sce.shape[0] == 1
+    scv = Entropy(mode="vector", normalized=False, resolution=random_resolution)(empty_diag)
+    assert not np.any(scv)
+    assert scv.shape[0] == random_resolution
+
+
+def test_entropy_miscalculation():
+    diag_ex = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 2.0]])
+
+    def pe(pd):
+        l = pd[:, 1] - pd[:, 0]
+        l = l / sum(l)
+        return -np.dot(l, np.log(l))
+
+    sce = Entropy(mode="scalar")
+    assert [[pe(diag_ex)]] == sce.fit_transform([diag_ex])
+    sce = Entropy(mode="vector", resolution=4, normalized=False, keep_endpoints=True)
+    pef = [-1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2),
+           -1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2),
+           -1/2*np.log(1/2),
+           0.0]
+    assert all(([pef] == sce.fit_transform([diag_ex]))[0])
+    sce = Entropy(mode="vector", resolution=4, normalized=True)
+    pefN = (sce.fit_transform([diag_ex]))[0]
+    area = np.linalg.norm(pefN, ord=1)
+    assert area == pytest.approx(1)
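+
+# Sanity check for the hard-coded values above: the lifetimes of diag_ex are
+# (1, 1, 2), so the normalized weights are p = (1/4, 1/4, 1/2) and the scalar
+# entropy is -sum(p * log(p)), exactly what pe() computes. In vector mode with
+# resolution=4 and keep_endpoints=True the sample grid is [0, 2/3, 4/3, 2], and
+# each entry of pef sums -p_i * log(p_i) over the bars still alive at that
+# sample point: all three bars at 0 and 2/3, only [0, 2] at 4/3, none at 2.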
["cloud", "iidproba"]: + for contrast in ["gaussian", "laplacian", "indicator"]: + atol_vectoriser = Atol( + quantiser=KMeans(n_clusters=1, random_state=202006), + weighting_method=weighting_method, + contrast=contrast, + ) + atol_vectoriser.fit([a, b, c]) + atol_vectoriser(a) + atol_vectoriser.transform(X=[a, b, c]) + + +from gudhi.representations.vector_methods import BettiCurve + +def test_infinity(): + a = np.array([[1.0, 8.0], [2.0, np.inf], [3.0, 4.0]]) + c = BettiCurve(20, [0.0, 10.0])(a) + assert c[1] == 0 + assert c[7] == 3 + assert c[9] == 2 + +def test_preprocessing_empty_diagrams(): + empty_diag = np.empty(shape = [0, 2]) + assert not np.any(BirthPersistenceTransform()(empty_diag)) + assert not np.any(Clamping().fit_transform(empty_diag)) + assert not np.any(DiagramScaler()(empty_diag)) + assert not np.any(Padding()(empty_diag)) + assert not np.any(ProminentPoints()(empty_diag)) + assert not np.any(DiagramSelector()(empty_diag)) + +def pow(n): + return lambda x: np.power(x[1]-x[0],n) + +def test_vectorization_empty_diagrams(): + empty_diag = np.empty(shape = [0, 2]) + random_resolution = random.randint(50,100)*10 # between 500 and 1000 + print("resolution = ", random_resolution) + lsc = Landscape(resolution=random_resolution)(empty_diag) + assert not np.any(lsc) + assert lsc.shape[0]%random_resolution == 0 + slt = Silhouette(resolution=random_resolution, weight=pow(2))(empty_diag) + assert not np.any(slt) + assert slt.shape[0] == random_resolution + btc = BettiCurve(resolution=random_resolution)(empty_diag) + assert not np.any(btc) + assert btc.shape[0] == random_resolution + cpp = ComplexPolynomial(threshold=random_resolution, polynomial_type="T")(empty_diag) + assert not np.any(cpp) + assert cpp.shape[0] == random_resolution + tpv = TopologicalVector(threshold=random_resolution)(empty_diag) + assert tpv.shape[0] == random_resolution + assert not np.any(tpv) + prmg = PersistenceImage(resolution=[random_resolution,random_resolution])(empty_diag) + assert not np.any(prmg) + assert prmg.shape[0] == random_resolution * random_resolution + sce = Entropy(mode="scalar", resolution=random_resolution)(empty_diag) + assert not np.any(sce) + assert sce.shape[0] == 1 + scv = Entropy(mode="vector", normalized=False, resolution=random_resolution)(empty_diag) + assert not np.any(scv) + assert scv.shape[0] == random_resolution + +def test_entropy_miscalculation(): + diag_ex = np.array([[0.0,1.0], [0.0,1.0], [0.0,2.0]]) + def pe(pd): + l = pd[:,1] - pd[:,0] + l = l/sum(l) + return -np.dot(l, np.log(l)) + sce = Entropy(mode="scalar") + assert [[pe(diag_ex)]] == sce.fit_transform([diag_ex]) + sce = Entropy(mode="vector", resolution=4, normalized=False, keep_endpoints=True) + pef = [-1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2), + -1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2), + -1/2*np.log(1/2), + 0.0] + assert all(([pef] == sce.fit_transform([diag_ex]))[0]) + sce = Entropy(mode="vector", resolution=4, normalized=True) + pefN = (sce.fit_transform([diag_ex]))[0] + area = np.linalg.norm(pefN, ord=1) + assert area==pytest.approx(1) + +def test_kernel_empty_diagrams(): + empty_diag = np.empty(shape = [0, 2]) + assert SlicedWassersteinDistance(num_directions=100)(empty_diag, empty_diag) == 0. + assert SlicedWassersteinKernel(num_directions=100, bandwidth=1.)(empty_diag, empty_diag) == 1. + assert WassersteinDistance(mode="hera", delta=0.0001)(empty_diag, empty_diag) == 0. + assert WassersteinDistance(mode="pot")(empty_diag, empty_diag) == 0. 
+
+
+def test_landscape_small_persistence_invariance():
+    dgm = np.array([[2., 6.], [2., 5.], [3., 7.]])
+    small_persistence_pts = np.random.rand(10, 2)
+    small_persistence_pts[:, 1] += small_persistence_pts[:, 0]
+    small_persistence_pts += np.min(dgm)
+    dgm_augmented = np.concatenate([dgm, small_persistence_pts], axis=0)
+
+    lds = Landscape(num_landscapes=2, resolution=5)
+    lds_dgm, lds_dgm_augmented = lds(dgm), lds(dgm_augmented)
+
+    assert np.all(np.isclose(lds_dgm, lds_dgm_augmented))
+
+
+def test_landscape_numeric():
+    dgm = np.array([[2., 6.], [3., 5.]])
+    lds_ref = np.array([
+        0., 0.5, 1., 1.5, 2., 1.5, 1., 0.5, 0.,  # tent of [2, 6]
+        0., 0., 0., 0.5, 1., 0.5, 0., 0., 0.,    # tent of [3, 5]
+        0., 0., 0., 0., 0., 0., 0., 0., 0.,
+        0., 0., 0., 0., 0., 0., 0., 0., 0.,
+    ])
+    lds_ref *= np.sqrt(2)
+    lds = Landscape(num_landscapes=4, resolution=9, sample_range=[2., 6.])
+    lds_dgm = lds(dgm)
+    assert np.all(np.isclose(lds_dgm, lds_ref))
+
+
+def test_landscape_nan_range():
+    dgm = np.array([[2., 6.], [3., 5.]])
+    lds = Landscape(num_landscapes=2, resolution=9, sample_range=[np.nan, 6.])
+    lds_dgm = lds(dgm)
+    assert (lds.sample_range_fixed[0] == 2) & (lds.sample_range_fixed[1] == 6)
+    assert lds.new_resolution == 10
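+
+
+# The checks below pin down the sampling-grid conventions: by default the
+# fitted grid_ lies strictly inside the diagram's [min, max] range, with
+# keep_endpoints=True it includes both endpoints, and BettiCurve with
+# resolution=None switches to the exact grid of event values, prefixed
+# by -inf.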
+def test_endpoints():
+    diags = [ np.array([[2., 3.]]) ]
+    for vec in [ Landscape(), Silhouette(), BettiCurve(), Entropy(mode="vector") ]:
+        vec.fit(diags)
+        assert vec.grid_[0] > 2 and vec.grid_[-1] < 3
+    for vec in [ Landscape(keep_endpoints=True), Silhouette(keep_endpoints=True), BettiCurve(keep_endpoints=True), Entropy(mode="vector", keep_endpoints=True)]:
+        vec.fit(diags)
+        assert vec.grid_[0] == 2 and vec.grid_[-1] == 3
+    vec = BettiCurve(resolution=None)
+    vec.fit(diags)
+    assert np.equal(vec.grid_, [-np.inf, 2., 3.]).all()
+
+
+def test_get_params():
+    for vec in [ Landscape(), Silhouette(), BettiCurve(), Entropy(mode="vector") ]:
+        vec.get_params()