diff options
author | Vincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com> | 2022-08-09 14:46:07 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-09 14:46:07 +0200 |
commit | 61dca80e02d94559934a1ecff8f0923fbcc3436c (patch) | |
tree | 212c59f34931c8ebbf0c289100633efa8f32274d /src | |
parent | 48d759239026f08b326edf5ba8c46f9547dcdd02 (diff) | |
parent | 3e703a44f81623049dd481537e1e2e3457d32943 (diff) |
Merge pull request #584 from Soriano-Trigueros/master
A bug in the Entropy function has been solved
Diffstat (limited to 'src')
-rw-r--r-- | src/python/gudhi/representations/vector_methods.py | 18 | ||||
-rwxr-xr-x | src/python/test/test_representations.py | 21 |
2 files changed, 26 insertions, 13 deletions
diff --git a/src/python/gudhi/representations/vector_methods.py b/src/python/gudhi/representations/vector_methods.py index f8078d03..69ff5e1e 100644 --- a/src/python/gudhi/representations/vector_methods.py +++ b/src/python/gudhi/representations/vector_methods.py @@ -508,26 +508,20 @@ class Entropy(BaseEstimator, TransformerMixin): new_X = BirthPersistenceTransform().fit_transform(X) for i in range(num_diag): - orig_diagram, diagram, num_pts_in_diag = X[i], new_X[i], X[i].shape[0] - try: - new_diagram = DiagramScaler(use=True, scalers=[([1], MaxAbsScaler())]).fit_transform([diagram])[0] - except ValueError: - # Empty persistence diagram case - https://github.com/GUDHI/gudhi-devel/issues/507 - assert len(diagram) == 0 - new_diagram = np.empty(shape = [0, 2]) - + orig_diagram, new_diagram, num_pts_in_diag = X[i], new_X[i], X[i].shape[0] + + p = new_diagram[:,1] + p = p/np.sum(p) if self.mode == "scalar": - ent = - np.sum( np.multiply(new_diagram[:,1], np.log(new_diagram[:,1])) ) + ent = -np.dot(p, np.log(p)) Xfit.append(np.array([[ent]])) - else: ent = np.zeros(self.resolution) for j in range(num_pts_in_diag): [px,py] = orig_diagram[j,:2] min_idx = np.clip(np.ceil((px - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) max_idx = np.clip(np.ceil((py - self.sample_range[0]) / step_x).astype(int), 0, self.resolution) - for k in range(min_idx, max_idx): - ent[k] += (-1) * new_diagram[j,1] * np.log(new_diagram[j,1]) + ent[min_idx:max_idx]-=p[j]*np.log(p[j]) if self.normalized: ent = ent / np.linalg.norm(ent, ord=1) Xfit.append(np.reshape(ent,[1,-1])) diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index d219ce7a..4a455bb6 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -152,7 +152,26 @@ def test_vectorization_empty_diagrams(): scv = Entropy(mode="vector", normalized=False, resolution=random_resolution)(empty_diag) assert not np.any(scv) assert scv.shape[0] == random_resolution - + +def test_entropy_miscalculation(): + diag_ex = np.array([[0.0,1.0], [0.0,1.0], [0.0,2.0]]) + def pe(pd): + l = pd[:,1] - pd[:,0] + l = l/sum(l) + return -np.dot(l, np.log(l)) + sce = Entropy(mode="scalar") + assert [[pe(diag_ex)]] == sce.fit_transform([diag_ex]) + sce = Entropy(mode="vector", resolution=4, normalized=False) + pef = [-1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2), + -1/4*np.log(1/4)-1/4*np.log(1/4)-1/2*np.log(1/2), + -1/2*np.log(1/2), + 0.0] + assert all(([pef] == sce.fit_transform([diag_ex]))[0]) + sce = Entropy(mode="vector", resolution=4, normalized=True) + pefN = (sce.fit_transform([diag_ex]))[0] + area = np.linalg.norm(pefN, ord=1) + assert area==1 + def test_kernel_empty_diagrams(): empty_diag = np.empty(shape = [0, 2]) assert SlicedWassersteinDistance(num_directions=100)(empty_diag, empty_diag) == 0. |