summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rémi Flamary <remi.flamary@gmail.com> 2018-07-18 11:34:37 +0200
committer: GitHub <noreply@github.com> 2018-07-18 11:34:37 +0200
commit: 5cd6c0aae23a36fe27c188cdd18c7f0fba8a0360 (patch)
tree: d7b0968d9d50f0e40d225aeddc85b10d8e6c4cca
parent: 7c5c8803b2bdb67545783db3321b9d5a81a063d6 (diff)
parent: 0764e356325df7e18f72c0ff468bfa8f8ee35059 (diff)
Merge pull request #57 from LeoGautheron/master
Speed-up Sinkhorn
-rw-r--r-- ot/bregman.py 15
-rw-r--r-- ot/utils.py 31
2 files changed, 42 insertions, 4 deletions
diff --git a/ot/bregman.py b/ot/bregman.py
index b017c1a..c8e69ce 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -344,8 +344,14 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
# print(reg)
- K = np.exp(-M / reg)
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
# print(np.min(K))
+ tmp = np.empty(K.shape, dtype=M.dtype)
+ tmp2 = np.empty(b.shape, dtype=M.dtype)
Kp = (1 / a).reshape(-1, 1) * K
cpt = 0
@@ -373,8 +379,11 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
np.sum((v - vprev)**2) / np.sum((v)**2)
else:
- transp = u.reshape(-1, 1) * (K * v)
- err = np.linalg.norm((np.sum(transp, axis=0) - b))**2
+ np.multiply(u.reshape(-1, 1), K, out=tmp)
+ np.multiply(tmp, v.reshape(1, -1), out=tmp)
+ np.sum(tmp, axis=0, out=tmp2)
+ tmp2 -= b
+ err = np.linalg.norm(tmp2)**2
if log:
log['err'].append(err)
diff --git a/ot/utils.py b/ot/utils.py
index 7dac283..bb21b38 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -77,6 +77,34 @@ def clean_zeros(a, b, M):
return a2, b2, M2
+def euclidean_distances(X, Y, squared=False):
+    """Compute the pairwise Euclidean distances between the rows of X and Y.
+
+    The matrix is built from the expansion
+    ``||x - y||^2 = ||x||^2 - 2 <x, y> + ||y||^2`` so that the bulk of the
+    work is a single matrix product.
+
+    Parameters
+    ----------
+    X : {array-like}, shape (n_samples_1, n_features)
+    Y : {array-like}, shape (n_samples_2, n_features)
+        Pass the same object as ``X`` to get the distances of ``X`` to
+        itself; the diagonal is then forced to exactly zero.
+    squared : boolean, optional
+        Return squared Euclidean distances.
+
+    Returns
+    -------
+    distances : {array}, shape (n_samples_1, n_samples_2)
+        Floating-point distance matrix (non-floating input is computed in
+        float64).
+    """
+    # Remember the identity before any conversion creates new objects.
+    same_input = X is Y
+
+    X = np.asarray(X)
+    if not np.issubdtype(X.dtype, np.floating):
+        # An integer buffer cannot receive the in-place sqrt below.
+        X = X.astype(np.float64)
+    if same_input:
+        Y = X
+    else:
+        Y = np.asarray(Y)
+        if not np.issubdtype(Y.dtype, np.floating):
+            Y = Y.astype(np.float64)
+
+    # Squared norms of every row, shaped to broadcast over the result.
+    XX = np.einsum('ij,ij->i', X, X)[:, np.newaxis]
+    YY = np.einsum('ij,ij->i', Y, Y)[np.newaxis, :]
+
+    distances = np.dot(X, Y.T)
+    distances *= -2
+    distances += XX
+    distances += YY
+    # Rounding can leave tiny negative values; clip them before the sqrt.
+    np.maximum(distances, 0, out=distances)
+    if same_input:
+        # Ensure that distances between vectors and themselves are set to 0.0.
+        # This may not be the case due to floating point rounding errors.
+        distances.flat[::distances.shape[0] + 1] = 0.0
+    return distances if squared else np.sqrt(distances, out=distances)
+
+
def dist(x1, x2=None, metric='sqeuclidean'):
"""Compute distance between samples in x1 and x2 using function scipy.spatial.distance.cdist
@@ -104,7 +132,8 @@ def dist(x1, x2=None, metric='sqeuclidean'):
"""
if x2 is None:
x2 = x1
-
+ if metric == "sqeuclidean":
+ return euclidean_distances(x1, x2, squared=True)
return cdist(x1, x2, metric=metric)