diff options
author | Rémi Flamary <remi.flamary@gmail.com> | 2018-07-18 11:34:37 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-07-18 11:34:37 +0200 |
commit | 5cd6c0aae23a36fe27c188cdd18c7f0fba8a0360 (patch) | |
tree | d7b0968d9d50f0e40d225aeddc85b10d8e6c4cca | |
parent | 7c5c8803b2bdb67545783db3321b9d5a81a063d6 (diff) | |
parent | 0764e356325df7e18f72c0ff468bfa8f8ee35059 (diff) |
Merge pull request #57 from LeoGautheron/master
Speed-up Sinkhorn
-rw-r--r-- | ot/bregman.py | 15 | ||||
-rw-r--r-- | ot/utils.py | 31 |
2 files changed, 42 insertions, 4 deletions
diff --git a/ot/bregman.py b/ot/bregman.py index b017c1a..c8e69ce 100644 --- a/ot/bregman.py +++ b/ot/bregman.py @@ -344,8 +344,14 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, # print(reg) - K = np.exp(-M / reg) + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty(M.shape, dtype=M.dtype) + np.divide(M, -reg, out=K) + np.exp(K, out=K) + # print(np.min(K)) + tmp = np.empty(K.shape, dtype=M.dtype) + tmp2 = np.empty(b.shape, dtype=M.dtype) Kp = (1 / a).reshape(-1, 1) * K cpt = 0 @@ -373,8 +379,11 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ np.sum((v - vprev)**2) / np.sum((v)**2) else: - transp = u.reshape(-1, 1) * (K * v) - err = np.linalg.norm((np.sum(transp, axis=0) - b))**2 + np.multiply(u.reshape(-1, 1), K, out=tmp) + np.multiply(tmp, v.reshape(1, -1), out=tmp) + np.sum(tmp, axis=0, out=tmp2) + tmp2 -= b + err = np.linalg.norm(tmp2)**2 if log: log['err'].append(err) diff --git a/ot/utils.py b/ot/utils.py index 7dac283..bb21b38 100644 --- a/ot/utils.py +++ b/ot/utils.py @@ -77,6 +77,34 @@ def clean_zeros(a, b, M): return a2, b2, M2 +def euclidean_distances(X, Y, squared=False): + """ + Considering the rows of X (and Y=X) as vectors, compute the + distance matrix between each pair of vectors. + Parameters + ---------- + X : {array-like}, shape (n_samples_1, n_features) + Y : {array-like}, shape (n_samples_2, n_features) + squared : boolean, optional + Return squared Euclidean distances. + Returns + ------- + distances : {array}, shape (n_samples_1, n_samples_2) + """ + XX = np.einsum('ij,ij->i', X, X)[:, np.newaxis] + YY = np.einsum('ij,ij->i', Y, Y)[np.newaxis, :] + distances = np.dot(X, Y.T) + distances *= -2 + distances += XX + distances += YY + np.maximum(distances, 0, out=distances) + if X is Y: + # Ensure that distances between vectors and themselves are set to 0.0. + # This may not be the case due to floating point rounding errors. 
+ distances.flat[::distances.shape[0] + 1] = 0.0 + return distances if squared else np.sqrt(distances, out=distances) + + def dist(x1, x2=None, metric='sqeuclidean'): """Compute distance between samples in x1 and x2 using function scipy.spatial.distance.cdist @@ -104,7 +132,8 @@ def dist(x1, x2=None, metric='sqeuclidean'): """ if x2 is None: x2 = x1 - + if metric == "sqeuclidean": + return euclidean_distances(x1, x2, squared=True) return cdist(x1, x2, metric=metric) |