sinkhorn GPU implementation

author: Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr> 2017-04-20 12:12:15 +0200
committer: Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr> 2017-04-20 12:12:15 +0200
commit: 16f51f971607efab2c73958d207c582b389406c8 (patch)
tree: 299a4f6f13faf8545d2144767e9a7791098aacf8 /ot/gpu/bregman.py
parent: 48ec27d8e1c2599bd6d9015d15f4204b8116af28 (diff)
1 files changed, 83 insertions, 0 deletions
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
new file mode 100644
index 0000000..3fdf11b
--- /dev/null
+++ b/ot/gpu/bregman.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+"""
+Bregman projections for regularized OT with GPU
+"""
+
+import numpy as np
+
+
+def sinkhornGPU(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
+                log=False, cudamat=None):
+    # init data
+    Nini = len(a)
+    Nfin = len(b)
+
+    if log:
+        log = {'err': []}
+
+    # we assume that no distances are null except those of the diagonal of
+    # distances
+    u = (np.ones(Nini)/Nini).reshape((Nini, 1))
+    u_GPU = cudamat.CUDAMatrix(u)
+    a_GPU = cudamat.CUDAMatrix(a.reshape((Nini, 1)))
+    ones_GPU = cudamat.empty(u_GPU.shape).assign(1)
+    v = (np.ones(Nfin)/Nfin).reshape((Nfin, 1))
+    v_GPU = cudamat.CUDAMatrix(v)
+    b_GPU = cudamat.CUDAMatrix(b.reshape((Nfin, 1)))
+
+    M_GPU.divide(-reg)
+
+    K_GPU = cudamat.exp(M_GPU)
+
+    ones_GPU.divide(a_GPU, target=a_GPU)
+    Kp_GPU = cudamat.empty(K_GPU.shape)
+    K_GPU.mult_by_col(a_GPU, target=Kp_GPU)
+
+    tmp_GPU = cudamat.empty(K_GPU.shape)
+
+    cpt = 0
+    err = 1
+    while (err > stopThr and cpt < numItermax):
+        uprev_GPU = u_GPU.copy()
+        vprev_GPU = v_GPU.copy()
+
+        KtransposeU_GPU = K_GPU.transpose().dot(u_GPU)
+        b_GPU.divide(KtransposeU_GPU, target=v_GPU)
+        ones_GPU.divide(Kp_GPU.dot(v_GPU), target=u_GPU)
+
+        if (np.any(KtransposeU_GPU.asarray() == 0) or
+           not u_GPU.allfinite() or not v_GPU.allfinite()):
+            # we have reached the machine precision
+            # come back to previous solution and quit loop
+            print('Warning: numerical errors at iteration', cpt)
+            u_GPU = uprev_GPU.copy()
+            v_GPU = vprev_GPU.copy()
+            break
+        if cpt % 10 == 0:
+            # we can speed up the process by checking for the error only all
+            # the 10th iterations
+            K_GPU.mult_by_col(u_GPU, target=tmp_GPU)
+            tmp_GPU.mult_by_row(v_GPU.transpose(), target=tmp_GPU)
+
+            bcopy_GPU = b_GPU.copy().transpose()
+            bcopy_GPU.add_sums(tmp_GPU, axis=0, beta=-1)
+            err = bcopy_GPU.euclid_norm()**2
+            if log:
+                log['err'].append(err)
+
+            if verbose:
+                if cpt % 200 == 0:
+                    print('{:5s}|{:12s}'.format('It.', 'Err')+'\n'+'-'*19)
+                print('{:5d}|{:8e}|'.format(cpt, err))
+        cpt += 1
+    if log:
+        log['u'] = u_GPU.asarray()
+        log['v'] = v_GPU.asarray()
+
+    # print('err=',err,' cpt=',cpt)
+    K_GPU.mult_by_col(u_GPU, target=K_GPU)
+    K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU)
+    if log:
+        return K_GPU.asarray(), log
+    else:
+        return K_GPU.asarray()
author	Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr>	2017-04-20 12:12:15 +0200
committer	Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr>	2017-04-20 12:12:15 +0200
commit	16f51f971607efab2c73958d207c582b389406c8 (patch)
tree	299a4f6f13faf8545d2144767e9a7791098aacf8 /ot/gpu/bregman.py
parent	48ec27d8e1c2599bd6d9015d15f4204b8116af28 (diff)