ot/gpu/da.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174

# -*- coding: utf-8 -*-
"""
Domain adaptation with optimal transport and GPU
"""

import numpy as np
from ..utils import unif
from ..da import OTDA
from .bregman import sinkhorn
import cudamat


def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
    """
    Compute the pairwise euclidean distance between matrices a and b.


    Parameters
    ----------
    a : np.ndarray (n, f)
        first matrice
    b : np.ndarray (m, f)
        second matrice
    returnAsGPU : boolean, optional (default False)
        if True, returns cudamat matrix still on GPU, else return np.ndarray
    squared : boolean, optional (default False)
        if True, return squared euclidean distance matrice


    Returns
    -------
    c : (n x m) np.ndarray or cudamat.CUDAMatrix
        pairwise euclidean distance distance matrix
    """
    # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
    # First compute in c_GPU the squared euclidean distance. And return its
    # square root. At each cell [i,j] of c, we want to have
    # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that
    # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c:
    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2).

    a_GPU = cudamat.CUDAMatrix(a)
    b_GPU = cudamat.CUDAMatrix(b)

    # Multiply a by b transpose to obtain in each cell [i,j] of c the
    # value sum{k in range(f)} ( a[i,k]b[j,k] )
    c_GPU = cudamat.dot(a_GPU, b_GPU.transpose())
    # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] )
    c_GPU.mult(-2)

    # Compute the vectors of the sum of squared elements.
    a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1)
    b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1)

    # Add the vectors in each columns (respectivly rows) of c.
    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] )
    c_GPU.add_col_vec(a_GPU)
    # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2)
    c_GPU.add_row_vec(b_GPU.transpose())

    if not squared:
        c_GPU = cudamat.sqrt(c_GPU)

    if returnAsGPU:
        return c_GPU
    else:
        return c_GPU.asarray()


def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
                     numInnerItermax=200, stopInnerThr=1e-9,
                     verbose=False, log=False):
    p = 0.5
    epsilon = 1e-3
    Nfin = len(b)

    indices_labels = []
    classes = np.unique(labels_a)
    for c in classes:
        idxc, = np.where(labels_a == c)
        indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1)))

    Mreg_GPU = cudamat.empty(M_GPU.shape)
    W_GPU = cudamat.empty(M_GPU.shape).assign(0)

    for cpt in range(numItermax):
        Mreg_GPU.assign(M_GPU)
        Mreg_GPU.add_mult(W_GPU, eta)
        transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax,
                              stopThr=stopInnerThr, returnAsGPU=True)
        # the transport has been computed. Check if classes are really
        # separated
        W_GPU.assign(1)
        W_GPU = W_GPU.transpose()
        for (i, c) in enumerate(classes):
            (_, nbRow) = indices_labels[i].shape
            tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
            transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU)
            majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon)
            cudamat.pow(majs_GPU, (p-1))
            majs_GPU.mult(p)

            tmpC_GPU.assign(0)
            tmpC_GPU.add_col_vec(majs_GPU)
            W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU)

        W_GPU = W_GPU.transpose()

    return transp_GPU.asarray()


class OTDA_GPU(OTDA):
    def normalizeM(self, norm):
        if norm == "median":
            self.M_GPU.divide(float(np.median(self.M_GPU.asarray())))
        elif norm == "max":
            self.M_GPU.divide(float(np.max(self.M_GPU.asarray())))
        elif norm == "log":
            self.M_GPU.add(1)
            cudamat.log(self.M_GPU)
        elif norm == "loglog":
            self.M_GPU.add(1)
            cudamat.log(self.M_GPU)
            self.M_GPU.add(1)
            cudamat.log(self.M_GPU)


class OTDA_sinkhorn(OTDA_GPU):
    def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs):
        cudamat.init()
        xs = np.asarray(xs, dtype=np.float64)
        xt = np.asarray(xt, dtype=np.float64)

        self.xs = xs
        self.xt = xt

        if wt is None:
            wt = unif(xt.shape[0])
        if ws is None:
            ws = unif(xs.shape[0])

        self.ws = ws
        self.wt = wt

        self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
                                          squared=True)
        self.normalizeM(norm)
        self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs)
        self.computed = True


class OTDA_lpl1(OTDA_GPU):
    def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None,
            **kwargs):
        cudamat.init()
        xs = np.asarray(xs, dtype=np.float64)
        xt = np.asarray(xt, dtype=np.float64)

        self.xs = xs
        self.xt = xt

        if wt is None:
            wt = unif(xt.shape[0])
        if ws is None:
            ws = unif(xs.shape[0])

        self.ws = ws
        self.wt = wt

        self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
                                          squared=True)
        self.normalizeM(norm)
        self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs)
        self.computed = True