From 95db977e8b931277af5dadbd79eccbd5fbb8bb62 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort
Date: Wed, 12 Jul 2017 23:56:27 +0200
Subject: pep8

---
 test/test_emd_multi.py    | 27 +++++++++++++--------------
 test/test_gpu_sinkhorn.py |  6 ++++--
 test/test_load_module.py  |  4 ++--
 3 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'test')

diff --git a/test/test_emd_multi.py b/test/test_emd_multi.py
index ee0a20e..99173e9 100644
--- a/test/test_emd_multi.py
+++ b/test/test_emd_multi.py
@@ -7,31 +7,30 @@ Created on Fri Mar 10 09:56:06 2017
 """
 
 import numpy as np
-import pylab as pl
-import ot
 
+import ot
 from ot.datasets import get_1D_gauss as gauss
-reload(ot.lp)
+# reload(ot.lp)
 
 #%% parameters
 
-n=5000 # nb bins
+n = 5000  # nb bins
 
 # bin positions
-x=np.arange(n,dtype=np.float64)
+x = np.arange(n, dtype=np.float64)
 
 # Gaussian distributions
-a=gauss(n,m=20,s=5) # m= mean, s= std
+a = gauss(n, m=20, s=5)  # m= mean, s= std
 
-ls= range(20,1000,10)
-nb=len(ls)
-b=np.zeros((n,nb))
+ls = range(20, 1000, 10)
+nb = len(ls)
+b = np.zeros((n, nb))
 for i in range(nb):
-    b[:,i]=gauss(n,m=ls[i],s=10)
+    b[:, i] = gauss(n, m=ls[i], s=10)
 
 # loss matrix
-M=ot.dist(x.reshape((n,1)),x.reshape((n,1)))
-#M/=M.max()
+M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
+# M/=M.max()
 
 #%%
 
@@ -39,10 +38,10 @@ print('Computing {} EMD '.format(nb))
 
 # emd loss 1 proc
 ot.tic()
-emd_loss4=ot.emd2(a,b,M,1)
+emd_loss4 = ot.emd2(a, b, M, 1)
 ot.toc('1 proc : {} s')
 
 # emd loss multi proc
 ot.tic()
-emd_loss4=ot.emd2(a,b,M)
+emd_loss4 = ot.emd2(a, b, M)
 ot.toc('multi proc : {} s')

diff --git a/test/test_gpu_sinkhorn.py b/test/test_gpu_sinkhorn.py
index bfa2cd2..841f062 100644
--- a/test/test_gpu_sinkhorn.py
+++ b/test/test_gpu_sinkhorn.py
@@ -3,8 +3,10 @@ import numpy as np
 import time
 import ot.gpu
 
+
 def describeRes(r):
-    print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format(np.min(r),np.max(r),np.mean(r),np.std(r)))
+    print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format(
+        np.min(r), np.max(r), np.mean(r), np.std(r)))
 
 
 for n in [5000, 10000, 15000, 20000]:
@@ -23,4 +25,4 @@ for n in [5000, 10000, 15000, 20000]:
     print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1))
     describeRes(G1)
     print("  GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2))
-    describeRes(G2)
\ No newline at end of file
+    describeRes(G2)

diff --git a/test/test_load_module.py b/test/test_load_module.py
index a04c5df..d77261e 100644
--- a/test/test_load_module.py
+++ b/test/test_load_module.py
@@ -4,7 +4,7 @@ import ot
 import doctest
 
 # test lp solver
-doctest.testmod(ot.lp,verbose=True)
+doctest.testmod(ot.lp, verbose=True)
 
 # test bregman solver
-doctest.testmod(ot.bregman,verbose=True)
+doctest.testmod(ot.bregman, verbose=True)
-- 
cgit v1.2.3

From cd9842dc2978cba757a51c32cce0272858c9a385 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort
Date: Thu, 13 Jul 2017 00:04:49 +0200
Subject: more

---
 .travis.yml            | 2 +-
 test/test_emd_multi.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'test')

diff --git a/.travis.yml b/.travis.yml
index 050510b..8a95d7c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,4 +22,4 @@ install:
 script:
   - python test/test_load_module.py -v
   - flake8 examples/ ot/ test/
-  - py.test ot test
+  # - py.test ot test

diff --git a/test/test_emd_multi.py b/test/test_emd_multi.py
index 99173e9..2eef242 100644
--- a/test/test_emd_multi.py
+++ b/test/test_emd_multi.py
@@ -22,7 +22,7 @@ x = np.arange(n, dtype=np.float64)
 # Gaussian distributions
 a = gauss(n, m=20, s=5)  # m= mean, s= std
 
-ls = range(20, 1000, 10)
+ls = np.arange(20, 1000, 10)
 nb = len(ls)
 b = np.zeros((n, nb))
 for i in range(nb):
-- 
cgit v1.2.3

From 6ada23e5a672b08f28e21123c4135bc787e83b19 Mon Sep 17 00:00:00 2001
From: Alexandre Gramfort
Date: Thu, 20 Jul 2017 15:39:50 +0200
Subject: pep8

---
 examples/plot_OTDA_color_images.py         | 2 ++
 examples/plot_OTDA_mapping_color_images.py | 2 ++
 examples/plot_optim_OTreg.py               | 3 +++
 ot/utils.py                                | 6 ++++--
 test/test_gpu_sinkhorn_lpl1.py             | 1 +
 5 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'test')

diff --git a/examples/plot_OTDA_color_images.py b/examples/plot_OTDA_color_images.py
index a8861c6..75ac5b6 100644
--- a/examples/plot_OTDA_color_images.py
+++ b/examples/plot_OTDA_color_images.py
@@ -48,6 +48,7 @@ def mat2im(X, shape):
     """Converts back a matrix to an image"""
     return X.reshape(shape)
 
+
 X1 = im2mat(I1)
 X2 = im2mat(I2)
@@ -102,6 +103,7 @@ X2te = da_entrop.predict(X2, -1)
 def minmax(I):
     return np.clip(I, 0, 1)
 
+
 I1t = minmax(mat2im(X1t, I1.shape))
 I2t = minmax(mat2im(X2t, I2.shape))

diff --git a/examples/plot_OTDA_mapping_color_images.py b/examples/plot_OTDA_mapping_color_images.py
index 85c4b6b..9710461 100644
--- a/examples/plot_OTDA_mapping_color_images.py
+++ b/examples/plot_OTDA_mapping_color_images.py
@@ -48,6 +48,7 @@ def mat2im(X, shape):
     """Converts back a matrix to an image"""
     return X.reshape(shape)
 
+
 X1 = im2mat(I1)
 X2 = im2mat(I2)
@@ -85,6 +86,7 @@ pl.tight_layout()
 def minmax(I):
     return np.clip(I, 0, 1)
 
+
 # LP problem
 da_emd = ot.da.OTDA()     # init class
 da_emd.fit(xs, xt)       # fit distributions

diff --git a/examples/plot_optim_OTreg.py b/examples/plot_optim_OTreg.py
index e38253c..276b250 100644
--- a/examples/plot_optim_OTreg.py
+++ b/examples/plot_optim_OTreg.py
@@ -44,6 +44,7 @@ def f(G):
 def df(G):
     return G
 
+
 reg = 1e-1
 
 Gl2 = ot.optim.cg(a, b, M, reg, f, df, verbose=True)
@@ -61,6 +62,7 @@ def f(G):
 def df(G):
     return np.log(G) + 1.
 
+
 reg = 1e-3
 
 Ge = ot.optim.cg(a, b, M, reg, f, df, verbose=True)
@@ -78,6 +80,7 @@ def f(G):
 def df(G):
     return G
 
+
 reg1 = 1e-3
 reg2 = 1e-1

diff --git a/ot/utils.py b/ot/utils.py
index 6a43f61..1dee932 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -2,11 +2,13 @@
 """
 Various functions that can be useful
 """
+import multiprocessing
+from functools import reduce
+import time
+
 import numpy as np
 from scipy.spatial.distance import cdist
 
-import multiprocessing
-import time
 
 __time_tic_toc = time.time()

diff --git a/test/test_gpu_sinkhorn_lpl1.py b/test/test_gpu_sinkhorn_lpl1.py
index e6cdd31..f0eb7e6 100644
--- a/test/test_gpu_sinkhorn_lpl1.py
+++ b/test/test_gpu_sinkhorn_lpl1.py
@@ -8,6 +8,7 @@ def describeRes(r):
     print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
           .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
 
+
 for n in [5000, 10000, 15000, 20000]:
     print(n)
     a = np.random.rand(n // 4, 100)
-- 
cgit v1.2.3
From 5a6b5de9b2f28c93bef1a9db2e3b94693c05ff4f Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 11:15:33 +0200
Subject: add proper testing

---
 .travis.yml                    |  2 +-
 Makefile                       | 13 ++++++----
 docs/source/readme.rst         | 52 +++++++++++++++++++++++++++++--------
 test/test_emd_multi.py         | 47 ---------------------------------
 test/test_gpu.py               | 59 ++++++++++++++++++++++++++++++++++++++++++
 test/test_gpu_sinkhorn.py      | 28 --------------------
 test/test_gpu_sinkhorn_lpl1.py | 29 ---------------------
 test/test_load_module.py       | 10 -------
 test/test_ot.py                | 55 +++++++++++++++++++++++++++++++++++++++
 9 files changed, 164 insertions(+), 131 deletions(-)
 delete mode 100644 test/test_emd_multi.py
 create mode 100644 test/test_gpu.py
 delete mode 100644 test/test_gpu_sinkhorn.py
 delete mode 100644 test/test_gpu_sinkhorn_lpl1.py
 delete mode 100644 test/test_load_module.py
 create mode 100644 test/test_ot.py

(limited to 'test')

diff --git a/.travis.yml b/.travis.yml
index 8a95d7c..1c3a18c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,6 +20,6 @@ install:
   - python setup.py install
 # command to run tests + check syntax style
 script:
-  - python test/test_load_module.py -v
   - flake8 examples/ ot/ test/
+  - python -m py.test -v
 # - py.test ot test

diff --git a/Makefile b/Makefile
index c6a83c8..ff03a63 100644
--- a/Makefile
+++ b/Makefile
@@ -31,22 +31,25 @@ sremove :
 	tr '\n' '\0' < files.txt | sudo xargs -0 rm -f --
 	rm files.txt
 
-clean :
+clean : FORCE
 	$(PYTHON) setup.py clean
 
 pep8 :
 	flake8 examples/ ot/ test/
 
-test:
-	pytest
+test : FORCE pep8
+	python -m py.test -v
 
-uploadpypi:
+uploadpypi :
 	#python setup.py register
 	python setup.py sdist upload -r pypi
 
-rdoc:
+rdoc :
 	pandoc --from=markdown --to=rst --output=docs/source/readme.rst README.md
 
 notebook :
 	ipython notebook --matplotlib=inline --notebook-dir=notebooks/
+
+
+FORCE :

diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index 611001b..c1e0017 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -28,8 +28,8 @@ available in the examples folder.
 Installation
 ------------
 
-The Library has been tested on Linux and MacOSX. It requires a C++
-compiler for using the EMD solver and rely on the following Python
+The library has been tested on Linux, MacOSX and Windows. It requires a
+C++ compiler for using the EMD solver and relies on the following Python
 modules:
 
 - Numpy (>=1.11)
@@ -37,25 +37,34 @@ modules:
 - Cython (>=0.23)
 - Matplotlib (>=1.5)
 
-Under debian based linux the dependencies can be installed with
+Pip installation
+^^^^^^^^^^^^^^^^
+
+You can install the toolbox through PyPI with:
 
 ::
 
-    sudo apt-get install python-numpy python-scipy python-matplotlib cython
+    pip install POT
 
-To install the library, you can install it locally (after downloading
-it) on you machine using
+or get the very latest version by downloading it and then running:
 
 ::
 
     python setup.py install --user # for user install (no root)
 
-The toolbox is also available on PyPI with a possibly slightly older
-version. You can install it with:
+Anaconda installation with conda-forge
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you use the Anaconda python distribution, POT is available in
+`conda-forge `__. To install it and the
+required dependencies:
 
 ::
 
-    pip install POT
+    conda install -c conda-forge pot
+
+Post installation check
+^^^^^^^^^^^^^^^^^^^^^^^
 
 After a correct installation, you should be able to import the module
 without errors:
@@ -109,6 +118,7 @@ Short examples
 
        # a,b are 1D histograms (sum to 1 and positive)
        # M is the ground cost matrix
        Wd=ot.emd2(a,b,M) # exact linear program
+       Wd_reg=ot.sinkhorn2(a,b,M,reg) # entropic regularized OT
       # if b is a matrix compute all distances to a and return a vector
 
 -  Compute OT matrix
 
    ::
 
        # a,b are 1D histograms (sum to 1 and positive)
        # M is the ground cost matrix
-       Totp=ot.emd(a,b,M) # exact linear program
-       Totp_reg=ot.sinkhorn(a,b,M,reg) # entropic regularized OT
+       T=ot.emd(a,b,M) # exact linear program
+       T_reg=ot.sinkhorn(a,b,M,reg) # entropic regularized OT
 
 -  Compute Wasserstein barycenter
@@ -172,6 +182,7 @@ The contributors to this library are:
 
 -  `Rémi Flamary `__
 -  `Nicolas Courty `__
+-  `Alexandre Gramfort `__
 -  `Laetitia Chapel `__
 -  `Michael Perrot `__ (Mapping estimation)
@@ -189,6 +200,25 @@ languages):
 -  `Marco Cuturi `__ (Sinkhorn Knopp in Matlab/Cuda)
 
+Contributions and code of conduct
+---------------------------------
+
+Every contribution is welcome and should respect the `contribution
+guidelines `__. Each member of the project is expected
+to follow the `code of conduct `__.
+
+Support
+-------
+
+You can ask questions and join the development discussion:
+
+-  On the `POT Slack channel `__
+-  On the POT `mailing
+   list `__
+
+You can also post bug reports and feature requests in Github issues.
+Make sure to read our `guidelines `__ first.
+
 References
 ----------

diff --git a/test/test_emd_multi.py b/test/test_emd_multi.py
deleted file mode 100644
index 2eef242..0000000
--- a/test/test_emd_multi.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Mar 10 09:56:06 2017
-
-@author: rflamary
-"""
-
-import numpy as np
-
-import ot
-from ot.datasets import get_1D_gauss as gauss
-# reload(ot.lp)
-
-#%% parameters
-
-n = 5000  # nb bins
-
-# bin positions
-x = np.arange(n, dtype=np.float64)
-
-# Gaussian distributions
-a = gauss(n, m=20, s=5)  # m= mean, s= std
-
-ls = np.arange(20, 1000, 10)
-nb = len(ls)
-b = np.zeros((n, nb))
-for i in range(nb):
-    b[:, i] = gauss(n, m=ls[i], s=10)
-
-# loss matrix
-M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
-# M/=M.max()
-
-#%%
-
-print('Computing {} EMD '.format(nb))
-
-# emd loss 1 proc
-ot.tic()
-emd_loss4 = ot.emd2(a, b, M, 1)
-ot.toc('1 proc : {} s')
-
-# emd loss multi proc
-ot.tic()
-emd_loss4 = ot.emd2(a, b, M)
-ot.toc('multi proc : {} s')

diff --git a/test/test_gpu.py b/test/test_gpu.py
new file mode 100644
index 0000000..312a2d4
--- /dev/null
+++ b/test/test_gpu.py
@@ -0,0 +1,59 @@
+import ot
+import numpy as np
+import time
+import pytest
+
+
+@pytest.mark.skip(reason="No way to test GPU on travis yet")
+def test_gpu_sinkhorn():
+    import ot.gpu
+
+    def describeRes(r):
+        print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format(
+            np.min(r), np.max(r), np.mean(r), np.std(r)))
+
+    for n in [5000]:
+        print(n)
+        a = np.random.rand(n // 4, 100)
+        b = np.random.rand(n, 100)
+        time1 = time.time()
+        transport = ot.da.OTDA_sinkhorn()
+        transport.fit(a, b)
+        G1 = transport.G
+        time2 = time.time()
+        transport = ot.gpu.da.OTDA_sinkhorn()
+        transport.fit(a, b)
+        G2 = transport.G
+        time3 = time.time()
+        print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1))
+        describeRes(G1)
+        print("  GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2))
+        describeRes(G2)
+
+
+@pytest.mark.skip(reason="No way to test GPU on travis yet")
+def test_gpu_sinkhorn_lpl1():
+    def describeRes(r):
+        print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
+              .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
+
+    for n in [5000]:
+        print(n)
+        a = np.random.rand(n // 4, 100)
+        labels_a = np.random.randint(10, size=(n // 4))
+        b = np.random.rand(n, 100)
+        time1 = time.time()
+        transport = ot.da.OTDA_lpl1()
+        transport.fit(a, labels_a, b)
+        G1 = transport.G
+        time2 = time.time()
+        transport = ot.gpu.da.OTDA_lpl1()
+        transport.fit(a, labels_a, b)
+        G2 = transport.G
+        time3 = time.time()
+        print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format(
+            time2 - time1))
+        describeRes(G1)
+        print("  GPU sinkhorn lpl1, time: {:6.2f} sec ".format(
+            time3 - time2))
+        describeRes(G2)

diff --git a/test/test_gpu_sinkhorn.py b/test/test_gpu_sinkhorn.py
deleted file mode 100644
index 841f062..0000000
--- a/test/test_gpu_sinkhorn.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import ot
-import numpy as np
-import time
-import ot.gpu
-
-
-def describeRes(r):
-    print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format(
-        np.min(r), np.max(r), np.mean(r), np.std(r)))
-
-
-for n in [5000, 10000, 15000, 20000]:
-    print(n)
-    a = np.random.rand(n // 4, 100)
-    b = np.random.rand(n, 100)
-    time1 = time.time()
-    transport = ot.da.OTDA_sinkhorn()
-    transport.fit(a, b)
-    G1 = transport.G
-    time2 = time.time()
-    transport = ot.gpu.da.OTDA_sinkhorn()
-    transport.fit(a, b)
-    G2 = transport.G
-    time3 = time.time()
-    print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1))
-    describeRes(G1)
-    print("  GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2))
-    describeRes(G2)

diff --git a/test/test_gpu_sinkhorn_lpl1.py b/test/test_gpu_sinkhorn_lpl1.py
deleted file mode 100644
index f0eb7e6..0000000
--- a/test/test_gpu_sinkhorn_lpl1.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import ot
-import numpy as np
-import time
-import ot.gpu
-
-
-def describeRes(r):
-    print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
-          .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
-
-
-for n in [5000, 10000, 15000, 20000]:
-    print(n)
-    a = np.random.rand(n // 4, 100)
-    labels_a = np.random.randint(10, size=(n // 4))
-    b = np.random.rand(n, 100)
-    time1 = time.time()
-    transport = ot.da.OTDA_lpl1()
-    transport.fit(a, labels_a, b)
-    G1 = transport.G
-    time2 = time.time()
-    transport = ot.gpu.da.OTDA_lpl1()
-    transport.fit(a, labels_a, b)
-    G2 = transport.G
-    time3 = time.time()
-    print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format(time2 - time1))
-    describeRes(G1)
-    print("  GPU sinkhorn lpl1, time: {:6.2f} sec ".format(time3 - time2))
-    describeRes(G2)

diff --git a/test/test_load_module.py b/test/test_load_module.py
deleted file mode 100644
index d77261e..0000000
--- a/test/test_load_module.py
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-import ot
-import doctest
-
-# test lp solver
-doctest.testmod(ot.lp, verbose=True)
-
-# test bregman solver
-doctest.testmod(ot.bregman, verbose=True)

diff --git a/test/test_ot.py b/test/test_ot.py
new file mode 100644
index 0000000..51ee510
--- /dev/null
+++ b/test/test_ot.py
@@ -0,0 +1,55 @@
+
+
+import ot
+import numpy as np
+
+#import pytest
+
+
+def test_doctest():
+
+    import doctest
+
+    # test lp solver
+    doctest.testmod(ot.lp, verbose=True)
+
+    # test bregman solver
+    doctest.testmod(ot.bregman, verbose=True)
+
+
+#@pytest.mark.skip(reason="Seems to be a conflict between pytest and multiprocessing")
+def test_emd_multi():
+
+    from ot.datasets import get_1D_gauss as gauss
+
+    n = 1000  # nb bins
+
+    # bin positions
+    x = np.arange(n, dtype=np.float64)
+
+    # Gaussian distributions
+    a = gauss(n, m=20, s=5)  # m= mean, s= std
+
+    ls = np.arange(20, 1000, 10)
+    nb = len(ls)
+    b = np.zeros((n, nb))
+    for i in range(nb):
+        b[:, i] = gauss(n, m=ls[i], s=10)
+
+    # loss matrix
+    M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
+    # M/=M.max()
+
+    print('Computing {} EMD '.format(nb))
+
+    # emd loss 1 proc
+    ot.tic()
+    emd1 = ot.emd2(a, b, M, 1)
+    ot.toc('1 proc : {} s')
+
+    # emd loss multi proc
+    ot.tic()
+    emdn = ot.emd2(a, b, M)
+    ot.toc('multi proc : {} s')
+
+    assert np.allclose(emd1, emdn)
-- 
cgit v1.2.3

From ff104a6dde2d652283f72d7901bbe79dfb8571ed Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 11:54:59 +0200
Subject: add test for emd and emd2

---
 test/test_ot.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'test')

diff --git a/test/test_ot.py b/test/test_ot.py
index 51ee510..6976818 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -3,7 +3,7 @@
 import ot
 import numpy as np
 
-#import pytest
+# import pytest
 
 
 def test_doctest():
@@ -17,8 +17,28 @@ def test_doctest():
     doctest.testmod(ot.bregman, verbose=True)
 
 
+def test_emd_emd2():
+    # test emd
+    n = 100
+
+    x = np.random.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    M = ot.dist(x, x)
+
+    G = ot.emd(u, u, M)
+
+    # check G is identity
+    assert np.allclose(G, np.eye(n) / n)
+
+    w = ot.emd2(u, u, M)
+
+    # check loss=0
+    assert np.allclose(w, 0)
+
+
 #@pytest.mark.skip(reason="Seems to be a conflict between pytest and multiprocessing")
-def test_emd_multi():
+def test_emd2_multi():
 
     from ot.datasets import get_1D_gauss as gauss
-- 
cgit v1.2.3
From 75492827c89a47cbc6807d4859be178d255c49bc Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 12:09:15 +0200
Subject: add test sinkhorn

---
 Makefile          |  3 +++
 ot/gpu/bregman.py |  2 +-
 test/test_ot.py   | 46 +++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 45 insertions(+), 6 deletions(-)

(limited to 'test')

diff --git a/Makefile b/Makefile
index cabe6a9..577bbbe 100644
--- a/Makefile
+++ b/Makefile
@@ -39,6 +39,9 @@ pep8 :
 test : FORCE pep8
 	python -m py.test -v test/
 
+pytest : FORCE
+	python -m py.test -v test/
+
 uploadpypi :
 	#python setup.py register
 	python setup.py sdist upload -r pypi

diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
index 7881c65..2302f80 100644
--- a/ot/gpu/bregman.py
+++ b/ot/gpu/bregman.py
@@ -9,7 +9,7 @@ import cudamat
 
 def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
              log=False, returnAsGPU=False):
-    """
+    r"""
     Solve the entropic regularization optimal transport problem on GPU
 
     The function solves the following optimization problem:

diff --git a/test/test_ot.py b/test/test_ot.py
index 6976818..b69d080 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -18,8 +18,9 @@ def test_doctest():
 
 def test_emd_emd2():
-    # test emd
+    # test emd and emd2 for simple identity
     n = 100
+    np.random.seed(0)
 
     x = np.random.randn(n, 2)
     u = ot.utils.unif(n)
@@ -35,14 +36,13 @@ def test_emd_emd2():
     # check loss=0
     assert np.allclose(w, 0)
 
-
-#@pytest.mark.skip(reason="Seems to be a conflict between pytest and multiprocessing")
+
 def test_emd2_multi():
 
     from ot.datasets import get_1D_gauss as gauss
 
     n = 1000  # nb bins
+    np.random.seed(0)
 
     # bin positions
     x = np.arange(n, dtype=np.float64)
@@ -72,4 +72,40 @@ def test_emd2_multi():
     emdn = ot.emd2(a, b, M)
     ot.toc('multi proc : {} s')
 
-    assert np.allclose(emd1, emdn)
+    assert np.allclose(emd1, emdn)
+
+
+def test_sinkhorn():
+    # test sinkhorn
+    n = 100
+    np.random.seed(0)
+
+    x = np.random.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    M = ot.dist(x, x)
+
+    G = ot.sinkhorn(u, u, M,1,stopThr=1e-10)
+
+    # check constraints
+    assert np.allclose(u, G.sum(1), atol=1e-05) # cf convergence sinkhorn
+    assert np.allclose(u, G.sum(0), atol=1e-05) # cf convergence sinkhorn
+
+def test_sinkhorn_variants():
+    # test sinkhorn
+    n = 100
+    np.random.seed(0)
+
+    x = np.random.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    M = ot.dist(x, x)
+
+    G0 = ot.sinkhorn(u, u, M,1, method='sinkhorn',stopThr=1e-10)
+    Gs = ot.sinkhorn(u, u, M,1, method='sinkhorn_stabilized',stopThr=1e-10)
+    Ges = ot.sinkhorn(u, u, M,1, method='sinkhorn_epsilon_scaling',stopThr=1e-10)
+
+    # check constraints
+    assert np.allclose(G0, Gs, atol=1e-05)
+    assert np.allclose(G0, Ges, atol=1e-05) #
+
-- 
cgit v1.2.3

From a8a0995edefd437f56b91b95c2628fb031428a08 Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 12:11:34 +0200
Subject: pep8 tests

---
 test/test_bregman.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 test/test_ot.py      | 34 ++++++++++++++++++----------------
 2 files changed, 61 insertions(+), 16 deletions(-)
 create mode 100644 test/test_bregman.py

(limited to 'test')

diff --git a/test/test_bregman.py b/test/test_bregman.py
new file mode 100644
index 0000000..fd2c972
--- /dev/null
+++ b/test/test_bregman.py
@@ -0,0 +1,43 @@
+
+
+import ot
+import numpy as np
+
+# import pytest
+
+
+def test_sinkhorn():
+    # test sinkhorn
+    n = 100
+    np.random.seed(0)
+
+    x = np.random.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    M = ot.dist(x, x)
+
+    G = ot.sinkhorn(u, u, M, 1, stopThr=1e-10)
+
+    # check constraints
+    assert np.allclose(u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
+    assert np.allclose(u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
+
+
+def test_sinkhorn_variants():
+    # test sinkhorn
+    n = 100
+    np.random.seed(0)
+
+    x = np.random.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    M = ot.dist(x, x)
+
+    G0 = ot.sinkhorn(u, u, M, 1, method='sinkhorn', stopThr=1e-10)
+    Gs = ot.sinkhorn(u, u, M, 1, method='sinkhorn_stabilized', stopThr=1e-10)
+    Ges = ot.sinkhorn(
+        u, u, M, 1, method='sinkhorn_epsilon_scaling', stopThr=1e-10)
+
+    # check constraints
+    assert np.allclose(G0, Gs, atol=1e-05)
+    assert np.allclose(G0, Ges, atol=1e-05)

diff --git a/test/test_ot.py b/test/test_ot.py
index b69d080..9103ac8 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -36,7 +36,8 @@ def test_emd_emd2():
     # check loss=0
     assert np.allclose(w, 0)
-
+
+
 def test_emd2_multi():
 
     from ot.datasets import get_1D_gauss as gauss
@@ -72,11 +73,11 @@ def test_emd2_multi():
     emdn = ot.emd2(a, b, M)
     ot.toc('multi proc : {} s')
 
-    assert np.allclose(emd1, emdn)
-
-
+    assert np.allclose(emd1, emdn)
+
+
 def test_sinkhorn():
-    # test sinkhorn
+    # test sinkhorn
     n = 100
     np.random.seed(0)
@@ -85,14 +86,15 @@ def test_sinkhorn():
 
     M = ot.dist(x, x)
 
-    G = ot.sinkhorn(u, u, M,1,stopThr=1e-10)
+    G = ot.sinkhorn(u, u, M, 1, stopThr=1e-10)
 
     # check constraints
-    assert np.allclose(u, G.sum(1), atol=1e-05) # cf convergence sinkhorn
-    assert np.allclose(u, G.sum(0), atol=1e-05) # cf convergence sinkhorn
+    assert np.allclose(u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
+    assert np.allclose(u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
+
 
 def test_sinkhorn_variants():
-    # test sinkhorn
+    # test sinkhorn
     n = 100
     np.random.seed(0)
@@ -101,11 +103,11 @@ def test_sinkhorn_variants():
 
     M = ot.dist(x, x)
 
-    G0 = ot.sinkhorn(u, u, M,1, method='sinkhorn',stopThr=1e-10)
-    Gs = ot.sinkhorn(u, u, M,1, method='sinkhorn_stabilized',stopThr=1e-10)
-    Ges = ot.sinkhorn(u, u, M,1, method='sinkhorn_epsilon_scaling',stopThr=1e-10)
+    G0 = ot.sinkhorn(u, u, M, 1, method='sinkhorn', stopThr=1e-10)
+    Gs = ot.sinkhorn(u, u, M, 1, method='sinkhorn_stabilized', stopThr=1e-10)
+    Ges = ot.sinkhorn(
+        u, u, M, 1, method='sinkhorn_epsilon_scaling', stopThr=1e-10)
 
     # check constraints
-    assert np.allclose(G0, Gs, atol=1e-05)
-    assert np.allclose(G0, Ges, atol=1e-05) #
+    assert np.allclose(G0, Gs, atol=1e-05)
+    assert np.allclose(G0, Ges, atol=1e-05)
-- 
cgit v1.2.3

From d9205c886219d5410bc4705b46d9f14710c81ddd Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 12:13:37 +0200
Subject: clean tests

---
 test/test_ot.py | 37 -------------------------------------
 1 file changed, 37 deletions(-)

(limited to 'test')

diff --git a/test/test_ot.py b/test/test_ot.py
index 9103ac8..3fa1bc4 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -74,40 +74,3 @@ def test_emd2_multi():
     ot.toc('multi proc : {} s')
 
     assert np.allclose(emd1, emdn)
-
-
-def test_sinkhorn():
-    # test sinkhorn
-    n = 100
-    np.random.seed(0)
-
-    x = np.random.randn(n, 2)
-    u = ot.utils.unif(n)
-
-    M = ot.dist(x, x)
-
-    G = ot.sinkhorn(u, u, M, 1, stopThr=1e-10)
-
-    # check constraints
-    assert np.allclose(u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
-    assert np.allclose(u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
-
-
-def test_sinkhorn_variants():
-    # test sinkhorn
-    n = 100
-    np.random.seed(0)
-
-    x = np.random.randn(n, 2)
-    u = ot.utils.unif(n)
-
-    M = ot.dist(x, x)
-
-    G0 = ot.sinkhorn(u, u, M, 1, method='sinkhorn', stopThr=1e-10)
-    Gs = ot.sinkhorn(u, u, M, 1, method='sinkhorn_stabilized', stopThr=1e-10)
-    Ges = ot.sinkhorn(
-        u, u, M, 1, method='sinkhorn_epsilon_scaling', stopThr=1e-10)
-
-    # check constraints
-    assert np.allclose(G0, Gs, atol=1e-05)
-    assert np.allclose(G0, Ges, atol=1e-05)
-- 
cgit v1.2.3

From 1cf304cee298e2752ce29c83e5201f593722c3af Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 13:40:51 +0200
Subject: add tests for utils

---
 test/test_utils.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 test/test_utils.py

(limited to 'test')

diff --git a/test/test_utils.py b/test/test_utils.py
new file mode 100644
index 0000000..3219fce
--- /dev/null
+++ b/test/test_utils.py
@@ -0,0 +1,76 @@
+
+
+import ot
+import numpy as np
+
+# import pytest
+
+
+def test_parmap():
+
+    n = 100
+
+    def f(i):
+        return 1.0 * i * i
+
+    a = np.arange(n)
+
+    l1 = map(f, a)
+
+    l2 = ot.utils.parmap(f, a)
+
+    assert np.allclose(l1, l2)
+
+
+def test_tic_toc():
+
+    import time
+
+    ot.tic()
+    time.sleep(0.5)
+    t = ot.toc()
+    t2 = ot.toq()
+
+    # test timing
+    assert np.allclose(0.5, t, rtol=1e-2, atol=1e-2)
+
+    # test toc vs toq
+    assert np.allclose(t, t2, rtol=1e-2, atol=1e-2)
+
+
+def test_kernel():
+
+    n = 100
+
+    x = np.random.randn(n, 2)
+
+    K = ot.utils.kernel(x, x)
+
+    # gaussian kernel has ones on the diagonal
+    assert np.allclose(np.diag(K), np.ones(n))
+
+
+def test_unif():
+
+    n = 100
+
+    u = ot.unif(n)
+
+    assert np.allclose(1, np.sum(u))
+
+
+def test_dist():
+
+    n = 100
+
+    x = np.random.randn(n, 2)
+
+    D = np.zeros((n, n))
+    for i in range(n):
+        for j in range(n):
+            D[i, j] = np.sum(np.square(x[i, :] - x[j, :]))
+
+    D2 = ot.dist(x, x)
+
+    # dist should return squared euclidean
+    assert np.allclose(D, D2)
-- 
cgit v1.2.3
From b2f91f24796a996a82db41e91f56ba6a51989159 Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 14:26:25 +0200
Subject: full coveragre utils

---
 Makefile           |  4 ++--
 test/test_gpu.py   | 18 ++++++++++++++----
 test/test_ot.py    |  4 +++-
 test/test_utils.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 65 insertions(+), 7 deletions(-)

(limited to 'test')

diff --git a/Makefile b/Makefile
index 577bbbe..98f5614 100644
--- a/Makefile
+++ b/Makefile
@@ -38,10 +38,10 @@ pep8 :
 	flake8 examples/ ot/ test/
 
 test : FORCE pep8
-	python -m py.test -v test/
+	python -m py.test -v test/ --cov=ot --cov-report html:cov_html
 
 pytest : FORCE
-	python -m py.test -v test/
+	python -m py.test -v test/ --cov=ot
 
 uploadpypi :
 	#python setup.py register

diff --git a/test/test_gpu.py b/test/test_gpu.py
index 312a2d4..49b98d0 100644
--- a/test/test_gpu.py
+++ b/test/test_gpu.py
@@ -3,8 +3,14 @@ import numpy as np
 import time
 import pytest
 
+try:  # test if cudamat installed
+    import ot.gpu
+    nogpu = False
+except ImportError:
+    nogpu = True
+
 
-@pytest.mark.skip(reason="No way to test GPU on travis yet")
+@pytest.mark.skipif(nogpu, reason="No GPU available")
 def test_gpu_sinkhorn():
     import ot.gpu
 
@@ -12,7 +18,7 @@ def test_gpu_sinkhorn():
         print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format(
             np.min(r), np.max(r), np.mean(r), np.std(r)))
 
-    for n in [5000]:
+    for n in [50, 100, 500, 1000]:
         print(n)
         a = np.random.rand(n // 4, 100)
         b = np.random.rand(n, 100)
@@ -30,14 +36,16 @@ def test_gpu_sinkhorn():
         print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1))
         describeRes(G1)
         print("  GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2))
         describeRes(G2)
 
+    assert np.allclose(G1, G2, rtol=1e-5, atol=1e-5)
 
-@pytest.mark.skip(reason="No way to test GPU on travis yet")
+
+@pytest.mark.skipif(nogpu, reason="No GPU available")
 def test_gpu_sinkhorn_lpl1():
     def describeRes(r):
         print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
               .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
 
-    for n in [5000]:
+    for n in [50, 100, 500, 1000]:
         print(n)
         a = np.random.rand(n // 4, 100)
         labels_a = np.random.randint(10, size=(n // 4))
         b = np.random.rand(n, 100)
@@ -57,3 +65,5 @@ def test_gpu_sinkhorn_lpl1():
         print("  GPU sinkhorn lpl1, time: {:6.2f} sec ".format(
             time3 - time2))
         describeRes(G2)
+
+    assert np.allclose(G1, G2, rtol=1e-5, atol=1e-5)

diff --git a/test/test_ot.py b/test/test_ot.py
index 3fa1bc4..16fd510 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -31,9 +31,11 @@ def test_emd_emd2():
 
     # check G is identity
     assert np.allclose(G, np.eye(n) / n)
+    # check constraints
+    assert np.allclose(u, G.sum(1))  # cf convergence sinkhorn
+    assert np.allclose(u, G.sum(0))  # cf convergence sinkhorn
 
     w = ot.emd2(u, u, M)
-
     # check loss=0
     assert np.allclose(w, 0)

diff --git a/test/test_utils.py b/test/test_utils.py
index 3219fce..e85e5b7 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -71,6 +71,52 @@ def test_dist():
             D[i, j] = np.sum(np.square(x[i, :] - x[j, :]))
 
     D2 = ot.dist(x, x)
+    D3 = ot.dist(x)
 
     # dist should return squared euclidean
     assert np.allclose(D, D2)
+    assert np.allclose(D, D3)
+
+
+def test_dist0():
+
+    n = 100
+    M = ot.utils.dist0(n, method='lin_square')
+
+    # dist0 default to linear sampling with quadratic loss
+    assert np.allclose(M[0, -1], (n - 1) * (n - 1))
+
+
+def test_dots():
+
+    n1, n2, n3, n4 = 100, 50, 200, 100
+
+    A = np.random.randn(n1, n2)
+    B = np.random.randn(n2, n3)
+    C = np.random.randn(n3, n4)
+
+    X1 = ot.utils.dots(A, B, C)
+
+    X2 = A.dot(B.dot(C))
+
+    assert np.allclose(X1, X2)
+
+
+def test_clean_zeros():
+
+    n = 100
+    nz = 50
+    nz2 = 20
+    u1 = ot.unif(n)
+    u1[:nz] = 0
+    u1 = u1 / u1.sum()
+    u2 = ot.unif(n)
+    u2[:nz2] = 0
+    u2 = u2 / u2.sum()
+
+    M = ot.utils.dist0(n)
+
+    a, b, M2 = ot.utils.clean_zeros(u1, u2, M)
+
+    assert len(a) == n - nz
+    assert len(b) == n - nz2
-- 
cgit v1.2.3

From c6e648fbebd1297428f514d7bd48d3eb8814aafd Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 14:31:00 +0200
Subject: test parmap python 3.5

---
 test/test_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'test')

diff --git a/test/test_utils.py b/test/test_utils.py
index e85e5b7..1a1ab02 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -15,9 +15,9 @@ def test_parmap():
 
     a = np.arange(n)
 
-    l1 = map(f, a)
+    l1 = np.array(map(f, a))
 
-    l2 = ot.utils.parmap(f, a)
+    l2 = np.array(ot.utils.parmap(f, a))
 
     assert np.allclose(l1, l2)
-- 
cgit v1.2.3

From a31d3c2375ffec7eb3754ab4b66f75ce9a51eddd Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 14:35:20 +0200
Subject: map to list

---
 test/test_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'test')

diff --git a/test/test_utils.py b/test/test_utils.py
index 1a1ab02..0883a8e 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -15,9 +15,9 @@ def test_parmap():
 
     a = np.arange(n)
 
-    l1 = np.array(map(f, a))
+    l1 = list(map(f, a))
 
-    l2 = np.array(ot.utils.parmap(f, a))
+    l2 = list(ot.utils.parmap(f, a))
 
     assert np.allclose(l1, l2)
-- 
cgit v1.2.3

From f8e822c48eff02a3d65fc83d09dc0471bc9555aa Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 14:49:14 +0200
Subject: test sinkhorn with empty marginals

---
 test/test_bregman.py | 31 ++++++++++++++++++++++++++++++-
 test/test_ot.py      | 23 +++++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

(limited to 'test')

diff --git a/test/test_bregman.py b/test/test_bregman.py
index fd2c972..b65de11 100644
--- a/test/test_bregman.py
+++ b/test/test_bregman.py
@@ -23,6 +23,33 @@ def test_sinkhorn():
     assert np.allclose(u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
 
 
+def test_sinkhorn_empty():
+    # test sinkhorn
+    n = 100
+    np.random.seed(0)
+
+    x = np.random.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    M = ot.dist(x, x)
+
+    G = ot.sinkhorn([], [], M, 1, stopThr=1e-10)
+    # check constraints
+    assert np.allclose(u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
+    assert np.allclose(u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
+
+    G = ot.sinkhorn([], [], M, 1, stopThr=1e-10, method='sinkhorn_stabilized')
+    # check constraints
+    assert np.allclose(u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
+    assert np.allclose(u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
+
+    G = ot.sinkhorn(
+        [], [], M, 1, stopThr=1e-10, method='sinkhorn_epsilon_scaling')
+    # check constraints
+    assert np.allclose(u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
+    assert np.allclose(u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
+
+
 def test_sinkhorn_variants():
     # test sinkhorn
     n = 100
@@ -37,7 +64,9 @@ def test_sinkhorn_variants():
     Gs = ot.sinkhorn(u, u, M, 1, method='sinkhorn_stabilized', stopThr=1e-10)
     Ges = ot.sinkhorn(
         u, u, M, 1, method='sinkhorn_epsilon_scaling', stopThr=1e-10)
+    Gerr = ot.sinkhorn(u, u, M, 1, method='do_not_exists', stopThr=1e-10)
 
-    # check constraints
+    # check values
     assert np.allclose(G0, Gs, atol=1e-05)
     assert np.allclose(G0, Ges, atol=1e-05)
+    assert np.allclose(G0, Gerr)

diff --git a/test/test_ot.py b/test/test_ot.py
index 16fd510..3897397 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -40,6 +40,29 @@ def test_emd_emd2():
     assert np.allclose(w, 0)
 
 
+def test_emd_empty():
+    # test emd and emd2 for simple identity
+    n = 100
+    np.random.seed(0)
+
+    x = np.random.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    M = ot.dist(x, x)
+
+    G = ot.emd([], [], M)
+
+    # check G is identity
+    assert np.allclose(G, np.eye(n) / n)
+    # check constraints
+    assert np.allclose(u, G.sum(1))  # cf convergence sinkhorn
+    assert np.allclose(u, G.sum(0))  # cf convergence sinkhorn
+
+    w = ot.emd2([], [], M)
+    # check loss=0
+    assert np.allclose(w, 0)
+
+
 def test_emd2_multi():
 
     from ot.datasets import get_1D_gauss as gauss
-- 
cgit v1.2.3
From 7d9c5e7ef81cfb1cd4725058c09a7f683ca03eef Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 14:58:15 +0200
Subject: add test optim

---
 test/test_optim.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 test/test_optim.py

(limited to 'test')

diff --git a/test/test_optim.py b/test/test_optim.py
new file mode 100644
index 0000000..43cba7d
--- /dev/null
+++ b/test/test_optim.py
@@ -0,0 +1,65 @@
+
+
+import ot
+import numpy as np
+
+# import pytest
+
+
+def test_conditional_gradient():
+
+    n = 100  # nb bins
+
+    # bin positions
+    x = np.arange(n, dtype=np.float64)
+
+    # Gaussian distributions
+    a = ot.datasets.get_1D_gauss(n, m=20, s=5)  # m= mean, s= std
+    b = ot.datasets.get_1D_gauss(n, m=60, s=10)
+
+    # loss matrix
+    M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
+    M /= M.max()
+
+    def f(G):
+        return 0.5 * np.sum(G**2)
+
+    def df(G):
+        return G
+
+    reg = 1e-1
+
+    G, log = ot.optim.cg(a, b, M, reg, f, df, verbose=True, log=True)
+
+    assert np.allclose(a, G.sum(1))
+    assert np.allclose(b, G.sum(0))
+
+
+def test_generalized_conditional_gradient():
+
+    n = 100  # nb bins
+
+    # bin positions
+    x = np.arange(n, dtype=np.float64)
+
+    # Gaussian distributions
+    a = ot.datasets.get_1D_gauss(n, m=20, s=5)  # m= mean, s= std
+    b = ot.datasets.get_1D_gauss(n, m=60, s=10)
+
+    # loss matrix
+    M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
+    M /= M.max()
+
+    def f(G):
+        return 0.5 * np.sum(G**2)
+
+    def df(G):
+        return G
+
+    reg1 = 1e-3
+    reg2 = 1e-1
+
+    G, log = ot.optim.gcg(a, b, M, reg1, reg2, f, df, verbose=True, log=True)
+
+    assert np.allclose(a, G.sum(1), atol=1e-05)
+    assert np.allclose(b, G.sum(0), atol=1e-05)
-- 
cgit v1.2.3

From 709d8cbc9f9961a5175eb64ae497b854e0b9b184 Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 15:14:59 +0200
Subject: add dr tests

---
 test/test_dr.py    | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 test/test_gpu.py   | 53 +++++++++++++++++++++++----------------------
 test/test_optim.py |  4 ++--
 3 files changed, 93 insertions(+), 27 deletions(-)
 create mode 100644 test/test_dr.py

(limited to 'test')

diff --git a/test/test_dr.py b/test/test_dr.py
new file mode 100644
index 0000000..24ccaa1
--- /dev/null
+++ b/test/test_dr.py
@@ -0,0 +1,63 @@
+import ot
+import numpy as np
+import pytest
+
+try:  # test if cudamat installed
+    import ot.dr
+    nogo = False
+except ImportError:
+    nogo = True
+
+
+@pytest.mark.skipif(nogo, reason="Missing modules (autograd or pymanopt)")
+def test_fda():
+
+    n = 100  # nb samples in source and target datasets
+    nz = 0.2
+    np.random.seed(0)
+
+    # generate circle dataset
+    t = np.random.rand(n) * 2 * np.pi
+    ys = np.floor((np.arange(n) * 1.0 / n * 3)) + 1
+    xs = np.concatenate(
+        (np.cos(t).reshape((-1, 1)), np.sin(t).reshape((-1, 1))), 1)
+    xs = xs * ys.reshape(-1, 1) + nz * np.random.randn(n, 2)
+
+    nbnoise = 8
+
+    xs = np.hstack((xs, np.random.randn(n, nbnoise)))
+
+    p = 2
+
+    Pfda, projfda = ot.dr.fda(xs, ys, p)
+
+    projfda(xs)
+
+    assert np.allclose(np.sum(Pfda**2, 0), np.ones(p))
+
+
+@pytest.mark.skipif(nogo, reason="Missing modules (autograd or pymanopt)")
+def test_wda():
+
+    n = 100  # nb samples in source and target datasets
+    nz = 0.2
+    np.random.seed(0)
+
+    # generate circle dataset
+    t = np.random.rand(n) * 2 * np.pi
+    ys = np.floor((np.arange(n) * 1.0 / n * 3)) + 1
+    xs = np.concatenate(
+        (np.cos(t).reshape((-1, 1)), np.sin(t).reshape((-1, 1))), 1)
+    xs = xs * ys.reshape(-1, 1) + nz * np.random.randn(n, 2)
+
+    nbnoise = 8
+
+    xs = np.hstack((xs, np.random.randn(n, nbnoise)))
+
+    p = 2
+
+    Pwda, projwda = ot.dr.wda(xs, ys, p, maxiter=10)
+
+    projwda(xs)
+
+    assert np.allclose(np.sum(Pwda**2, 0), np.ones(p))

diff --git a/test/test_gpu.py b/test/test_gpu.py
index 49b98d0..9cc39d7 100644
--- a/test/test_gpu.py
+++ b/test/test_gpu.py
@@ -12,7 +12,8 @@ except ImportError:
 
 @pytest.mark.skipif(nogpu, reason="No GPU available")
 def test_gpu_sinkhorn():
-    import ot.gpu
+
+    np.random.seed(0)
 
     def describeRes(r):
         print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format(
             np.min(r), np.max(r), np.mean(r), np.std(r)))
@@ -41,29 +42,31 @@ def test_gpu_sinkhorn():
 
 @pytest.mark.skipif(nogpu, reason="No GPU available")
 def test_gpu_sinkhorn_lpl1():
-    def describeRes(r):
-        print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
-              .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
+    np.random.seed(0)
+
+    def describeRes(r):
+        print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
+              .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
 
-    for n in [50, 100, 500, 1000]:
-        print(n)
-        a = np.random.rand(n // 4, 100)
-        labels_a = np.random.randint(10, size=(n // 4))
-        b = np.random.rand(n, 100)
-        time1 = time.time()
-        transport = ot.da.OTDA_lpl1()
-        transport.fit(a, labels_a, b)
-        G1 = transport.G
-        time2 = time.time()
-        transport = ot.gpu.da.OTDA_lpl1()
-        transport.fit(a, labels_a, b)
-        G2 = transport.G
-        time3 = time.time()
-        print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format(
-            time2 - time1))
-        describeRes(G1)
-        print("  GPU sinkhorn lpl1, time: {:6.2f} sec ".format(
-            time3 - time2))
-        describeRes(G2)
+    for n in [50, 100, 500, 1000]:
+        print(n)
+        a = np.random.rand(n // 4, 100)
+        labels_a = np.random.randint(10, size=(n // 4))
+        b = np.random.rand(n, 100)
+        time1 = time.time()
+        transport = ot.da.OTDA_lpl1()
+        transport.fit(a, labels_a, b)
+        G1 = transport.G
+        time2 = time.time()
+        transport = ot.gpu.da.OTDA_lpl1()
+        transport.fit(a, labels_a, b)
+        G2 = transport.G
+        time3 = time.time()
+        print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format(
+            time2 - time1))
+        describeRes(G1)
+        print("  GPU sinkhorn lpl1, time: {:6.2f} sec ".format(
+            time3 - time2))
+        describeRes(G2)
 
-    assert np.allclose(G1, G2, rtol=1e-5, atol=1e-5)
+    assert np.allclose(G1, G2, rtol=1e-5, atol=1e-5)

diff --git a/test/test_optim.py b/test/test_optim.py
index 43cba7d..a77a37c 100644
--- a/test/test_optim.py
+++ b/test/test_optim.py
@@ -9,7 +9,7 @@ import numpy as np
 def test_conditional_gradient():
 
     n = 100  # nb bins
-
+    np.random.seed(0)
     # bin positions
     x = np.arange(n, dtype=np.float64)
@@ -38,7 +38,7 @@ def test_conditional_gradient():
 def test_generalized_conditional_gradient():
 
     n = 100  # nb bins
-
+    np.random.seed(0)
     # bin positions
     x = np.arange(n, dtype=np.float64)
-- 
cgit v1.2.3

From 83ecc6df836d1a6b05bd641dfef465cc02b25b8f Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 15:23:14 +0200
Subject: bregman coverage

---
 test/test_bregman.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'test')

diff --git a/test/test_bregman.py b/test/test_bregman.py
index b65de11..78666c7 100644
--- a/test/test_bregman.py
+++ b/test/test_bregman.py
@@ -33,18 +33,20 @@ def test_sinkhorn_empty():
 
     M = ot.dist(x, x)
 
-    G = ot.sinkhorn([], [], M, 1, stopThr=1e-10)
+    G, log = ot.sinkhorn([], [], M, 1, stopThr=1e-10, verbose=True, log=True)
     # check constraints
     assert np.allclose(u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
     assert np.allclose(u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
 
-    G = ot.sinkhorn([], [], M, 1, stopThr=1e-10, method='sinkhorn_stabilized')
+    G, log = ot.sinkhorn([], [], M, 1, stopThr=1e-10,
+                         method='sinkhorn_stabilized', verbose=True, log=True)
     # check constraints
     assert np.allclose(u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
     assert np.allclose(u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
 
-    G = ot.sinkhorn(
-        [], [], M, 1, stopThr=1e-10, method='sinkhorn_epsilon_scaling')
+    G, log = ot.sinkhorn(
+        [], [], M, 1, stopThr=1e-10, method='sinkhorn_epsilon_scaling',
+        verbose=True, log=True)
     # check constraints
     assert np.allclose(u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
     assert np.allclose(u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
-- 
cgit v1.2.3

From 64cf2fc4f9a9331d510afd93e9bd3b8963ff879e Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 15:28:43 +0200
Subject: tets barycenter

---
 test/test_bregman.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'test')

diff --git a/test/test_bregman.py b/test/test_bregman.py
index 78666c7..2dd3498 100644
--- a/test/test_bregman.py
+++ b/test/test_bregman.py
@@ -72,3 +72,32 @@ def test_sinkhorn_variants():
     assert np.allclose(G0, Gs, atol=1e-05)
     assert np.allclose(G0, Ges, atol=1e-05)
     assert np.allclose(G0, Gerr)
+
+
+def test_bary():
+
+    n = 100  # nb bins
+
+    # bin positions
+    x = np.arange(n, dtype=np.float64)
+
+    # Gaussian distributions
+    a1 = ot.datasets.get_1D_gauss(n, m=30, s=10)  # m= mean, s= std
+    a2 = ot.datasets.get_1D_gauss(n, m=40, s=10)
+
+    # creating matrix A containing all distributions
+    A = np.vstack((a1, a2)).T
+    n_distributions = A.shape[1]
+
+    # loss matrix + normalization
+    M = ot.utils.dist0(n)
+    M /= M.max()
+
+    alpha = 0.5  # 0<=alpha<=1
+    weights = np.array([1 - alpha, alpha])
+
+    # wasserstein
+    reg = 1e-3
+    bary_wass = ot.bregman.barycenter(A, M, reg, weights)
+
+    assert np.allclose(1, np.sum(bary_wass))
-- 
cgit v1.2.3

From 33f3d309209baa8c5e127d02f00aae0660ed7bfb Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 15:29:48 +0200
Subject: clean pep8

---
 test/test_bregman.py | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'test')

diff --git a/test/test_bregman.py b/test/test_bregman.py
index 2dd3498..b204fe4 100644
--- a/test/test_bregman.py
+++ b/test/test_bregman.py
@@ -78,16 +78,12 @@ def test_bary():
 
     n = 100  # nb bins
 
-    # bin positions
-    x = np.arange(n, dtype=np.float64)
-
     # Gaussian distributions
     a1 = ot.datasets.get_1D_gauss(n, m=30, s=10)  # m= mean, s= std
     a2 = ot.datasets.get_1D_gauss(n, m=40, s=10)
 
     # creating matrix A containing all distributions
     A = np.vstack((a1, a2)).T
-    n_distributions = A.shape[1]
 
     # loss matrix + normalization
     M = ot.utils.dist0(n)
-- 
cgit v1.2.3

From bd705ed847dd7e43082e9d2771a59e539d6b7440 Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 15:45:09 +0200
Subject: add test yunmlix and bary

---
 test/test_bregman.py | 34 ++++++++++++++++++++++++++++++++++
 test/test_gpu.py     |  2 +-
 test/test_ot.py      |  2 +-
 3 files changed, 36 insertions(+), 2 deletions(-)

(limited to 'test')

diff --git a/test/test_bregman.py b/test/test_bregman.py
index b204fe4..025568c 100644
--- a/test/test_bregman.py
+++ b/test/test_bregman.py
@@ -97,3 +97,37 @@ def test_bary():
     bary_wass = ot.bregman.barycenter(A, M, reg, weights)
 
     assert np.allclose(1, np.sum(bary_wass))
+
+    ot.bregman.barycenter(A, M, reg, log=True, verbose=True)
+
+
+def test_unmix():
+
+    n = 50  # nb bins
+
+    # Gaussian distributions
+    a1 = ot.datasets.get_1D_gauss(n, m=20, s=10)  # m= mean, s= std
+    a2 = ot.datasets.get_1D_gauss(n, m=40, s=10)
+
+    a = ot.datasets.get_1D_gauss(n, m=30, s=10)
+
+    # creating matrix A containing all distributions
+    D = np.vstack((a1, a2)).T
+
+    # loss matrix + normalization
+    M = ot.utils.dist0(n)
+    M /= M.max()
+
+    M0 = ot.utils.dist0(2)
+    M0 /= M0.max()
+    h0 = ot.unif(2)
+
+    # wasserstein
+    reg = 1e-3
+    um = ot.bregman.unmix(a, D, M, M0, h0, reg, 1, alpha=0.01,)
+
+    assert np.allclose(1, np.sum(um), rtol=1e-03, atol=1e-03)
+    assert np.allclose([0.5, 0.5], um, rtol=1e-03, atol=1e-03)
+
+    ot.bregman.unmix(a, D, M, M0, h0, reg,
+                     1, alpha=0.01, log=True, verbose=True)

diff --git a/test/test_gpu.py b/test/test_gpu.py
index 9cc39d7..24797f2 100644
--- a/test/test_gpu.py
+++ b/test/test_gpu.py
@@ -48,7 +48,7 @@ def test_gpu_sinkhorn_lpl1():
         print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
               .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
 
-    for n in [50, 100, 500, 1000]:
+    for n in [50, 100, 500]:
         print(n)
         a = np.random.rand(n // 4, 100)
         labels_a = np.random.randint(10, size=(n // 4))

diff --git a/test/test_ot.py b/test/test_ot.py
index 3897397..5bf65c6 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -76,7 +76,7 @@ def test_emd2_multi():
     # Gaussian distributions
     a = gauss(n, m=20, s=5)  # m= mean, s= std
 
-    ls = np.arange(20, 1000, 10)
+    ls = np.arange(20, 1000, 20)
     nb = len(ls)
     b = np.zeros((n, nb))
     for i in range(nb):
-- 
cgit v1.2.3

From f204e983d969ed38c46d0bc85d0868a84c585db0 Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Mon, 24 Jul 2017 16:02:15 +0200
Subject: add test da 58% coverage

---
 test/test_da.py | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 test/test_da.py

(limited to 'test')

diff --git a/test/test_da.py b/test/test_da.py
new file mode 100644
index 0000000..50d3aba
--- /dev/null
+++ b/test/test_da.py
@@ -0,0 +1,67 @@
+
+
+import ot
+import numpy as np
+
+# import pytest
+
+
+def test_OTDA():
+
+    n = 150  # nb bins
+
+    xs, ys = ot.datasets.get_data_classif('3gauss', n)
+    xt, yt = ot.datasets.get_data_classif('3gauss2', n)
+
+    a, b = ot.unif(n), ot.unif(n)
+
+    # LP problem
+    da_emd = ot.da.OTDA()     # init class
+    da_emd.fit(xs, xt)       # fit distributions
+    da_emd.interp()    # interpolation of source samples
+    da_emd.predict(xs)    # interpolation of source samples
+
+    assert np.allclose(a, np.sum(da_emd.G, 1))
+    assert np.allclose(b, np.sum(da_emd.G, 0))
+
+    # sinkhorn regularization
+    lambd = 1e-1
+    da_entrop = ot.da.OTDA_sinkhorn()
+    da_entrop.fit(xs, xt, reg=lambd)
+    da_entrop.interp()
+    da_entrop.predict(xs)
+
+    assert np.allclose(a, np.sum(da_entrop.G, 1), rtol=1e-3, atol=1e-3)
+    assert np.allclose(b, np.sum(da_entrop.G, 0), rtol=1e-3, atol=1e-3)
+
+    # non-convex Group lasso regularization
+    reg = 1e-1
+    eta = 1e0
+    da_lpl1 = ot.da.OTDA_lpl1()
+    da_lpl1.fit(xs, ys, xt, reg=reg, eta=eta)
+    da_lpl1.interp()
+    da_lpl1.predict(xs)
+
+    assert np.allclose(a, np.sum(da_lpl1.G, 1), rtol=1e-3, atol=1e-3)
+    assert np.allclose(b, np.sum(da_lpl1.G, 0), rtol=1e-3, atol=1e-3)
+
+    # True Group lasso regularization
+    reg = 1e-1
+    eta = 2e0
+    da_l1l2 = ot.da.OTDA_l1l2()
+    da_l1l2.fit(xs, ys, xt, reg=reg, eta=eta, numItermax=20, verbose=True)
+    da_l1l2.interp()
+    da_l1l2.predict(xs)
+
+    assert np.allclose(a, np.sum(da_l1l2.G, 1), rtol=1e-3, atol=1e-3)
+    assert np.allclose(b, np.sum(da_l1l2.G, 0), rtol=1e-3, atol=1e-3)
+
+    # linear mapping
+    da_emd = ot.da.OTDA_mapping_linear()     # init class
+    da_emd.fit(xs, xt, numItermax=10)       # fit distributions
+    da_emd.predict(xs)    # interpolation of source samples
+
+    # nonlinear mapping
+    da_emd = ot.da.OTDA_mapping_kernel()     # init class
+    da_emd.fit(xs, xt, numItermax=10)       # fit distributions
+    da_emd.predict(xs)    # interpolation of source samples
-- 
cgit v1.2.3
samples + + mu_s = np.array([0, 0]) + cov_s = np.array([[1, 0], [0, 1]]) + + mu_t = np.array([4, 4]) + cov_t = np.array([[1, -.8], [-.8, 1]]) + + xs = ot.datasets.get_2D_samples_gauss(n, mu_s, cov_s) + xt = ot.datasets.get_2D_samples_gauss(n, mu_t, cov_t) + + G = 1.0 * (np.random.rand(n, n) < 0.01) + + ot.plot.plot2D_samples_mat(xs, xt, G, thr=1e-5) -- cgit v1.2.3 From a8d7301c132a225b5e4d78cae64683a5e08eae7f Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Jul 2017 16:39:19 +0200 Subject: add test plot and dataset --- test/test_dr.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'test') diff --git a/test/test_dr.py b/test/test_dr.py index 24ccaa1..3da7705 100644 --- a/test/test_dr.py +++ b/test/test_dr.py @@ -12,22 +12,17 @@ except ImportError: @pytest.mark.skipif(nogo, reason="Missing modules (autograd or pymanopt)") def test_fda(): - n = 100 # nb samples in source and target datasets - nz = 0.2 + n = 90 # nb samples in source and target datasets np.random.seed(0) # generate circle dataset - t = np.random.rand(n) * 2 * np.pi - ys = np.floor((np.arange(n) * 1.0 / n * 3)) + 1 - xs = np.concatenate( - (np.cos(t).reshape((-1, 1)), np.sin(t).reshape((-1, 1))), 1) - xs = xs * ys.reshape(-1, 1) + nz * np.random.randn(n, 2) + xs, ys = ot.datasets.get_data_classif('gaussrot', n) nbnoise = 8 xs = np.hstack((xs, np.random.randn(n, nbnoise))) - p = 2 + p = 1 Pfda, projfda = ot.dr.fda(xs, ys, p) -- cgit v1.2.3 From e11b1d1a77f201896fd3f70bc5b910e99610e951 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Jul 2017 17:08:25 +0200 Subject: test plot with no X --- test/test_plot.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/test_plot.py b/test/test_plot.py index 8916a85..69789fa 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -1,13 +1,14 @@ -import ot import numpy as np - -# import pytest +import matplotlib +matplotlib.use('Agg') def test_plot1D_mat(): + import ot + n = 100 # nb bins # bin positions @@ -26,6 +27,8 @@ def test_plot1D_mat(): def test_plot2D_samples_mat(): + import ot + n = 50 # nb samples mu_s = np.array([0, 0]) -- cgit v1.2.3 From 46f297f678de0051dc6d5067291d1e1046b4705e Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 26 Jul 2017 11:26:08 +0200 Subject: import nmpy before ot --- test/test_bregman.py | 4 ++-- test/test_da.py | 4 ++-- test/test_dr.py | 5 +++-- test/test_gpu.py | 3 ++- test/test_optim.py | 4 ++-- test/test_ot.py | 4 ++-- 6 files changed, 13 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/test/test_bregman.py b/test/test_bregman.py index 025568c..aaa2efc 100644 --- a/test/test_bregman.py +++ b/test/test_bregman.py @@ -1,7 +1,7 @@ - -import ot import numpy as np +import ot + # import pytest diff --git a/test/test_da.py b/test/test_da.py index 50d3aba..0d92b95 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -1,7 +1,7 @@ - -import ot import numpy as np +import ot + # import pytest diff --git a/test/test_dr.py b/test/test_dr.py index 3da7705..3faba48 100644 --- a/test/test_dr.py +++ b/test/test_dr.py @@ -1,8 +1,9 @@ -import ot + import numpy as np +import ot import pytest -try: # test if cudamat installed +try: # test if autograd and pymanopt are installed import ot.dr nogo = False except ImportError: diff --git a/test/test_gpu.py b/test/test_gpu.py index 24797f2..5184a6c 100644 --- a/test/test_gpu.py +++ b/test/test_gpu.py @@ -1,5 +1,6 @@ -import ot + import numpy as np +import ot import time import pytest diff --git a/test/test_optim.py 
b/test/test_optim.py index a77a37c..d5c4ad0 100644 --- a/test/test_optim.py +++ b/test/test_optim.py @@ -1,7 +1,7 @@ - -import ot import numpy as np +import ot + # import pytest diff --git a/test/test_ot.py b/test/test_ot.py index 5bf65c6..a30491d 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -1,7 +1,7 @@ - -import ot import numpy as np +import ot + # import pytest -- cgit v1.2.3 From 68d74902bcd3d988fff8cb7713314063f04c0089 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 26 Jul 2017 11:34:11 +0200 Subject: numpy assert + n_bins --- test/test_bregman.py | 63 ++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 32 deletions(-) (limited to 'test') diff --git a/test/test_bregman.py b/test/test_bregman.py index aaa2efc..1638ef6 100644 --- a/test/test_bregman.py +++ b/test/test_bregman.py @@ -3,15 +3,12 @@ import numpy as np import ot -# import pytest - - def test_sinkhorn(): # test sinkhorn n = 100 - np.random.seed(0) + rng = np.random.RandomState(0) - x = np.random.randn(n, 2) + x = rng.randn(n, 2) u = ot.utils.unif(n) M = ot.dist(x, x) @@ -19,45 +16,47 @@ def test_sinkhorn(): G = ot.sinkhorn(u, u, M, 1, stopThr=1e-10) # check constratints - assert np.allclose(u, G.sum(1), atol=1e-05) # cf convergence sinkhorn - assert np.allclose(u, G.sum(0), atol=1e-05) # cf convergence sinkhorn + np.testing.assert_allclose( + u, G.sum(1), atol=1e-05) # cf convergence sinkhorn + np.testing.assert_allclose( + u, G.sum(0), atol=1e-05) # cf convergence sinkhorn def test_sinkhorn_empty(): # test sinkhorn n = 100 - np.random.seed(0) + rng = np.random.RandomState(0) - x = np.random.randn(n, 2) + x = rng.randn(n, 2) u = ot.utils.unif(n) M = ot.dist(x, x) G, log = ot.sinkhorn([], [], M, 1, stopThr=1e-10, verbose=True, log=True) # check constratints - assert np.allclose(u, G.sum(1), atol=1e-05) # cf convergence sinkhorn - assert np.allclose(u, G.sum(0), atol=1e-05) # cf convergence sinkhorn + np.testing.assert_allclose(u, G.sum(1), atol=1e-05) + np.testing.assert_allclose(u, G.sum(0), atol=1e-05) G, log = ot.sinkhorn([], [], M, 1, stopThr=1e-10, method='sinkhorn_stabilized', verbose=True, log=True) # check constratints - assert np.allclose(u, G.sum(1), atol=1e-05) # cf convergence sinkhorn - assert np.allclose(u, G.sum(0), atol=1e-05) # cf convergence sinkhorn + np.testing.assert_allclose(u, G.sum(1), atol=1e-05) + np.testing.assert_allclose(u, G.sum(0), atol=1e-05) G, log = ot.sinkhorn( [], [], M, 1, stopThr=1e-10, method='sinkhorn_epsilon_scaling', verbose=True, log=True) # check constratints - assert np.allclose(u, G.sum(1), atol=1e-05) # cf convergence sinkhorn - assert np.allclose(u, G.sum(0), atol=1e-05) # cf convergence sinkhorn + np.testing.assert_allclose(u, G.sum(1), atol=1e-05) + np.testing.assert_allclose(u, G.sum(0), atol=1e-05) def test_sinkhorn_variants(): # test sinkhorn n = 100 - np.random.seed(0) + rng = np.random.RandomState(0) - x = np.random.randn(n, 2) + x = rng.randn(n, 2) u = ot.utils.unif(n) M = ot.dist(x, x) @@ -69,24 +68,24 @@ def test_sinkhorn_variants(): Gerr = ot.sinkhorn(u, u, M, 1, method='do_not_exists', stopThr=1e-10) # check values - assert np.allclose(G0, Gs, atol=1e-05) - assert np.allclose(G0, Ges, atol=1e-05) - assert np.allclose(G0, Gerr) + np.testing.assert_allclose(G0, Gs, atol=1e-05) + np.testing.assert_allclose(G0, Ges, atol=1e-05) + np.testing.assert_allclose(G0, Gerr) def test_bary(): - n = 100 # nb bins + n_bins = 100 # nb bins # Gaussian distributions - a1 = ot.datasets.get_1D_gauss(n, m=30, s=10) # m= mean, s= std - a2 
= ot.datasets.get_1D_gauss(n, m=40, s=10) + a1 = ot.datasets.get_1D_gauss(n_bins, m=30, s=10) # m= mean, s= std + a2 = ot.datasets.get_1D_gauss(n_bins, m=40, s=10) # creating matrix A containing all distributions A = np.vstack((a1, a2)).T # loss matrix + normalization - M = ot.utils.dist0(n) + M = ot.utils.dist0(n_bins) M /= M.max() alpha = 0.5 # 0<=alpha<=1 @@ -96,26 +95,26 @@ def test_bary(): reg = 1e-3 bary_wass = ot.bregman.barycenter(A, M, reg, weights) - assert np.allclose(1, np.sum(bary_wass)) + np.testing.assert_allclose(1, np.sum(bary_wass)) ot.bregman.barycenter(A, M, reg, log=True, verbose=True) def test_unmix(): - n = 50 # nb bins + n_bins = 50 # nb bins # Gaussian distributions - a1 = ot.datasets.get_1D_gauss(n, m=20, s=10) # m= mean, s= std - a2 = ot.datasets.get_1D_gauss(n, m=40, s=10) + a1 = ot.datasets.get_1D_gauss(n_bins, m=20, s=10) # m= mean, s= std + a2 = ot.datasets.get_1D_gauss(n_bins, m=40, s=10) - a = ot.datasets.get_1D_gauss(n, m=30, s=10) + a = ot.datasets.get_1D_gauss(n_bins, m=30, s=10) # creating matrix A containing all distributions D = np.vstack((a1, a2)).T # loss matrix + normalization - M = ot.utils.dist0(n) + M = ot.utils.dist0(n_bins) M /= M.max() M0 = ot.utils.dist0(2) @@ -126,8 +125,8 @@ def test_unmix(): reg = 1e-3 um = ot.bregman.unmix(a, D, M, M0, h0, reg, 1, alpha=0.01,) - assert np.allclose(1, np.sum(um), rtol=1e-03, atol=1e-03) - assert np.allclose([0.5, 0.5], um, rtol=1e-03, atol=1e-03) + np.testing.assert_allclose(1, np.sum(um), rtol=1e-03, atol=1e-03) + np.testing.assert_allclose([0.5, 0.5], um, rtol=1e-03, atol=1e-03) ot.bregman.unmix(a, D, M, M0, h0, reg, 1, alpha=0.01, log=True, verbose=True) -- cgit v1.2.3 From 67b011a2a6a0cb8dffbb7a2619875f0e0d79588c Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 26 Jul 2017 11:38:17 +0200 Subject: numpy assert test_da --- test/test_da.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/test/test_da.py b/test/test_da.py index 0d92b95..8df4795 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -6,9 +6,10 @@ import ot # import pytest -def test_OTDA(): +def test_otda(): - n = 150 # nb bins + n = 150 # nb samples + np.random.seed(0) xs, ys = ot.datasets.get_data_classif('3gauss', n) xt, yt = ot.datasets.get_data_classif('3gauss2', n) @@ -21,8 +22,8 @@ def test_OTDA(): da_emd.interp() # interpolation of source samples da_emd.predict(xs) # interpolation of source samples - assert np.allclose(a, np.sum(da_emd.G, 1)) - assert np.allclose(b, np.sum(da_emd.G, 0)) + np.testing.assert_allclose(a, np.sum(da_emd.G, 1)) + np.testing.assert_allclose(b, np.sum(da_emd.G, 0)) # sinkhorn regularization lambd = 1e-1 @@ -31,8 +32,8 @@ def test_OTDA(): da_entrop.interp() da_entrop.predict(xs) - assert np.allclose(a, np.sum(da_entrop.G, 1), rtol=1e-3, atol=1e-3) - assert np.allclose(b, np.sum(da_entrop.G, 0), rtol=1e-3, atol=1e-3) + np.testing.assert_allclose(a, np.sum(da_entrop.G, 1), rtol=1e-3, atol=1e-3) + np.testing.assert_allclose(b, np.sum(da_entrop.G, 0), rtol=1e-3, atol=1e-3) # non-convex Group lasso regularization reg = 1e-1 @@ -42,8 +43,8 @@ def test_OTDA(): da_lpl1.interp() da_lpl1.predict(xs) - assert np.allclose(a, np.sum(da_lpl1.G, 1), rtol=1e-3, atol=1e-3) - assert np.allclose(b, np.sum(da_lpl1.G, 0), rtol=1e-3, atol=1e-3) + np.testing.assert_allclose(a, np.sum(da_lpl1.G, 1), rtol=1e-3, atol=1e-3) + np.testing.assert_allclose(b, np.sum(da_lpl1.G, 0), rtol=1e-3, atol=1e-3) # True Group lasso regularization reg = 1e-1 @@ -53,8 +54,8 @@ def 
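
# Aside: a minimal, self-contained sketch (toy data, not from these tests)
# of why the patches above replace bare `assert np.allclose(...)` with
# `np.testing.assert_allclose(...)`: the latter reports the mismatching
# entries and tolerances on failure instead of a bare AssertionError,
# which makes CI logs much easier to read.
import numpy as np

u = np.full(100, 1. / 100)
G = np.eye(100) / 100

assert np.allclose(u, G.sum(1), atol=1e-05)          # pass/fail only
np.testing.assert_allclose(u, G.sum(1), atol=1e-05)  # verbose on failure
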
test_OTDA(): da_l1l2.interp() da_l1l2.predict(xs) - assert np.allclose(a, np.sum(da_l1l2.G, 1), rtol=1e-3, atol=1e-3) - assert np.allclose(b, np.sum(da_l1l2.G, 0), rtol=1e-3, atol=1e-3) + np.testing.assert_allclose(a, np.sum(da_l1l2.G, 1), rtol=1e-3, atol=1e-3) + np.testing.assert_allclose(b, np.sum(da_l1l2.G, 0), rtol=1e-3, atol=1e-3) # linear mapping da_emd = ot.da.OTDA_mapping_linear() # init class -- cgit v1.2.3 From 347e6288b87cbeef9b8fbc1a08cd130b96de1d61 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 26 Jul 2017 11:42:35 +0200 Subject: n to n_samples --- test/test_da.py | 11 ++++------- test/test_dr.py | 25 ++++++++++--------------- 2 files changed, 14 insertions(+), 22 deletions(-) (limited to 'test') diff --git a/test/test_da.py b/test/test_da.py index 8df4795..a38390f 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -3,18 +3,15 @@ import numpy as np import ot -# import pytest - - def test_otda(): - n = 150 # nb samples + n_samples = 150 # nb samples np.random.seed(0) - xs, ys = ot.datasets.get_data_classif('3gauss', n) - xt, yt = ot.datasets.get_data_classif('3gauss2', n) + xs, ys = ot.datasets.get_data_classif('3gauss', n_samples) + xt, yt = ot.datasets.get_data_classif('3gauss2', n_samples) - a, b = ot.unif(n), ot.unif(n) + a, b = ot.unif(n_samples), ot.unif(n_samples) # LP problem da_emd = ot.da.OTDA() # init class diff --git a/test/test_dr.py b/test/test_dr.py index 3faba48..e3d1e6b 100644 --- a/test/test_dr.py +++ b/test/test_dr.py @@ -13,15 +13,15 @@ except ImportError: @pytest.mark.skipif(nogo, reason="Missing modules (autograd or pymanopt)") def test_fda(): - n = 90 # nb samples in source and target datasets + n_samples = 90 # nb samples in source and target datasets np.random.seed(0) - # generate circle dataset - xs, ys = ot.datasets.get_data_classif('gaussrot', n) + # generate gaussian dataset + xs, ys = ot.datasets.get_data_classif('gaussrot', n_samples) - nbnoise = 8 + n_features_noise = 8 - xs = np.hstack((xs, np.random.randn(n, nbnoise))) + xs = np.hstack((xs, np.random.randn(n_samples, n_features_noise))) p = 1 @@ -35,20 +35,15 @@ def test_fda(): @pytest.mark.skipif(nogo, reason="Missing modules (autograd or pymanopt)") def test_wda(): - n = 100 # nb samples in source and target datasets - nz = 0.2 + n_samples = 100 # nb samples in source and target datasets np.random.seed(0) - # generate circle dataset - t = np.random.rand(n) * 2 * np.pi - ys = np.floor((np.arange(n) * 1.0 / n * 3)) + 1 - xs = np.concatenate( - (np.cos(t).reshape((-1, 1)), np.sin(t).reshape((-1, 1))), 1) - xs = xs * ys.reshape(-1, 1) + nz * np.random.randn(n, 2) + # generate gaussian dataset + xs, ys = ot.datasets.get_data_classif('gaussrot', n_samples) - nbnoise = 8 + n_features_noise = 8 - xs = np.hstack((xs, np.random.randn(n, nbnoise))) + xs = np.hstack((xs, np.random.randn(n_samples, n_features_noise))) p = 2 -- cgit v1.2.3 From 4a45135dfa3f1aeae8b3bdf0c42422f0f60426e8 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 26 Jul 2017 11:47:29 +0200 Subject: dr +gpu numpy assert --- test/test_dr.py | 4 ++-- test/test_gpu.py | 34 +++++++++++++++++----------------- 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'test') diff --git a/test/test_dr.py b/test/test_dr.py index e3d1e6b..bdb920e 100644 --- a/test/test_dr.py +++ b/test/test_dr.py @@ -29,7 +29,7 @@ def test_fda(): projfda(xs) - assert np.allclose(np.sum(Pfda**2, 0), np.ones(p)) + np.testing.assert_allclose(np.sum(Pfda**2, 0), np.ones(p)) @pytest.mark.skipif(nogo, reason="Missing modules (autograd or 
pymanopt)") @@ -51,4 +51,4 @@ def test_wda(): projwda(xs) - assert np.allclose(np.sum(Pwda**2, 0), np.ones(p)) + np.testing.assert_allclose(np.sum(Pwda**2, 0), np.ones(p)) diff --git a/test/test_gpu.py b/test/test_gpu.py index 5184a6c..7ae159b 100644 --- a/test/test_gpu.py +++ b/test/test_gpu.py @@ -16,14 +16,14 @@ def test_gpu_sinkhorn(): np.random.seed(0) - def describeRes(r): + def describe_res(r): print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format( np.min(r), np.max(r), np.mean(r), np.std(r))) - for n in [50, 100, 500, 1000]: - print(n) - a = np.random.rand(n // 4, 100) - b = np.random.rand(n, 100) + for n_samples in [50, 100, 500, 1000]: + print(n_samples) + a = np.random.rand(n_samples // 4, 100) + b = np.random.rand(n_samples, 100) time1 = time.time() transport = ot.da.OTDA_sinkhorn() transport.fit(a, b) @@ -34,26 +34,26 @@ def test_gpu_sinkhorn(): G2 = transport.G time3 = time.time() print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1)) - describeRes(G1) + describe_res(G1) print(" GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2)) - describeRes(G2) + describe_res(G2) - assert np.allclose(G1, G2, rtol=1e-5, atol=1e-5) + np.testing.assert_allclose(G1, G2, rtol=1e-5, atol=1e-5) @pytest.mark.skipif(nogpu, reason="No GPU available") def test_gpu_sinkhorn_lpl1(): np.random.seed(0) - def describeRes(r): + def describe_res(r): print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}" .format(np.min(r), np.max(r), np.mean(r), np.std(r))) - for n in [50, 100, 500]: - print(n) - a = np.random.rand(n // 4, 100) - labels_a = np.random.randint(10, size=(n // 4)) - b = np.random.rand(n, 100) + for n_samples in [50, 100, 500]: + print(n_samples) + a = np.random.rand(n_samples // 4, 100) + labels_a = np.random.randint(10, size=(n_samples // 4)) + b = np.random.rand(n_samples, 100) time1 = time.time() transport = ot.da.OTDA_lpl1() transport.fit(a, labels_a, b) @@ -65,9 +65,9 @@ def test_gpu_sinkhorn_lpl1(): time3 = time.time() print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format( time2 - time1)) - describeRes(G1) + describe_res(G1) print(" GPU sinkhorn lpl1, time: {:6.2f} sec ".format( time3 - time2)) - describeRes(G2) + describe_res(G2) - assert np.allclose(G1, G2, rtol=1e-5, atol=1e-5) + np.testing.assert_allclose(G1, G2, rtol=1e-5, atol=1e-5) -- cgit v1.2.3 From 2bc41ad8bb54c76bade6db2c0e04fa387ff29500 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 26 Jul 2017 11:48:13 +0200 Subject: rng gpu --- test/test_gpu.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'test') diff --git a/test/test_gpu.py b/test/test_gpu.py index 7ae159b..98f59f7 100644 --- a/test/test_gpu.py +++ b/test/test_gpu.py @@ -14,7 +14,7 @@ except ImportError: @pytest.mark.skipif(nogpu, reason="No GPU available") def test_gpu_sinkhorn(): - np.random.seed(0) + rng = np.random.RandomState(0) def describe_res(r): print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format( @@ -22,8 +22,8 @@ def test_gpu_sinkhorn(): for n_samples in [50, 100, 500, 1000]: print(n_samples) - a = np.random.rand(n_samples // 4, 100) - b = np.random.rand(n_samples, 100) + a = rng.rand(n_samples // 4, 100) + b = rng.rand(n_samples, 100) time1 = time.time() transport = ot.da.OTDA_sinkhorn() transport.fit(a, b) @@ -43,7 +43,8 @@ def test_gpu_sinkhorn(): @pytest.mark.skipif(nogpu, reason="No GPU available") def test_gpu_sinkhorn_lpl1(): - np.random.seed(0) + + rng = np.random.RandomState(0) def describe_res(r): print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}" @@ -51,9 +52,9 
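
# Aside: the recurring np.random.seed(0) -> np.random.RandomState(0)
# change gives each test its own reproducible generator instead of
# mutating the global numpy RNG shared with other tests. A sketch with
# illustrative shapes only:
import numpy as np

rng = np.random.RandomState(0)   # local, reproducible
a = rng.rand(25, 100)
b = rng.rand(100, 100)
labels_a = rng.randint(10, size=25)
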
@@ def test_gpu_sinkhorn_lpl1(): for n_samples in [50, 100, 500]: print(n_samples) - a = np.random.rand(n_samples // 4, 100) + a = rng.rand(n_samples // 4, 100) labels_a = np.random.randint(10, size=(n_samples // 4)) - b = np.random.rand(n_samples, 100) + b = rng.rand(n_samples, 100) time1 = time.time() transport = ot.da.OTDA_lpl1() transport.fit(a, labels_a, b) -- cgit v1.2.3 From 6a02db058e24914cd79b638f15be9a90bce7e4f3 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 26 Jul 2017 11:51:07 +0200 Subject: test_optim --- test/test_optim.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'test') diff --git a/test/test_optim.py b/test/test_optim.py index d5c4ad0..bc0b706 100644 --- a/test/test_optim.py +++ b/test/test_optim.py @@ -3,22 +3,20 @@ import numpy as np import ot -# import pytest - def test_conditional_gradient(): - n = 100 # nb bins + n_bins = 100 # nb bins np.random.seed(0) # bin positions - x = np.arange(n, dtype=np.float64) + x = np.arange(n_bins, dtype=np.float64) # Gaussian distributions - a = ot.datasets.get_1D_gauss(n, m=20, s=5) # m= mean, s= std - b = ot.datasets.get_1D_gauss(n, m=60, s=10) + a = ot.datasets.get_1D_gauss(n_bins, m=20, s=5) # m= mean, s= std + b = ot.datasets.get_1D_gauss(n_bins, m=60, s=10) # loss matrix - M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1))) + M = ot.dist(x.reshape((n_bins, 1)), x.reshape((n_bins, 1))) M /= M.max() def f(G): @@ -37,17 +35,17 @@ def test_conditional_gradient(): def test_generalized_conditional_gradient(): - n = 100 # nb bins + n_bins = 100 # nb bins np.random.seed(0) # bin positions - x = np.arange(n, dtype=np.float64) + x = np.arange(n_bins, dtype=np.float64) # Gaussian distributions - a = ot.datasets.get_1D_gauss(n, m=20, s=5) # m= mean, s= std - b = ot.datasets.get_1D_gauss(n, m=60, s=10) + a = ot.datasets.get_1D_gauss(n_bins, m=20, s=5) # m= mean, s= std + b = ot.datasets.get_1D_gauss(n_bins, m=60, s=10) # loss matrix - M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1))) + M = ot.dist(x.reshape((n_bins, 1)), x.reshape((n_bins, 1))) M /= M.max() def f(G): -- cgit v1.2.3 From 86418ebf5adc11879c580e88e3eaa02691de30e7 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 26 Jul 2017 11:51:45 +0200 Subject: test_optim allclose --- test/test_optim.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/test_optim.py b/test/test_optim.py index bc0b706..2840cad 100644 --- a/test/test_optim.py +++ b/test/test_optim.py @@ -29,8 +29,8 @@ def test_conditional_gradient(): G, log = ot.optim.cg(a, b, M, reg, f, df, verbose=True, log=True) - assert np.allclose(a, G.sum(1)) - assert np.allclose(b, G.sum(0)) + np.testing.assert_allclose(a, G.sum(1)) + np.testing.assert_allclose(b, G.sum(0)) def test_generalized_conditional_gradient(): @@ -59,5 +59,5 @@ def test_generalized_conditional_gradient(): G, log = ot.optim.gcg(a, b, M, reg1, reg2, f, df, verbose=True, log=True) - assert np.allclose(a, G.sum(1), atol=1e-05) - assert np.allclose(b, G.sum(0), atol=1e-05) + np.testing.assert_allclose(a, G.sum(1), atol=1e-05) + np.testing.assert_allclose(b, G.sum(0), atol=1e-05) -- cgit v1.2.3 From 286de0a955bbb7e26079a8dc75abf622bf461523 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 26 Jul 2017 11:52:38 +0200 Subject: clean test_ot --- test/test_ot.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'test') diff --git a/test/test_ot.py b/test/test_ot.py index a30491d..9c0acab 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -3,8 +3,6 @@ import 
numpy as np
import ot
-# import pytest
-
def test_doctest():
-- cgit v1.2.3

From 81118f22197cdf4553427038526c8f730be256d7 Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Wed, 26 Jul 2017 11:55:57 +0200
Subject: test_ot random state
---
test/test_ot.py | 27 +++++++++++++--------------
1 file changed, 13 insertions(+), 14 deletions(-)
(limited to 'test')

diff --git a/test/test_ot.py b/test/test_ot.py
index 9c0acab..7fe665f 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -18,9 +18,9 @@ def test_doctest():
def test_emd_emd2():
# test emd and emd2 for simple identity
n = 100
- np.random.seed(0)
+ rng = np.random.RandomState(0)
- x = np.random.randn(n, 2)
+ x = rng.randn(n, 2)
u = ot.utils.unif(n)
M = ot.dist(x, x)
@@ -28,22 +28,22 @@ def test_emd_emd2():
G = ot.emd(u, u, M)
# check G is identity
- assert np.allclose(G, np.eye(n) / n)
+ np.testing.assert_allclose(G, np.eye(n) / n)
# check constraints
- assert np.allclose(u, G.sum(1)) # cf convergence sinkhorn
- assert np.allclose(u, G.sum(0)) # cf convergence sinkhorn
+ np.testing.assert_allclose(u, G.sum(1)) # cf convergence sinkhorn
+ np.testing.assert_allclose(u, G.sum(0)) # cf convergence sinkhorn
w = ot.emd2(u, u, M)
# check loss=0
- assert np.allclose(w, 0)
+ np.testing.assert_allclose(w, 0)
def test_emd_empty():
# test emd and emd2 for simple identity
n = 100
- np.random.seed(0)
+ rng = np.random.RandomState(0)
- x = np.random.randn(n, 2)
+ x = rng.randn(n, 2)
u = ot.utils.unif(n)
M = ot.dist(x, x)
@@ -51,14 +51,14 @@ def test_emd_empty():
G = ot.emd([], [], M)
# check G is identity
- assert np.allclose(G, np.eye(n) / n)
+ np.testing.assert_allclose(G, np.eye(n) / n)
# check constraints
- assert np.allclose(u, G.sum(1)) # cf convergence sinkhorn
- assert np.allclose(u, G.sum(0)) # cf convergence sinkhorn
+ np.testing.assert_allclose(u, G.sum(1)) # cf convergence sinkhorn
+ np.testing.assert_allclose(u, G.sum(0)) # cf convergence sinkhorn
w = ot.emd2([], [], M)
# check loss=0
- assert np.allclose(w, 0)
+ np.testing.assert_allclose(w, 0)
def test_emd2_multi():
@@ -66,7 +66,6 @@ def test_emd2_multi():
from ot.datasets import get_1D_gauss as gauss
n = 1000 # nb bins
- np.random.seed(0)
# bin positions
x = np.arange(n, dtype=np.float64)
@@ -96,4 +95,4 @@ def test_emd2_multi():
emdn = ot.emd2(a, b, M)
ot.toc('multi proc : {} s')
- assert np.allclose(emd1, emdn)
+ np.testing.assert_allclose(emd1, emdn)
-- cgit v1.2.3

From 109fc2a9243d2c0f9a911fa8c02079d2fc0277ab Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Wed, 26 Jul 2017 11:57:43 +0200
Subject: flake8
---
test/test_optim.py | 1 -
test/test_ot.py | 1 -
2 files changed, 2 deletions(-)
(limited to 'test')

diff --git a/test/test_optim.py b/test/test_optim.py
index 2840cad..05ca895 100644
--- a/test/test_optim.py
+++ b/test/test_optim.py
@@ -3,7 +3,6 @@
import numpy as np
import ot
-
def test_conditional_gradient():
n_bins = 100 # nb bins
diff --git a/test/test_ot.py b/test/test_ot.py
index 7fe665f..531e6e0 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -3,7 +3,6 @@
import numpy as np
import ot
-
def test_doctest():
-- cgit v1.2.3

From e0fa14ba146e6f92a3060b5f2f0a5c01bd18bdc4 Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Wed, 26 Jul 2017 11:58:58 +0200
Subject: flake8
---
test/test_plot.py | 18 +++++++++---------
test/test_utils.py | 3 ---
2 files changed, 9 insertions(+), 12 deletions(-)
(limited to 'test')

diff --git a/test/test_plot.py b/test/test_plot.py
index 69789fa..d826988 100644
--- a/test/test_plot.py
+++ b/test/test_plot.py
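
# Aside: the test_emd_emd2 checks above rest on a simple fact of optimal
# transport: with identical source and target points, uniform weights and
# a cost matrix with zero diagonal, the cheapest plan keeps every point
# in place. A sketch assuming the POT package as `ot`:
import numpy as np
import ot

n = 100
rng = np.random.RandomState(0)
x = rng.randn(n, 2)
u = ot.utils.unif(n)   # uniform weights summing to 1
M = ot.dist(x, x)      # squared euclidean cost, zero on the diagonal

G = ot.emd(u, u, M)
np.testing.assert_allclose(G, np.eye(n) / n)     # identity scaled by 1/n
np.testing.assert_allclose(ot.emd2(u, u, M), 0)  # hence zero total cost
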
@@ -9,17 +9,17 @@ def test_plot1D_mat():
import ot
- n = 100 # nb bins
+ n_bins = 100 # nb bins
# bin positions
- x = np.arange(n, dtype=np.float64)
+ x = np.arange(n_bins, dtype=np.float64)
# Gaussian distributions
- a = ot.datasets.get_1D_gauss(n, m=20, s=5) # m= mean, s= std
- b = ot.datasets.get_1D_gauss(n, m=60, s=10)
+ a = ot.datasets.get_1D_gauss(n_bins, m=20, s=5) # m= mean, s= std
+ b = ot.datasets.get_1D_gauss(n_bins, m=60, s=10)
# loss matrix
- M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
+ M = ot.dist(x.reshape((n_bins, 1)), x.reshape((n_bins, 1)))
M /= M.max()
ot.plot.plot1D_mat(a, b, M, 'Cost matrix M')
@@ -29,7 +29,7 @@ def test_plot2D_samples_mat():
import ot
- n = 50 # nb samples
+ n_bins = 50 # nb samples
mu_s = np.array([0, 0])
cov_s = np.array([[1, 0], [0, 1]])
@@ -37,9 +37,9 @@ def test_plot2D_samples_mat():
mu_t = np.array([4, 4])
cov_t = np.array([[1, -.8], [-.8, 1]])
- xs = ot.datasets.get_2D_samples_gauss(n, mu_s, cov_s)
- xt = ot.datasets.get_2D_samples_gauss(n, mu_t, cov_t)
+ xs = ot.datasets.get_2D_samples_gauss(n_bins, mu_s, cov_s)
+ xt = ot.datasets.get_2D_samples_gauss(n_bins, mu_t, cov_t)
- G = 1.0 * (np.random.rand(n, n) < 0.01)
+ G = 1.0 * (np.random.rand(n_bins, n_bins) < 0.01)
ot.plot.plot2D_samples_mat(xs, xt, G, thr=1e-5)
diff --git a/test/test_utils.py b/test/test_utils.py
index 0883a8e..fe1b88d 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -3,9 +3,6 @@
import ot
import numpy as np
-# import pytest
-
-
def test_parmap():
n = 100
-- cgit v1.2.3

From d101e088b72fa0be4648d57524946ebfc93bf34b Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Wed, 26 Jul 2017 11:59:25 +0200
Subject: nearly all review done
---
test/test_utils.py | 1 +
1 file changed, 1 insertion(+)
(limited to 'test')

diff --git a/test/test_utils.py b/test/test_utils.py
index fe1b88d..230d126 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -3,6 +3,7 @@
import ot
import numpy as np
+
def test_parmap():
n = 100
-- cgit v1.2.3

From fac003de3d3a159bb8fb6228786479cdede2df4e Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Wed, 26 Jul 2017 12:07:03 +0200
Subject: author and license for test files
---
test/test_bregman.py | 5 +++++
test/test_da.py | 5 +++++
test/test_dr.py | 5 +++++
test/test_gpu.py | 5 +++++
test/test_optim.py | 5 +++++
test/test_ot.py | 5 +++++
test/test_plot.py | 4 ++++
test/test_utils.py | 5 +++++
8 files changed, 39 insertions(+)
(limited to 'test')

diff --git a/test/test_bregman.py b/test/test_bregman.py
index 1638ef6..4a800fd 100644
--- a/test/test_bregman.py
+++ b/test/test_bregman.py
@@ -1,3 +1,8 @@
+"""Tests for module bregman on OT with bregman projections """
+
+# Author: Remi Flamary
+#
+# License: MIT License
import numpy as np
import ot
diff --git a/test/test_da.py b/test/test_da.py
index a38390f..dfba83f 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -1,3 +1,8 @@
+"""Tests for module da on Domain Adaptation """
+
+# Author: Remi Flamary
+#
+# License: MIT License
import numpy as np
import ot
diff --git a/test/test_dr.py b/test/test_dr.py
index bdb920e..915012d 100644
--- a/test/test_dr.py
+++ b/test/test_dr.py
@@ -1,3 +1,8 @@
+"""Tests for module dr on Dimensionality Reduction """
+
+# Author: Remi Flamary
+#
+# License: MIT License
import numpy as np
import ot
diff --git a/test/test_gpu.py b/test/test_gpu.py
index 98f59f7..615c2a7 100644
--- a/test/test_gpu.py
+++ b/test/test_gpu.py
@@ -1,3 +1,8 @@
+"""Tests for module gpu for gpu acceleration """
+
+# Author: Remi Flamary
+#
+# License: MIT License
import numpy as np
import ot
diff --git a/test/test_optim.py b/test/test_optim.py
index 05ca895..69496a5 100644
--- a/test/test_optim.py
+++ b/test/test_optim.py
@@ -1,3 +1,8 @@
+"""Tests for module optim for OT optimization """
+
+# Author: Remi Flamary
+#
+# License: MIT License
import numpy as np
import ot
diff --git a/test/test_ot.py b/test/test_ot.py
index 531e6e0..acd8718 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -1,3 +1,8 @@
+"""Tests for main module ot """
+
+# Author: Remi Flamary
+#
+# License: MIT License
import numpy as np
import ot
diff --git a/test/test_plot.py b/test/test_plot.py
index d826988..f7debee 100644
--- a/test/test_plot.py
+++ b/test/test_plot.py
@@ -1,4 +1,8 @@
+"""Tests for module plot for visualization """
+# Author: Remi Flamary
+#
+# License: MIT License
import numpy as np
import matplotlib
diff --git a/test/test_utils.py b/test/test_utils.py
index 230d126..9b140db 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1,3 +1,8 @@
+"""Tests for module utils for timing and parallel computation """
+
+# Author: Remi Flamary
+#
+# License: MIT License
import ot
-- cgit v1.2.3

From 838550ead9cc8a66d9b9c1212c5dda2457dc59a5 Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Wed, 26 Jul 2017 15:12:44 +0200
Subject: last stuff
---
test/test_utils.py | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
(limited to 'test')

diff --git a/test/test_utils.py b/test/test_utils.py
index 9b140db..1bd37cd 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -22,7 +22,7 @@ def test_parmap():
l2 = list(ot.utils.parmap(f, a))
- assert np.allclose(l1, l2)
+ np.testing.assert_allclose(l1, l2)
def test_tic_toc():
@@ -35,10 +35,10 @@ def test_tic_toc():
t2 = ot.toq()
# test timing
- assert np.allclose(0.5, t, rtol=1e-2, atol=1e-2)
+ np.testing.assert_allclose(0.5, t, rtol=1e-2, atol=1e-2)
# test toc vs toq
- assert np.allclose(t, t2, rtol=1e-2, atol=1e-2)
+ np.testing.assert_allclose(t, t2, rtol=1e-2, atol=1e-2)
def test_kernel():
@@ -50,7 +50,7 @@ def test_kernel():
K = ot.utils.kernel(x, x)
# gaussian kernel has ones on the diagonal
- assert np.allclose(np.diag(K), np.ones(n))
+ np.testing.assert_allclose(np.diag(K), np.ones(n))
def test_unif():
@@ -59,7 +59,7 @@ def test_unif():
u = ot.unif(n)
- assert np.allclose(1, np.sum(u))
+ np.testing.assert_allclose(1, np.sum(u))
def test_dist():
@@ -77,8 +77,8 @@ def test_dist():
D3 = ot.dist(x)
# dist should return squared euclidean
- assert np.allclose(D, D2)
- assert np.allclose(D, D3)
+ np.testing.assert_allclose(D, D2)
+ np.testing.assert_allclose(D, D3)
def test_dist0():
@@ -87,7 +87,7 @@ def test_dist0():
M = ot.utils.dist0(n, method='lin_square')
# dist0 defaults to linear sampling with quadratic loss
- assert np.allclose(M[0, -1], (n - 1) * (n - 1))
+ np.testing.assert_allclose(M[0, -1], (n - 1) * (n - 1))
def test_dots():
@@ -102,7 +102,7 @@ def test_dots():
X2 = A.dot(B.dot(C))
- assert np.allclose(X1, X2)
+ np.testing.assert_allclose(X1, X2)
def test_clean_zeros():
-- cgit v1.2.3

From 122b5bf2c0c8b6ff7b46adf19c7dd72e62c85b1f Mon Sep 17 00:00:00 2001
From: Slasnista
Date: Tue, 1 Aug 2017 10:42:09 +0200
Subject: update SinkhornTransport class + added test for class
---
ot/da.py | 56 +++++++++++++++++++++-----------------------------------
test/test_da.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 72 insertions(+), 35 deletions(-)
(limited to 'test')

diff --git a/ot/da.py b/ot/da.py
index d30c821..6b98a17 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -15,6 +15,7 @@ from .lp import emd
from .utils import
unif, dist, kernel from .optim import cg from .optim import gcg +import warnings def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, @@ -921,15 +922,8 @@ class OTDA_mapping_kernel(OTDA_mapping_linear): # proposal ############################################################################## -# from sklearn.base import BaseEstimator -# from sklearn.metrics import pairwise_distances - -############################################################################## -# adapted from scikit-learn - -import warnings -# from .externals.six import string_types, iteritems +# adapted from sklearn class BaseEstimator(object): """Base class for all estimators in scikit-learn @@ -1067,7 +1061,7 @@ def distribution_estimation_uniform(X): The uniform distribution estimated from X """ - return np.ones(X.shape[0]) / float(X.shape[0]) + return unif(X.shape[0]) class BaseTransport(BaseEstimator): @@ -1092,29 +1086,20 @@ class BaseTransport(BaseEstimator): """ # pairwise distance - Cost = dist(Xs, Xt, metric=self.metric) + self.Cost = dist(Xs, Xt, metric=self.metric) if self.mode == "semisupervised": print("TODO: modify cost matrix accordingly") pass # distribution estimation - mu_s = self.distribution_estimation(Xs) - mu_t = self.distribution_estimation(Xt) + self.mu_s = self.distribution_estimation(Xs) + self.mu_t = self.distribution_estimation(Xt) # store arrays of samples self.Xs = Xs self.Xt = Xt - # coupling estimation - if self.method == "sinkhorn": - self.gamma_ = sinkhorn( - a=mu_s, b=mu_t, M=Cost, reg=self.reg_e, - numItermax=self.max_iter, stopThr=self.tol, - verbose=self.verbose, log=self.log) - else: - print("TODO: implement the other methods") - return self def fit_transform(self, Xs=None, ys=None, Xt=None, yt=None): @@ -1157,8 +1142,7 @@ class BaseTransport(BaseEstimator): The transport source samples. """ - # TODO: check whether Xs is new or not - if self.Xs == Xs: + if np.array_equal(self.Xs, Xs): # perform standard barycentric mapping transp = self.gamma_ / np.sum(self.gamma_, 1)[:, None] @@ -1169,7 +1153,9 @@ class BaseTransport(BaseEstimator): transp_Xs = np.dot(transp, self.Xt) else: # perform out of sample mapping - print("out of sample mapping not yet implemented") + print("Warning: out of sample mapping not yet implemented") + print("input data will be returned") + transp_Xs = Xs return transp_Xs @@ -1191,8 +1177,7 @@ class BaseTransport(BaseEstimator): The transported target samples. 
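
# Aside: transform and inverse_transform above both reduce to a
# barycentric mapping: row- (or column-) normalize the coupling, then
# average the samples of the other domain with those weights. A
# self-contained numerical sketch with a toy coupling, not data from
# the patch:
import numpy as np

rng = np.random.RandomState(0)
Xt = rng.randn(4, 2)   # target samples
G = rng.rand(3, 4)     # coupling between 3 source and 4 target samples
G /= G.sum()

transp = G / G.sum(1)[:, None]    # row-normalize
transp[~np.isfinite(transp)] = 0  # guard against empty rows
transp_Xs = transp.dot(Xt)        # one mapped point per source sample
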
""" - # TODO: check whether Xt is new or not - if self.Xt == Xt: + if np.array_equal(self.Xt, Xt): # perform standard barycentric mapping transp_ = self.gamma_.T / np.sum(self.gamma_, 0)[:, None] @@ -1203,7 +1188,9 @@ class BaseTransport(BaseEstimator): transp_Xt = np.dot(transp_, self.Xs) else: # perform out of sample mapping - print("out of sample mapping not yet implemented") + print("Warning: out of sample mapping not yet implemented") + print("input data will be returned") + transp_Xt = Xt return transp_Xt @@ -1254,7 +1241,7 @@ class SinkhornTransport(BaseTransport): """ def __init__(self, reg_e=1., mode="unsupervised", max_iter=1000, - tol=10e-9, verbose=False, log=False, mapping="barycentric", + tol=10e-9, verbose=False, log=False, metric="sqeuclidean", distribution_estimation=distribution_estimation_uniform, out_of_sample_map='ferradans'): @@ -1265,7 +1252,6 @@ class SinkhornTransport(BaseTransport): self.tol = tol self.verbose = verbose self.log = log - self.mapping = mapping self.metric = metric self.distribution_estimation = distribution_estimation self.method = "sinkhorn" @@ -1290,10 +1276,10 @@ class SinkhornTransport(BaseTransport): Returns self. """ - return super(SinkhornTransport, self).fit(Xs, ys, Xt, yt) - + self = super(SinkhornTransport, self).fit(Xs, ys, Xt, yt) -if __name__ == "__main__": - print("Small test") - - st = SinkhornTransport() + # coupling estimation + self.gamma_ = sinkhorn( + a=self.mu_s, b=self.mu_t, M=self.Cost, reg=self.reg_e, + numItermax=self.max_iter, stopThr=self.tol, + verbose=self.verbose, log=self.log) diff --git a/test/test_da.py b/test/test_da.py index dfba83f..e7b4ed1 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -6,6 +6,57 @@ import numpy as np import ot +from numpy.testing.utils import assert_allclose, assert_equal +from ot.datasets import get_data_classif +from ot.utils import unif + +np.random.seed(42) + + +def test_sinkhorn_transport(): + """test_sinkhorn_transport + """ + + ns = 150 + nt = 200 + + Xs, ys = get_data_classif('3gauss', ns) + Xt, yt = get_data_classif('3gauss2', nt) + + clf = ot.da.SinkhornTransport() + + # test its computed + clf.fit(Xs=Xs, Xt=Xt) + + # test dimensions of coupling + assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.gamma_.shape, ((Xs.shape[0], Xt.shape[0]))) + + # test margin constraints + mu_s = unif(ns) + mu_t = unif(nt) + assert_allclose(np.sum(clf.gamma_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.gamma_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + + # test transform + transp_Xs = clf.transform(Xs=Xs) + assert_equal(transp_Xs.shape, Xs.shape) + + Xs_new, _ = get_data_classif('3gauss', ns + 1) + transp_Xs_new = clf.transform(Xs_new) + + # check that the oos method is not working + assert_equal(transp_Xs_new, Xs_new) + + # test inverse transform + transp_Xt = clf.inverse_transform(Xt=Xt) + assert_equal(transp_Xt.shape, Xt.shape) + + Xt_new, _ = get_data_classif('3gauss2', nt + 1) + transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + + # check that the oos method is not working and returns the input data + assert_equal(transp_Xt_new, Xt_new) def test_otda(): -- cgit v1.2.3 From d9be6c2da1c0953de1720f1e93f194c71699c3cd Mon Sep 17 00:00:00 2001 From: Slasnista Date: Tue, 1 Aug 2017 13:13:50 +0200 Subject: added EMDTransport Class from NG's code + added dedicated test --- ot/da.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- test/test_da.py | 59 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 135 insertions(+), 10 deletions(-) 
(limited to 'test')

diff --git a/ot/da.py b/ot/da.py
index 6b98a17..fb2fd36 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -1144,7 +1144,7 @@ class BaseTransport(BaseEstimator):
if np.array_equal(self.Xs, Xs):
# perform standard barycentric mapping
- transp = self.gamma_ / np.sum(self.gamma_, 1)[:, None]
+ transp = self.Coupling_ / np.sum(self.Coupling_, 1)[:, None]
# set nans to 0
transp[~ np.isfinite(transp)] = 0
@@ -1179,7 +1179,7 @@ class BaseTransport(BaseEstimator):
if np.array_equal(self.Xt, Xt):
# perform standard barycentric mapping
- transp_ = self.gamma_.T / np.sum(self.gamma_, 0)[:, None]
+ transp_ = self.Coupling_.T / np.sum(self.Coupling_, 0)[:, None]
# set nans to 0
transp_[~ np.isfinite(transp_)] = 0
@@ -1228,7 +1228,7 @@ class SinkhornTransport(BaseTransport):
Controls the logs of the optimization algorithm
Attributes
----------
- gamma_ : the optimal coupling
+ Coupling_ : the optimal coupling
References
----------
@@ -1254,7 +1254,6 @@ class SinkhornTransport(BaseTransport):
self.log = log
self.metric = metric
self.distribution_estimation = distribution_estimation
- self.method = "sinkhorn"
self.out_of_sample_map = out_of_sample_map
def fit(self, Xs=None, ys=None, Xt=None, yt=None):
@@ -1276,10 +1275,85 @@ class SinkhornTransport(BaseTransport):
Returns self.
"""
- self = super(SinkhornTransport, self).fit(Xs, ys, Xt, yt)
+ super(SinkhornTransport, self).fit(Xs, ys, Xt, yt)
# coupling estimation
- self.gamma_ = sinkhorn(
+ self.Coupling_ = sinkhorn(
a=self.mu_s, b=self.mu_t, M=self.Cost, reg=self.reg_e,
numItermax=self.max_iter, stopThr=self.tol,
verbose=self.verbose, log=self.log)
+
+
+class EMDTransport(BaseTransport):
+ """Domain Adaptation OT method based on Earth Mover's Distance
+ Parameters
+ ----------
+ mode : string, optional (default="unsupervised")
+ The DA mode. If "unsupervised" no target labels are taken into account
+ to modify the cost matrix. If "semisupervised" the target labels
+ are taken into account to set coefficients of the pairwise distance
+ matrix to 0 for row and columns indices that correspond to source and
+ target samples which share the same labels.
+ mapping : string, optional (default="barycentric")
+ The kind of mapping to apply to transport samples from a domain into
+ another one.
+ if "barycentric" only the samples used to estimate the coupling can
+ be transported from a domain to another one.
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ distribution : string, optional (default="uniform")
+ The kind of distribution estimation to employ
+ verbose : int, optional (default=0)
+ Controls the verbosity of the optimization algorithm
+ log : int, optional (default=0)
+ Controls the logs of the optimization algorithm
+ Attributes
+ ----------
+ Coupling_ : the optimal coupling
+
+ References
+ ----------
+ .. [1] N. Courty; R. Flamary; D. Tuia; A.
Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE Transactions + on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 + """ + + def __init__(self, mode="unsupervised", verbose=False, + log=False, metric="sqeuclidean", + distribution_estimation=distribution_estimation_uniform, + out_of_sample_map='ferradans'): + + self.mode = mode + self.verbose = verbose + self.log = log + self.metric = metric + self.distribution_estimation = distribution_estimation + self.out_of_sample_map = out_of_sample_map + + def fit(self, Xs, ys=None, Xt=None, yt=None): + """Build a coupling matrix from source and target sets of samples + (Xs, ys) and (Xt, yt) + Parameters + ---------- + Xs : array-like of shape = [n_source_samples, n_features] + The training input samples. + ys : array-like, shape = [n_source_samples] + The class labels + Xt : array-like of shape = [n_target_samples, n_features] + The training input samples. + yt : array-like, shape = [n_labeled_target_samples] + The class labels + Returns + ------- + self : object + Returns self. + """ + + super(EMDTransport, self).fit(Xs, ys, Xt, yt) + + # coupling estimation + self.Coupling_ = emd( + a=self.mu_s, b=self.mu_t, M=self.Cost, + # verbose=self.verbose, + # log=self.log + ) diff --git a/test/test_da.py b/test/test_da.py index e7b4ed1..33b3695 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -13,7 +13,7 @@ from ot.utils import unif np.random.seed(42) -def test_sinkhorn_transport(): +def test_sinkhorn_transport_class(): """test_sinkhorn_transport """ @@ -30,13 +30,59 @@ def test_sinkhorn_transport(): # test dimensions of coupling assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.gamma_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.gamma_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.gamma_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + + # test transform + transp_Xs = clf.transform(Xs=Xs) + assert_equal(transp_Xs.shape, Xs.shape) + + Xs_new, _ = get_data_classif('3gauss', ns + 1) + transp_Xs_new = clf.transform(Xs_new) + + # check that the oos method is not working + assert_equal(transp_Xs_new, Xs_new) + + # test inverse transform + transp_Xt = clf.inverse_transform(Xt=Xt) + assert_equal(transp_Xt.shape, Xt.shape) + + Xt_new, _ = get_data_classif('3gauss2', nt + 1) + transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + + # check that the oos method is not working and returns the input data + assert_equal(transp_Xt_new, Xt_new) + + +def test_emd_transport_class(): + """test_sinkhorn_transport + """ + + ns = 150 + nt = 200 + + Xs, ys = get_data_classif('3gauss', ns) + Xt, yt = get_data_classif('3gauss2', nt) + + clf = ot.da.EMDTransport() + + # test its computed + clf.fit(Xs=Xs, Xt=Xt) + + # test dimensions of coupling + assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + + # test margin constraints + mu_s = unif(ns) + mu_t = unif(nt) + assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = clf.transform(Xs=Xs) @@ -119,3 +165,8 @@ def test_otda(): da_emd = ot.da.OTDA_mapping_kernel() # 
init class
da_emd.fit(xs, xt, numItermax=10) # fit distributions
da_emd.predict(xs) # interpolation of source samples
+
+
+if __name__ == "__main__":
+ test_sinkhorn_transport_class()
+ test_emd_transport_class()
-- cgit v1.2.3

From 70be03461db45de50ecd073b9795093ead1ba5f5 Mon Sep 17 00:00:00 2001
From: Slasnista
Date: Fri, 4 Aug 2017 11:16:30 +0200
Subject: added test for fit_transform + correction of fit_transform bug
(missing return self)
---
ot/da.py | 4 ++++
test/test_da.py | 13 ++++++++-----
2 files changed, 12 insertions(+), 5 deletions(-)
(limited to 'test')

diff --git a/ot/da.py b/ot/da.py
index fb2fd36..80649a7 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -1283,6 +1283,8 @@ class SinkhornTransport(BaseTransport):
numItermax=self.max_iter, stopThr=self.tol,
verbose=self.verbose, log=self.log)
+ return self
+
class EMDTransport(BaseTransport):
"""Domain Adaptation OT method based on Earth Mover's Distance
@@ -1357,3 +1359,5 @@ class EMDTransport(BaseTransport):
# verbose=self.verbose,
# log=self.log
)
+
+ return self
diff --git a/test/test_da.py b/test/test_da.py
index 33b3695..68807ec 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -58,6 +58,10 @@ def test_sinkhorn_transport_class():
# check that the oos method is not working and returns the input data
assert_equal(transp_Xt_new, Xt_new)
+ # test fit_transform
+ transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt)
+ assert_equal(transp_Xs.shape, Xs.shape)
+
def test_emd_transport_class():
"""test_sinkhorn_transport
"""
@@ -104,6 +108,10 @@ def test_emd_transport_class():
# check that the oos method is not working and returns the input data
assert_equal(transp_Xt_new, Xt_new)
+ # test fit_transform
+ transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt)
+ assert_equal(transp_Xs.shape, Xs.shape)
+
def test_otda():
@@ -165,8 +173,3 @@ def test_otda():
da_emd = ot.da.OTDA_mapping_kernel() # init class
da_emd.fit(xs, xt, numItermax=10) # fit distributions
da_emd.predict(xs) # interpolation of source samples
-
-
-if __name__ == "__main__":
- test_sinkhorn_transport_class()
- test_emd_transport_class()
-- cgit v1.2.3

From 64880e721f45c56de4815dd41cd21b8570c9776f Mon Sep 17 00:00:00 2001
From: Slasnista
Date: Fri, 4 Aug 2017 11:34:21 +0200
Subject: added new class SinkhornLpl1Transport() + dedicated test
---
ot/da.py | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
test/test_da.py | 50 +++++++++++++++++++++++++++++++
2 files changed, 141 insertions(+)
(limited to 'test')

diff --git a/ot/da.py b/ot/da.py
index 80649a7..3031f63 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -1361,3 +1361,94 @@ class EMDTransport(BaseTransport):
)
return self
+
+
+class SinkhornLpl1Transport(BaseTransport):
+ """Domain Adaptation OT method based on sinkhorn algorithm +
+ LpL1 class regularization.
+
+ Parameters
+ ----------
+ mode : string, optional (default="unsupervised")
+ The DA mode. If "unsupervised" no target labels are taken into account
+ to modify the cost matrix. If "semisupervised" the target labels
+ are taken into account to set coefficients of the pairwise distance
+ matrix to 0 for row and columns indices that correspond to source and
+ target samples which share the same labels.
+ mapping : string, optional (default="barycentric")
+ The kind of mapping to apply to transport samples from a domain into
+ another one.
+ if "barycentric" only the samples used to estimate the coupling can
+ be transported from a domain to another one.
+ metric : string, optional (default="sqeuclidean") + The ground metric for the Wasserstein problem + distribution : string, optional (default="uniform") + The kind of distribution estimation to employ + verbose : int, optional (default=0) + Controls the verbosity of the optimization algorithm + log : int, optional (default=0) + Controls the logs of the optimization algorithm + Attributes + ---------- + Coupling_ : the optimal coupling + + References + ---------- + + .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE + Transactions on Pattern Analysis and Machine Intelligence , + vol.PP, no.99, pp.1-1 + .. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence + and applications. arXiv preprint arXiv:1510.06567. + + """ + + def __init__(self, reg_e=1., reg_cl=0.1, mode="unsupervised", + max_iter=10, max_inner_iter=200, + tol=10e-9, verbose=False, log=False, + metric="sqeuclidean", + distribution_estimation=distribution_estimation_uniform, + out_of_sample_map='ferradans'): + + self.reg_e = reg_e + self.reg_cl = reg_cl + self.mode = mode + self.max_iter = max_iter + self.max_inner_iter = max_inner_iter + self.tol = tol + self.verbose = verbose + self.log = log + self.metric = metric + self.distribution_estimation = distribution_estimation + self.out_of_sample_map = out_of_sample_map + + def fit(self, Xs, ys=None, Xt=None, yt=None): + """Build a coupling matrix from source and target sets of samples + (Xs, ys) and (Xt, yt) + Parameters + ---------- + Xs : array-like of shape = [n_source_samples, n_features] + The training input samples. + ys : array-like, shape = [n_source_samples] + The class labels + Xt : array-like of shape = [n_target_samples, n_features] + The training input samples. + yt : array-like, shape = [n_labeled_target_samples] + The class labels + Returns + ------- + self : object + Returns self. 
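
# Aside: the fit method whose body follows is a thin wrapper; a sketch
# of the same estimation done directly with the module-level solver,
# using the sinkhorn_lpl1_mm signature shown earlier in ot/da.py
# (parameter values below are illustrative only):
import ot
from ot.datasets import get_data_classif
from ot.utils import dist, unif

Xs, ys = get_data_classif('3gauss', 150)
Xt, _ = get_data_classif('3gauss2', 200)

mu_s, mu_t = unif(150), unif(200)
Cost = dist(Xs, Xt, metric='sqeuclidean')
coupling = ot.da.sinkhorn_lpl1_mm(a=mu_s, labels_a=ys, b=mu_t, M=Cost,
                                  reg=1., eta=0.1)
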
+ """ + + super(SinkhornLpl1Transport, self).fit(Xs, ys, Xt, yt) + + self.Coupling_ = sinkhorn_lpl1_mm( + a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.Cost, + reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter, + numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol, + verbose=self.verbose, log=self.log) + + return self diff --git a/test/test_da.py b/test/test_da.py index 68807ec..7d00cfb 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -13,6 +13,56 @@ from ot.utils import unif np.random.seed(42) +def test_sinkhorn_lpl1_transport_class(): + """test_sinkhorn_transport + """ + + ns = 150 + nt = 200 + + Xs, ys = get_data_classif('3gauss', ns) + Xt, yt = get_data_classif('3gauss2', nt) + + clf = ot.da.SinkhornLpl1Transport() + + # test its computed + clf.fit(Xs=Xs, ys=ys, Xt=Xt) + + # test dimensions of coupling + assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + + # test margin constraints + mu_s = unif(ns) + mu_t = unif(nt) + assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + + # test transform + transp_Xs = clf.transform(Xs=Xs) + assert_equal(transp_Xs.shape, Xs.shape) + + Xs_new, _ = get_data_classif('3gauss', ns + 1) + transp_Xs_new = clf.transform(Xs_new) + + # check that the oos method is not working + assert_equal(transp_Xs_new, Xs_new) + + # test inverse transform + transp_Xt = clf.inverse_transform(Xt=Xt) + assert_equal(transp_Xt.shape, Xt.shape) + + Xt_new, _ = get_data_classif('3gauss2', nt + 1) + transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + + # check that the oos method is not working and returns the input data + assert_equal(transp_Xt_new, Xt_new) + + # test fit_transform + transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) + assert_equal(transp_Xs.shape, Xs.shape) + + def test_sinkhorn_transport_class(): """test_sinkhorn_transport """ -- cgit v1.2.3 From 727077ad7db503955aea0751abf9f361f1d82af7 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Fri, 4 Aug 2017 11:40:44 +0200 Subject: added new class SinkhornL1l2Transport() + dedicated test --- ot/da.py | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ test/test_da.py | 50 ++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) (limited to 'test') diff --git a/ot/da.py b/ot/da.py index 3031f63..6100d15 100644 --- a/ot/da.py +++ b/ot/da.py @@ -1369,6 +1369,10 @@ class SinkhornLpl1Transport(BaseTransport): Parameters ---------- + reg_e : float, optional (default=1) + Entropic regularization parameter + reg_cl : float, optional (default=0.1) + Class regularization parameter mode : string, optional (default="unsupervised") The DA mode. If "unsupervised" no target labels are taken into account to modify the cost matrix. 
If "semisupervised" the target labels
@@ -1384,6 +1388,11 @@ class SinkhornLpl1Transport(BaseTransport):
The ground metric for the Wasserstein problem
distribution : string, optional (default="uniform")
The kind of distribution estimation to employ
+ max_iter : int, float, optional (default=10)
+ The minimum number of iteration before stopping the optimization
+ algorithm if it has not converged
+ max_inner_iter : int, float, optional (default=200)
+ The number of iteration in the inner loop
verbose : int, optional (default=0)
Controls the verbosity of the optimization algorithm
log : int, optional (default=0)
@@ -1452,3 +1461,103 @@ class SinkhornLpl1Transport(BaseTransport):
verbose=self.verbose, log=self.log)
return self
+
+
+class SinkhornL1l2Transport(BaseTransport):
+ """Domain Adaptation OT method based on sinkhorn algorithm +
+ l1l2 class regularization.
+
+ Parameters
+ ----------
+ reg_e : float, optional (default=1)
+ Entropic regularization parameter
+ reg_cl : float, optional (default=0.1)
+ Class regularization parameter
+ mode : string, optional (default="unsupervised")
+ The DA mode. If "unsupervised" no target labels are taken into account
+ to modify the cost matrix. If "semisupervised" the target labels
+ are taken into account to set coefficients of the pairwise distance
+ matrix to 0 for row and columns indices that correspond to source and
+ target samples which share the same labels.
+ mapping : string, optional (default="barycentric")
+ The kind of mapping to apply to transport samples from a domain into
+ another one.
+ if "barycentric" only the samples used to estimate the coupling can
+ be transported from a domain to another one.
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ distribution : string, optional (default="uniform")
+ The kind of distribution estimation to employ
+ max_iter : int, float, optional (default=10)
+ The minimum number of iteration before stopping the optimization
+ algorithm if it has not converged
+ max_inner_iter : int, float, optional (default=200)
+ The number of iteration in the inner loop
+ verbose : int, optional (default=0)
+ Controls the verbosity of the optimization algorithm
+ log : int, optional (default=0)
+ Controls the logs of the optimization algorithm
+ Attributes
+ ----------
+ Coupling_ : the optimal coupling
+
+ References
+ ----------
+
+ .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE
+ Transactions on Pattern Analysis and Machine Intelligence ,
+ vol.PP, no.99, pp.1-1
+ .. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
+ Generalized conditional gradient: analysis of convergence
+ and applications. arXiv preprint arXiv:1510.06567.
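
# Aside: every transport test in this series asserts the same two margin
# constraints on the fitted coupling. A self-contained restatement, using
# the independent coupling np.outer(mu_s, mu_t), whose marginals are
# exact, as a stand-in for a fitted Coupling_:
import numpy as np
from ot.utils import unif

ns, nt = 150, 200
mu_s, mu_t = unif(ns), unif(nt)
coupling = np.outer(mu_s, mu_t)  # rows sum to mu_s, columns to mu_t

np.testing.assert_allclose(coupling.sum(axis=1), mu_s, rtol=1e-3, atol=1e-3)
np.testing.assert_allclose(coupling.sum(axis=0), mu_t, rtol=1e-3, atol=1e-3)
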
+ + """ + + def __init__(self, reg_e=1., reg_cl=0.1, mode="unsupervised", + max_iter=10, max_inner_iter=200, + tol=10e-9, verbose=False, log=False, + metric="sqeuclidean", + distribution_estimation=distribution_estimation_uniform, + out_of_sample_map='ferradans'): + + self.reg_e = reg_e + self.reg_cl = reg_cl + self.mode = mode + self.max_iter = max_iter + self.max_inner_iter = max_inner_iter + self.tol = tol + self.verbose = verbose + self.log = log + self.metric = metric + self.distribution_estimation = distribution_estimation + self.out_of_sample_map = out_of_sample_map + + def fit(self, Xs, ys=None, Xt=None, yt=None): + """Build a coupling matrix from source and target sets of samples + (Xs, ys) and (Xt, yt) + Parameters + ---------- + Xs : array-like of shape = [n_source_samples, n_features] + The training input samples. + ys : array-like, shape = [n_source_samples] + The class labels + Xt : array-like of shape = [n_target_samples, n_features] + The training input samples. + yt : array-like, shape = [n_labeled_target_samples] + The class labels + Returns + ------- + self : object + Returns self. + """ + + super(SinkhornL1l2Transport, self).fit(Xs, ys, Xt, yt) + + self.Coupling_ = sinkhorn_l1l2_gl( + a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.Cost, + reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter, + numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol, + verbose=self.verbose, log=self.log) + + return self diff --git a/test/test_da.py b/test/test_da.py index 7d00cfb..68d1958 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -63,6 +63,56 @@ def test_sinkhorn_lpl1_transport_class(): assert_equal(transp_Xs.shape, Xs.shape) +def test_sinkhorn_l1l2_transport_class(): + """test_sinkhorn_transport + """ + + ns = 150 + nt = 200 + + Xs, ys = get_data_classif('3gauss', ns) + Xt, yt = get_data_classif('3gauss2', nt) + + clf = ot.da.SinkhornL1l2Transport() + + # test its computed + clf.fit(Xs=Xs, ys=ys, Xt=Xt) + + # test dimensions of coupling + assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + + # test margin constraints + mu_s = unif(ns) + mu_t = unif(nt) + assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + + # test transform + transp_Xs = clf.transform(Xs=Xs) + assert_equal(transp_Xs.shape, Xs.shape) + + Xs_new, _ = get_data_classif('3gauss', ns + 1) + transp_Xs_new = clf.transform(Xs_new) + + # check that the oos method is not working + assert_equal(transp_Xs_new, Xs_new) + + # test inverse transform + transp_Xt = clf.inverse_transform(Xt=Xt) + assert_equal(transp_Xt.shape, Xt.shape) + + Xt_new, _ = get_data_classif('3gauss2', nt + 1) + transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + + # check that the oos method is not working and returns the input data + assert_equal(transp_Xt_new, Xt_new) + + # test fit_transform + transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) + assert_equal(transp_Xs.shape, Xs.shape) + + def test_sinkhorn_transport_class(): """test_sinkhorn_transport """ -- cgit v1.2.3 From 0b005906f9d78adbf4d52d2ea9610eb3fde96a7c Mon Sep 17 00:00:00 2001 From: Slasnista Date: Fri, 4 Aug 2017 12:04:04 +0200 Subject: semi supervised mode supported --- ot/da.py | 21 +++++++++++++++++++-- test/test_da.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) (limited to 'test') diff --git a/ot/da.py b/ot/da.py index 6100d15..8294e8d 
100644 --- a/ot/da.py +++ b/ot/da.py @@ -1089,8 +1089,25 @@ class BaseTransport(BaseEstimator): self.Cost = dist(Xs, Xt, metric=self.metric) if self.mode == "semisupervised": - print("TODO: modify cost matrix accordingly") - pass + + if (ys is not None) and (yt is not None): + + # assumes labeled source samples occupy the first rows + # and labeled target samples occupy the first columns + classes = np.unique(ys) + for c in classes: + ids = np.where(ys == c) + idt = np.where(yt == c) + + # all the coefficients corresponding to a source sample + # and a target sample with the same label gets a 0 + # transport cost + for j in idt[0]: + self.Cost[ids[0], j] = 0 + else: + print("Warning: using unsupervised mode\ + \nto use semisupervised mode, please provide ys and yt") + pass # distribution estimation self.mu_s = self.distribution_estimation(Xs) diff --git a/test/test_da.py b/test/test_da.py index 68d1958..497a8ee 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -62,6 +62,19 @@ def test_sinkhorn_lpl1_transport_class(): transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) + # test semi supervised mode + clf = ot.da.SinkhornTransport(mode="semisupervised") + clf.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(clf.Cost) + + # test semi supervised mode + clf = ot.da.SinkhornTransport(mode="semisupervised") + clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.Cost) + + assert n_unsup != n_semisup, "semisupervised mode not working" + def test_sinkhorn_l1l2_transport_class(): """test_sinkhorn_transport @@ -112,6 +125,19 @@ def test_sinkhorn_l1l2_transport_class(): transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) + # test semi supervised mode + clf = ot.da.SinkhornTransport(mode="semisupervised") + clf.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(clf.Cost) + + # test semi supervised mode + clf = ot.da.SinkhornTransport(mode="semisupervised") + clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.Cost) + + assert n_unsup != n_semisup, "semisupervised mode not working" + def test_sinkhorn_transport_class(): """test_sinkhorn_transport @@ -162,6 +188,19 @@ def test_sinkhorn_transport_class(): transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) + # test semi supervised mode + clf = ot.da.SinkhornTransport(mode="semisupervised") + clf.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(clf.Cost) + + # test semi supervised mode + clf = ot.da.SinkhornTransport(mode="semisupervised") + clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.Cost) + + assert n_unsup != n_semisup, "semisupervised mode not working" + def test_emd_transport_class(): """test_sinkhorn_transport @@ -212,6 +251,19 @@ def test_emd_transport_class(): transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) + # test semi supervised mode + clf = ot.da.SinkhornTransport(mode="semisupervised") + clf.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(clf.Cost) + + # test semi supervised mode + clf = ot.da.SinkhornTransport(mode="semisupervised") + clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.Cost) + + assert n_unsup != n_semisup, "semisupervised mode not working" + def test_otda(): -- cgit v1.2.3 From d793f1f73e6f816458d8b307762675aa9fa84d22 Mon Sep 17 00:00:00 2001 
From: Slasnista
Date: Fri, 4 Aug 2017 13:56:51 +0200
Subject: correction of semi supervised mode
---
ot/da.py | 77 +++++++++++++++++++++++++++++++++------------------------
test/test_da.py | 20 +++++++--------
2 files changed, 55 insertions(+), 42 deletions(-)
(limited to 'test')

diff --git a/ot/da.py b/ot/da.py
index 8294e8d..08e8a8d 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -1088,26 +1088,23 @@ class BaseTransport(BaseEstimator):
# pairwise distance
self.Cost = dist(Xs, Xt, metric=self.metric)
- if self.mode == "semisupervised":
-
- if (ys is not None) and (yt is not None):
-
- # assumes labeled source samples occupy the first rows
- # and labeled target samples occupy the first columns
- classes = np.unique(ys)
- for c in classes:
- ids = np.where(ys == c)
- idt = np.where(yt == c)
-
- # all the coefficients corresponding to a source sample
- # and a target sample with the same label gets a 0
- # transport cost
- for j in idt[0]:
- self.Cost[ids[0], j] = 0
- else:
- print("Warning: using unsupervised mode\
- \nto use semisupervised mode, please provide ys and yt")
- pass
+ if (ys is not None) and (yt is not None):
+
+ if self.limit_max != np.infty:
+ self.limit_max = self.limit_max * np.max(self.Cost)
+
+ # assumes labeled source samples occupy the first rows
+ # and labeled target samples occupy the first columns
+ classes = np.unique(ys)
+ for c in classes:
+ idx_s = np.where((ys != c) & (ys != -1))
+ idx_t = np.where(yt == c)
+
+ # all the coefficients corresponding to a source sample
+ # and a target sample :
+ # with different labels get an infinite cost
+ for j in idx_t[0]:
+ self.Cost[idx_s[0], j] = self.limit_max
# distribution estimation
self.mu_s = self.distribution_estimation(Xs)
@@ -1243,6 +1240,9 @@ class SinkhornTransport(BaseTransport):
Controls the verbosity of the optimization algorithm
log : int, optional (default=0)
Controls the logs of the optimization algorithm
+ limit_max: float, optional (default=np.infty)
+ Controls the semi supervised mode. Transport between labeled source
+ and target samples of different classes will exhibit an infinite cost
Attributes
----------
Coupling_ : the optimal coupling
@@ -1257,19 +1257,19 @@ class SinkhornTransport(BaseTransport):
26, 2013
"""
- def __init__(self, reg_e=1., mode="unsupervised", max_iter=1000,
+ def __init__(self, reg_e=1., max_iter=1000,
tol=10e-9, verbose=False, log=False,
metric="sqeuclidean",
distribution_estimation=distribution_estimation_uniform,
- out_of_sample_map='ferradans'):
+ out_of_sample_map='ferradans', limit_max=np.infty):
self.reg_e = reg_e
- self.mode = mode
self.max_iter = max_iter
self.tol = tol
self.verbose = verbose
self.log = log
self.metric = metric
+ self.limit_max = limit_max
self.distribution_estimation = distribution_estimation
self.out_of_sample_map = out_of_sample_map
@@ -1326,6 +1326,10 @@ class EMDTransport(BaseTransport):
Controls the verbosity of the optimization algorithm
log : int, optional (default=0)
Controls the logs of the optimization algorithm
+ limit_max: float, optional (default=10)
+ Controls the semi supervised mode.
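
# Aside: a condensed rerun of the masking loop introduced above on a
# tiny cost matrix, to make its effect concrete (labels and sizes are
# toy values):
import numpy as np

Cost = np.ones((4, 3))
ys = np.array([0, 0, 1, 1])  # source labels
yt = np.array([0, 1, 1])     # target labels
limit_max = np.infty

for c in np.unique(ys):
    idx_s = np.where((ys != c) & (ys != -1))
    idx_t = np.where(yt == c)
    for j in idx_t[0]:
        Cost[idx_s[0], j] = limit_max

# class-0 sources can no longer be matched to class-1 targets and
# vice versa:
print(Cost)
# [[  1.  inf  inf]
#  [  1.  inf  inf]
#  [ inf   1.   1.]
#  [ inf   1.   1.]]
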
Transport between labeled source + and target samples of different classes will exhibit an infinite cost + (10 times the maximum value of the cost matrix) Attributes ---------- Coupling_ : the optimal coupling @@ -1337,15 +1341,15 @@ class EMDTransport(BaseTransport): on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 """ - def __init__(self, mode="unsupervised", verbose=False, + def __init__(self, verbose=False, log=False, metric="sqeuclidean", distribution_estimation=distribution_estimation_uniform, - out_of_sample_map='ferradans'): + out_of_sample_map='ferradans', limit_max=10): - self.mode = mode self.verbose = verbose self.log = log self.metric = metric + self.limit_max = limit_max self.distribution_estimation = distribution_estimation self.out_of_sample_map = out_of_sample_map @@ -1414,6 +1418,10 @@ class SinkhornLpl1Transport(BaseTransport): Controls the verbosity of the optimization algorithm log : int, optional (default=0) Controls the logs of the optimization algorithm + limit_max: float, optional (defaul=np.infty) + Controls the semi supervised mode. Transport between labeled source + and target samples of different classes will exhibit an infinite cost + Attributes ---------- Coupling_ : the optimal coupling @@ -1431,16 +1439,15 @@ class SinkhornLpl1Transport(BaseTransport): """ - def __init__(self, reg_e=1., reg_cl=0.1, mode="unsupervised", + def __init__(self, reg_e=1., reg_cl=0.1, max_iter=10, max_inner_iter=200, tol=10e-9, verbose=False, log=False, metric="sqeuclidean", distribution_estimation=distribution_estimation_uniform, - out_of_sample_map='ferradans'): + out_of_sample_map='ferradans', limit_max=np.infty): self.reg_e = reg_e self.reg_cl = reg_cl - self.mode = mode self.max_iter = max_iter self.max_inner_iter = max_inner_iter self.tol = tol @@ -1449,6 +1456,7 @@ class SinkhornLpl1Transport(BaseTransport): self.metric = metric self.distribution_estimation = distribution_estimation self.out_of_sample_map = out_of_sample_map + self.limit_max = limit_max def fit(self, Xs, ys=None, Xt=None, yt=None): """Build a coupling matrix from source and target sets of samples @@ -1514,6 +1522,11 @@ class SinkhornL1l2Transport(BaseTransport): Controls the verbosity of the optimization algorithm log : int, optional (default=0) Controls the logs of the optimization algorithm + limit_max: float, optional (default=10) + Controls the semi supervised mode. 
Transport between labeled source + and target samples of different classes will exhibit an infinite cost + (10 times the maximum value of the cost matrix) + Attributes ---------- Coupling_ : the optimal coupling @@ -1531,16 +1544,15 @@ class SinkhornL1l2Transport(BaseTransport): """ - def __init__(self, reg_e=1., reg_cl=0.1, mode="unsupervised", + def __init__(self, reg_e=1., reg_cl=0.1, max_iter=10, max_inner_iter=200, tol=10e-9, verbose=False, log=False, metric="sqeuclidean", distribution_estimation=distribution_estimation_uniform, - out_of_sample_map='ferradans'): + out_of_sample_map='ferradans', limit_max=10): self.reg_e = reg_e self.reg_cl = reg_cl - self.mode = mode self.max_iter = max_iter self.max_inner_iter = max_inner_iter self.tol = tol @@ -1549,6 +1561,7 @@ class SinkhornL1l2Transport(BaseTransport): self.metric = metric self.distribution_estimation = distribution_estimation self.out_of_sample_map = out_of_sample_map + self.limit_max = limit_max def fit(self, Xs, ys=None, Xt=None, yt=None): """Build a coupling matrix from source and target sets of samples diff --git a/test/test_da.py b/test/test_da.py index 497a8ee..ecd2a3a 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -63,12 +63,12 @@ def test_sinkhorn_lpl1_transport_class(): assert_equal(transp_Xs.shape, Xs.shape) # test semi supervised mode - clf = ot.da.SinkhornTransport(mode="semisupervised") - clf.fit(Xs=Xs, Xt=Xt) + clf = ot.da.SinkhornLpl1Transport() + clf.fit(Xs=Xs, ys=ys, Xt=Xt) n_unsup = np.sum(clf.Cost) # test semi supervised mode - clf = ot.da.SinkhornTransport(mode="semisupervised") + clf = ot.da.SinkhornLpl1Transport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) n_semisup = np.sum(clf.Cost) @@ -126,12 +126,12 @@ def test_sinkhorn_l1l2_transport_class(): assert_equal(transp_Xs.shape, Xs.shape) # test semi supervised mode - clf = ot.da.SinkhornTransport(mode="semisupervised") - clf.fit(Xs=Xs, Xt=Xt) + clf = ot.da.SinkhornL1l2Transport() + clf.fit(Xs=Xs, ys=ys, Xt=Xt) n_unsup = np.sum(clf.Cost) # test semi supervised mode - clf = ot.da.SinkhornTransport(mode="semisupervised") + clf = ot.da.SinkhornL1l2Transport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) n_semisup = np.sum(clf.Cost) @@ -189,12 +189,12 @@ def test_sinkhorn_transport_class(): assert_equal(transp_Xs.shape, Xs.shape) # test semi supervised mode - clf = ot.da.SinkhornTransport(mode="semisupervised") + clf = ot.da.SinkhornTransport() clf.fit(Xs=Xs, Xt=Xt) n_unsup = np.sum(clf.Cost) # test semi supervised mode - clf = ot.da.SinkhornTransport(mode="semisupervised") + clf = ot.da.SinkhornTransport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) n_semisup = np.sum(clf.Cost) @@ -252,12 +252,12 @@ def test_emd_transport_class(): assert_equal(transp_Xs.shape, Xs.shape) # test semi supervised mode - clf = ot.da.SinkhornTransport(mode="semisupervised") + clf = ot.da.EMDTransport() clf.fit(Xs=Xs, Xt=Xt) n_unsup = np.sum(clf.Cost) # test semi supervised mode - clf = ot.da.SinkhornTransport(mode="semisupervised") + clf = ot.da.EMDTransport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) n_semisup = np.sum(clf.Cost) -- cgit v1.2.3 From 738bfb1c560ff4e349f5083fc1f81a54e4be4980 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Fri, 4 Aug 2017 14:55:54 +0200 Subject: out of samples by Ferradans supported for transform and inverse_transform --- ot/da.py | 29 
+++++++++++++++++++++++------ test/test_da.py | 32 ++++++++++++++++---------------- 2 files changed, 39 insertions(+), 22 deletions(-) (limited to 'test') diff --git a/ot/da.py b/ot/da.py index 92a8f12..87d056d 100644 --- a/ot/da.py +++ b/ot/da.py @@ -1167,9 +1167,18 @@ class BaseTransport(BaseEstimator): transp_Xs = np.dot(transp, self.Xt) else: # perform out of sample mapping - print("Warning: out of sample mapping not yet implemented") - print("input data will be returned") - transp_Xs = Xs + + # get the nearest neighbor in the source domain + D0 = dist(Xs, self.Xs) + idx = np.argmin(D0, axis=1) + + # transport the source samples + transp = self.Coupling_ / np.sum(self.Coupling_, 1)[:, None] + transp[~ np.isfinite(transp)] = 0 + transp_Xs_ = np.dot(transp, self.Xt) + + # define the transported points + transp_Xs = transp_Xs_[idx, :] + Xs - self.Xs[idx, :] return transp_Xs @@ -1202,9 +1211,17 @@ class BaseTransport(BaseEstimator): transp_Xt = np.dot(transp_, self.Xs) else: # perform out of sample mapping - print("Warning: out of sample mapping not yet implemented") - print("input data will be returned") - transp_Xt = Xt + + D0 = dist(Xt, self.Xt) + idx = np.argmin(D0, axis=1) + + # transport the target samples + transp_ = self.Coupling_.T / np.sum(self.Coupling_, 0)[:, None] + transp_[~ np.isfinite(transp_)] = 0 + transp_Xt_ = np.dot(transp_, self.Xs) + + # define the transported points + transp_Xt = transp_Xt_[idx, :] + Xt - self.Xt[idx, :] return transp_Xt diff --git a/test/test_da.py b/test/test_da.py index ecd2a3a..aed9f61 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -45,8 +45,8 @@ def test_sinkhorn_lpl1_transport_class(): Xs_new, _ = get_data_classif('3gauss', ns + 1) transp_Xs_new = clf.transform(Xs_new) - # check that the oos method is not working - assert_equal(transp_Xs_new, Xs_new) + # check that the oos method is working + assert_equal(transp_Xs_new.shape, Xs_new.shape) # test inverse transform transp_Xt = clf.inverse_transform(Xt=Xt) @@ -55,8 +55,8 @@ def test_sinkhorn_lpl1_transport_class(): Xt_new, _ = get_data_classif('3gauss2', nt + 1) transp_Xt_new = clf.inverse_transform(Xt=Xt_new) - # check that the oos method is not working and returns the input data - assert_equal(transp_Xt_new, Xt_new) + # check that the oos method is working + assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) @@ -108,8 +108,8 @@ def test_sinkhorn_l1l2_transport_class(): Xs_new, _ = get_data_classif('3gauss', ns + 1) transp_Xs_new = clf.transform(Xs_new) - # check that the oos method is not working - assert_equal(transp_Xs_new, Xs_new) + # check that the oos method is working + assert_equal(transp_Xs_new.shape, Xs_new.shape) # test inverse transform transp_Xt = clf.inverse_transform(Xt=Xt) @@ -118,8 +118,8 @@ def test_sinkhorn_l1l2_transport_class(): Xt_new, _ = get_data_classif('3gauss2', nt + 1) transp_Xt_new = clf.inverse_transform(Xt=Xt_new) - # check that the oos method is not working and returns the input data - assert_equal(transp_Xt_new, Xt_new) + # check that the oos method is working + assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) @@ -171,8 +171,8 @@ def test_sinkhorn_transport_class(): Xs_new, _ = get_data_classif('3gauss', ns + 1) transp_Xs_new = clf.transform(Xs_new) - # check that the oos method is not working - assert_equal(transp_Xs_new, Xs_new) + # check that the oos method is working + assert_equal(transp_Xs_new.shape, 
Xs_new.shape) # test inverse transform transp_Xt = clf.inverse_transform(Xt=Xt) @@ -181,8 +181,8 @@ def test_sinkhorn_transport_class(): Xt_new, _ = get_data_classif('3gauss2', nt + 1) transp_Xt_new = clf.inverse_transform(Xt=Xt_new) - # check that the oos method is not working and returns the input data - assert_equal(transp_Xt_new, Xt_new) + # check that the oos method is working + assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt) @@ -234,8 +234,8 @@ def test_emd_transport_class(): Xs_new, _ = get_data_classif('3gauss', ns + 1) transp_Xs_new = clf.transform(Xs_new) - # check that the oos method is not working - assert_equal(transp_Xs_new, Xs_new) + # check that the oos method is working + assert_equal(transp_Xs_new.shape, Xs_new.shape) # test inverse transform transp_Xt = clf.inverse_transform(Xt=Xt) @@ -244,8 +244,8 @@ def test_emd_transport_class(): Xt_new, _ = get_data_classif('3gauss2', nt + 1) transp_Xt_new = clf.inverse_transform(Xt=Xt_new) - # check that the oos method is not working and returns the input data - assert_equal(transp_Xt_new, Xt_new) + # check that the oos method is working + assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt) -- cgit v1.2.3 From 8149e059be7f715834d11b365855f2684bd3d6f5 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Wed, 23 Aug 2017 11:45:06 +0200 Subject: make doc strings compliant with numpy / modif according to AG review --- ot/da.py | 139 +++++++++++++++++++++++++++++++++----------------------- test/test_da.py | 13 ++++-- 2 files changed, 93 insertions(+), 59 deletions(-) (limited to 'test') diff --git a/ot/da.py b/ot/da.py index 0616d17..044d567 100644 --- a/ot/da.py +++ b/ot/da.py @@ -967,11 +967,13 @@ class BaseEstimator(object): def get_params(self, deep=True): """Get parameters for this estimator. + Parameters ---------- deep : boolean, optional If True, will return the parameters for this estimator and contained subobjects that are estimators. + Returns ------- params : mapping of string to any @@ -1002,10 +1004,12 @@ class BaseEstimator(object): def set_params(self, **params): """Set the parameters of this estimator. + The method works on simple estimators as well as on nested objects (such as pipelines). The latter have parameters of the form ``__`` so that it's possible to update each component of a nested object. + Returns ------- self @@ -1053,11 +1057,12 @@ def distribution_estimation_uniform(X): Parameters ---------- - X : array-like of shape = (n_samples, n_features) + X : array-like, shape (n_samples, n_features) The array of samples + Returns ------- - mu : array-like, shape = (n_samples,) + mu : array-like, shape (n_samples,) The uniform distribution estimated from X """ @@ -1069,16 +1074,18 @@ class BaseTransport(BaseEstimator): def fit(self, Xs=None, ys=None, Xt=None, yt=None): """Build a coupling matrix from source and target sets of samples (Xs, ys) and (Xt, yt) + Parameters ---------- - Xs : array-like of shape = (n_source_samples, n_features) + Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels - Xt : array-like of shape = (n_target_samples, n_features) + Xt : array-like, shape (n_target_samples, n_features) The training input samples. 
- yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels + Returns ------- self : object @@ -1086,12 +1093,12 @@ class BaseTransport(BaseEstimator): """ # pairwise distance - self.Cost = dist(Xs, Xt, metric=self.metric) + self.cost_ = dist(Xs, Xt, metric=self.metric) if (ys is not None) and (yt is not None): if self.limit_max != np.infty: - self.limit_max = self.limit_max * np.max(self.Cost) + self.limit_max = self.limit_max * np.max(self.cost_) # assumes labeled source samples occupy the first rows # and labeled target samples occupy the first columns @@ -1104,7 +1111,7 @@ class BaseTransport(BaseEstimator): # and a target sample : # with different labels get a infinite for j in idx_t[0]: - self.Cost[idx_s[0], j] = self.limit_max + self.cost_[idx_s[0], j] = self.limit_max # distribution estimation self.mu_s = self.distribution_estimation(Xs) @@ -1120,19 +1127,21 @@ class BaseTransport(BaseEstimator): """Build a coupling matrix from source and target sets of samples (Xs, ys) and (Xt, yt) and transports source samples Xs onto target ones Xt + Parameters ---------- - Xs : array-like of shape = (n_source_samples, n_features) + Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels - Xt : array-like of shape = (n_target_samples, n_features) + Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels + Returns ------- - transp_Xs : array-like of shape = (n_source_samples, n_features) + transp_Xs : array-like, shape (n_source_samples, n_features) The source samples samples. """ @@ -1140,25 +1149,27 @@ class BaseTransport(BaseEstimator): def transform(self, Xs=None, ys=None, Xt=None, yt=None): """Transports source samples Xs onto target ones Xt + Parameters ---------- - Xs : array-like of shape = (n_source_samples, n_features) + Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels - Xt : array-like of shape = (n_target_samples, n_features) + Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels + Returns ------- - transp_Xs : array-like of shape = (n_source_samples, n_features) + transp_Xs : array-like, shape (n_source_samples, n_features) The transport source samples. 
""" if np.array_equal(self.Xs, Xs): # perform standard barycentric mapping - transp = self.Coupling_ / np.sum(self.Coupling_, 1)[:, None] + transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None] # set nans to 0 transp[~ np.isfinite(transp)] = 0 @@ -1173,7 +1184,7 @@ class BaseTransport(BaseEstimator): idx = np.argmin(D0, axis=1) # transport the source samples - transp = self.Coupling_ / np.sum(self.Coupling_, 1)[:, None] + transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None] transp[~ np.isfinite(transp)] = 0 transp_Xs_ = np.dot(transp, self.Xt) @@ -1184,25 +1195,27 @@ class BaseTransport(BaseEstimator): def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None): """Transports target samples Xt onto target samples Xs + Parameters ---------- - Xs : array-like of shape = (n_source_samples, n_features) + Xs : array-like, shape (n_source_samples, n_features) The training input samples. ys : array-like, shape = (n_source_samples,) The class labels - Xt : array-like of shape = (n_target_samples, n_features) + Xt : array-like, shape (n_target_samples, n_features) The training input samples. yt : array-like, shape = (n_labeled_target_samples,) The class labels + Returns ------- - transp_Xt : array-like of shape = (n_source_samples, n_features) + transp_Xt : array-like, shape (n_source_samples, n_features) The transported target samples. """ if np.array_equal(self.Xt, Xt): # perform standard barycentric mapping - transp_ = self.Coupling_.T / np.sum(self.Coupling_, 0)[:, None] + transp_ = self.coupling_.T / np.sum(self.coupling_, 0)[:, None] # set nans to 0 transp_[~ np.isfinite(transp_)] = 0 @@ -1216,7 +1229,7 @@ class BaseTransport(BaseEstimator): idx = np.argmin(D0, axis=1) # transport the target samples - transp_ = self.Coupling_.T / np.sum(self.Coupling_, 0)[:, None] + transp_ = self.coupling_.T / np.sum(self.coupling_, 0)[:, None] transp_[~ np.isfinite(transp_)] = 0 transp_Xt_ = np.dot(transp_, self.Xs) @@ -1254,9 +1267,10 @@ class SinkhornTransport(BaseTransport): limit_max: float, optional (defaul=np.infty) Controls the semi supervised mode. Transport between labeled source and target samples of different classes will exhibit an infinite cost + Attributes ---------- - Coupling_ : the optimal coupling + coupling_ : the optimal coupling References ---------- @@ -1287,16 +1301,18 @@ class SinkhornTransport(BaseTransport): def fit(self, Xs=None, ys=None, Xt=None, yt=None): """Build a coupling matrix from source and target sets of samples (Xs, ys) and (Xt, yt) + Parameters ---------- - Xs : array-like of shape = (n_source_samples, n_features) + Xs : array-like, shape (n_source_samples, n_features) The training input samples. ys : array-like, shape = (n_source_samples,) The class labels - Xt : array-like of shape = (n_target_samples, n_features) + Xt : array-like, shape (n_target_samples, n_features) The training input samples. 
yt : array-like, shape = (n_labeled_target_samples,) The class labels + Returns ------- self : object @@ -1306,8 +1322,8 @@ class SinkhornTransport(BaseTransport): super(SinkhornTransport, self).fit(Xs, ys, Xt, yt) # coupling estimation - self.Coupling_ = sinkhorn( - a=self.mu_s, b=self.mu_t, M=self.Cost, reg=self.reg_e, + self.coupling_ = sinkhorn( + a=self.mu_s, b=self.mu_t, M=self.cost_, reg=self.reg_e, numItermax=self.max_iter, stopThr=self.tol, verbose=self.verbose, log=self.log) @@ -1316,6 +1332,7 @@ class SinkhornTransport(BaseTransport): class EMDTransport(BaseTransport): """Domain Adapatation OT method based on Earth Mover's Distance + Parameters ---------- mapping : string, optional (default="barycentric") @@ -1335,9 +1352,10 @@ class EMDTransport(BaseTransport): Controls the semi supervised mode. Transport between labeled source and target samples of different classes will exhibit an infinite cost (10 times the maximum value of the cost matrix) + Attributes ---------- - Coupling_ : the optimal coupling + coupling_ : the optimal coupling References ---------- @@ -1358,16 +1376,18 @@ class EMDTransport(BaseTransport): def fit(self, Xs, ys=None, Xt=None, yt=None): """Build a coupling matrix from source and target sets of samples (Xs, ys) and (Xt, yt) + Parameters ---------- - Xs : array-like of shape = (n_source_samples, n_features) + Xs : array-like, shape (n_source_samples, n_features) The training input samples. ys : array-like, shape = (n_source_samples,) The class labels - Xt : array-like of shape = (n_target_samples, n_features) + Xt : array-like, shape (n_target_samples, n_features) The training input samples. yt : array-like, shape = (n_labeled_target_samples,) The class labels + Returns ------- self : object @@ -1377,8 +1397,8 @@ class EMDTransport(BaseTransport): super(EMDTransport, self).fit(Xs, ys, Xt, yt) # coupling estimation - self.Coupling_ = emd( - a=self.mu_s, b=self.mu_t, M=self.Cost, + self.coupling_ = emd( + a=self.mu_s, b=self.mu_t, M=self.cost_, ) return self @@ -1418,7 +1438,7 @@ class SinkhornLpl1Transport(BaseTransport): Attributes ---------- - Coupling_ : the optimal coupling + coupling_ : the optimal coupling References ---------- @@ -1455,16 +1475,18 @@ class SinkhornLpl1Transport(BaseTransport): def fit(self, Xs, ys=None, Xt=None, yt=None): """Build a coupling matrix from source and target sets of samples (Xs, ys) and (Xt, yt) + Parameters ---------- - Xs : array-like of shape = (n_source_samples, n_features) + Xs : array-like, shape (n_source_samples, n_features) The training input samples. ys : array-like, shape = (n_source_samples,) The class labels - Xt : array-like of shape = (n_target_samples, n_features) + Xt : array-like, shape (n_target_samples, n_features) The training input samples. 
yt : array-like, shape = (n_labeled_target_samples,) The class labels + Returns ------- self : object @@ -1473,8 +1495,8 @@ class SinkhornLpl1Transport(BaseTransport): super(SinkhornLpl1Transport, self).fit(Xs, ys, Xt, yt) - self.Coupling_ = sinkhorn_lpl1_mm( - a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.Cost, + self.coupling_ = sinkhorn_lpl1_mm( + a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.cost_, reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter, numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol, verbose=self.verbose, log=self.log) @@ -1517,7 +1539,7 @@ class SinkhornL1l2Transport(BaseTransport): Attributes ---------- - Coupling_ : the optimal coupling + coupling_ : the optimal coupling References ---------- @@ -1554,16 +1576,18 @@ class SinkhornL1l2Transport(BaseTransport): def fit(self, Xs, ys=None, Xt=None, yt=None): """Build a coupling matrix from source and target sets of samples (Xs, ys) and (Xt, yt) + Parameters ---------- - Xs : array-like of shape = (n_source_samples, n_features) + Xs : array-like, shape (n_source_samples, n_features) The training input samples. ys : array-like, shape = (n_source_samples,) The class labels - Xt : array-like of shape = (n_target_samples, n_features) + Xt : array-like, shape (n_target_samples, n_features) The training input samples. yt : array-like, shape = (n_labeled_target_samples,) The class labels + Returns ------- self : object @@ -1572,8 +1596,8 @@ class SinkhornL1l2Transport(BaseTransport): super(SinkhornL1l2Transport, self).fit(Xs, ys, Xt, yt) - self.Coupling_ = sinkhorn_l1l2_gl( - a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.Cost, + self.coupling_ = sinkhorn_l1l2_gl( + a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.cost_, reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter, numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol, verbose=self.verbose, log=self.log) @@ -1614,8 +1638,8 @@ class MappingTransport(BaseEstimator): Attributes ---------- - Coupling_ : the optimal coupling - Mapping_ : the mapping associated + coupling_ : the optimal coupling + mapping_ : the mapping associated References ---------- @@ -1646,16 +1670,18 @@ class MappingTransport(BaseEstimator): def fit(self, Xs=None, ys=None, Xt=None, yt=None): """Builds an optimal coupling and estimates the associated mapping from source and target sets of samples (Xs, ys) and (Xt, yt) + Parameters ---------- - Xs : array-like of shape = (n_source_samples, n_features) + Xs : array-like, shape (n_source_samples, n_features) The training input samples. ys : array-like, shape = (n_source_samples,) The class labels - Xt : array-like of shape = (n_target_samples, n_features) + Xt : array-like, shape (n_target_samples, n_features) The training input samples. 
yt : array-like, shape = (n_labeled_target_samples,) The class labels + Returns ------- self : object @@ -1666,14 +1692,14 @@ class MappingTransport(BaseEstimator): self.Xt = Xt if self.kernel == "linear": - self.Coupling_, self.Mapping_ = joint_OT_mapping_linear( + self.coupling_, self.mapping_ = joint_OT_mapping_linear( Xs, Xt, mu=self.mu, eta=self.eta, bias=self.bias, verbose=self.verbose, verbose2=self.verbose2, numItermax=self.max_iter, numInnerItermax=self.max_inner_iter, stopThr=self.tol, stopInnerThr=self.inner_tol, log=self.log) elif self.kernel == "gaussian": - self.Coupling_, self.Mapping_ = joint_OT_mapping_kernel( + self.coupling_, self.mapping_ = joint_OT_mapping_kernel( Xs, Xt, mu=self.mu, eta=self.eta, bias=self.bias, sigma=self.sigma, verbose=self.verbose, verbose2=self.verbose, numItermax=self.max_iter, numInnerItermax=self.max_inner_iter, @@ -1683,20 +1709,21 @@ class MappingTransport(BaseEstimator): def transform(self, Xs): """Transports source samples Xs onto target ones Xt + Parameters ---------- - Xs : array-like of shape = (n_source_samples, n_features) + Xs : array-like, shape (n_source_samples, n_features) The training input samples. Returns ------- - transp_Xs : array-like of shape = (n_source_samples, n_features) + transp_Xs : array-like, shape (n_source_samples, n_features) The transport source samples. """ if np.array_equal(self.Xs, Xs): # perform standard barycentric mapping - transp = self.Coupling_ / np.sum(self.Coupling_, 1)[:, None] + transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None] # set nans to 0 transp[~ np.isfinite(transp)] = 0 @@ -1710,6 +1737,6 @@ class MappingTransport(BaseEstimator): K = Xs if self.bias: K = np.hstack((K, np.ones((Xs.shape[0], 1)))) - transp_Xs = K.dot(self.Mapping_) + transp_Xs = K.dot(self.mapping_) return transp_Xs diff --git a/test/test_da.py b/test/test_da.py index aed9f61..93f7e83 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -5,13 +5,12 @@ # License: MIT License import numpy as np -import ot from numpy.testing.utils import assert_allclose, assert_equal + +import ot from ot.datasets import get_data_classif from ot.utils import unif -np.random.seed(42) - def test_sinkhorn_lpl1_transport_class(): """test_sinkhorn_transport @@ -325,3 +324,11 @@ def test_otda(): da_emd = ot.da.OTDA_mapping_kernel() # init class da_emd.fit(xs, xt, numItermax=10) # fit distributions da_emd.predict(xs) # interpolation of source samples + + +if __name__ == "__main__": + + test_sinkhorn_transport_class() + test_emd_transport_class() + test_sinkhorn_l1l2_transport_class() + test_sinkhorn_lpl1_transport_class() -- cgit v1.2.3 From 791a4a6f215033a75d5f56cd16fe2412301bec14 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Wed, 23 Aug 2017 13:50:24 +0200 Subject: out of samples transform and inverse transform by batch --- ot/da.py | 89 +++++++++++++++++++++++++++++++++++++-------------------- test/test_da.py | 66 +++++++++++++++++++++--------------------- 2 files changed, 91 insertions(+), 64 deletions(-) (limited to 'test') diff --git a/ot/da.py b/ot/da.py index 044d567..0c83ae6 100644 --- a/ot/da.py +++ b/ot/da.py @@ -1147,7 +1147,7 @@ class BaseTransport(BaseEstimator): return self.fit(Xs, ys, Xt, yt).transform(Xs, ys, Xt, yt) - def transform(self, Xs=None, ys=None, Xt=None, yt=None): + def transform(self, Xs=None, ys=None, Xt=None, yt=None, batch_size=128): """Transports source samples Xs onto target ones Xt Parameters @@ -1160,6 +1160,8 @@ class BaseTransport(BaseEstimator): The training input samples. 
yt : array-like, shape (n_labeled_target_samples,) The class labels + batch_size : int, optional (default=128) + The batch size for out of sample inverse transform Returns ------- @@ -1178,34 +1180,48 @@ class BaseTransport(BaseEstimator): transp_Xs = np.dot(transp, self.Xt) else: # perform out of sample mapping + indices = np.arange(Xs.shape[0]) + batch_ind = [ + indices[i:i + batch_size] + for i in range(0, len(indices), batch_size)] - # get the nearest neighbor in the source domain - D0 = dist(Xs, self.Xs) - idx = np.argmin(D0, axis=1) + transp_Xs = [] + for bi in batch_ind: - # transport the source samples - transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None] - transp[~ np.isfinite(transp)] = 0 - transp_Xs_ = np.dot(transp, self.Xt) + # get the nearest neighbor in the source domain + D0 = dist(Xs[bi], self.Xs) + idx = np.argmin(D0, axis=1) + + # transport the source samples + transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None] + transp[~ np.isfinite(transp)] = 0 + transp_Xs_ = np.dot(transp, self.Xt) - # define the transported points - transp_Xs = transp_Xs_[idx, :] + Xs - self.Xs[idx, :] + # define the transported points + transp_Xs_ = transp_Xs_[idx, :] + Xs[bi] - self.Xs[idx, :] + + transp_Xs.append(transp_Xs_) + + transp_Xs = np.concatenate(transp_Xs, axis=0) return transp_Xs - def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None): + def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None, + batch_size=128): """Transports target samples Xt onto target samples Xs Parameters ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels + batch_size : int, optional (default=128) + The batch size for out of sample inverse transform Returns ------- @@ -1224,17 +1240,28 @@ class BaseTransport(BaseEstimator): transp_Xt = np.dot(transp_, self.Xs) else: # perform out of sample mapping + indices = np.arange(Xt.shape[0]) + batch_ind = [ + indices[i:i + batch_size] + for i in range(0, len(indices), batch_size)] - D0 = dist(Xt, self.Xt) - idx = np.argmin(D0, axis=1) + transp_Xt = [] + for bi in batch_ind: - # transport the target samples - transp_ = self.coupling_.T / np.sum(self.coupling_, 0)[:, None] - transp_[~ np.isfinite(transp_)] = 0 - transp_Xt_ = np.dot(transp_, self.Xs) + D0 = dist(Xt[bi], self.Xt) + idx = np.argmin(D0, axis=1) + + # transport the target samples + transp_ = self.coupling_.T / np.sum(self.coupling_, 0)[:, None] + transp_[~ np.isfinite(transp_)] = 0 + transp_Xt_ = np.dot(transp_, self.Xs) + + # define the transported points + transp_Xt_ = transp_Xt_[idx, :] + Xt[bi] - self.Xt[idx, :] - # define the transported points - transp_Xt = transp_Xt_[idx, :] + Xt - self.Xt[idx, :] + transp_Xt.append(transp_Xt_) + + transp_Xt = np.concatenate(transp_Xt, axis=0) return transp_Xt @@ -1306,11 +1333,11 @@ class SinkhornTransport(BaseTransport): ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. 
- yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels Returns @@ -1381,11 +1408,11 @@ class EMDTransport(BaseTransport): ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels Returns @@ -1480,11 +1507,11 @@ class SinkhornLpl1Transport(BaseTransport): ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels Returns @@ -1581,11 +1608,11 @@ class SinkhornL1l2Transport(BaseTransport): ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels Returns @@ -1675,11 +1702,11 @@ class MappingTransport(BaseEstimator): ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. 
- yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels Returns diff --git a/test/test_da.py b/test/test_da.py index 93f7e83..196f4c4 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -28,14 +28,14 @@ def test_sinkhorn_lpl1_transport_class(): clf.fit(Xs=Xs, ys=ys, Xt=Xt) # test dimensions of coupling - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = clf.transform(Xs=Xs) @@ -64,13 +64,13 @@ def test_sinkhorn_lpl1_transport_class(): # test semi supervised mode clf = ot.da.SinkhornLpl1Transport() clf.fit(Xs=Xs, ys=ys, Xt=Xt) - n_unsup = np.sum(clf.Cost) + n_unsup = np.sum(clf.cost_) # test semi supervised mode clf = ot.da.SinkhornLpl1Transport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.Cost) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.cost_) assert n_unsup != n_semisup, "semisupervised mode not working" @@ -91,14 +91,14 @@ def test_sinkhorn_l1l2_transport_class(): clf.fit(Xs=Xs, ys=ys, Xt=Xt) # test dimensions of coupling - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = clf.transform(Xs=Xs) @@ -127,13 +127,13 @@ def test_sinkhorn_l1l2_transport_class(): # test semi supervised mode clf = ot.da.SinkhornL1l2Transport() clf.fit(Xs=Xs, ys=ys, Xt=Xt) - n_unsup = np.sum(clf.Cost) + n_unsup = np.sum(clf.cost_) # test semi supervised mode clf = ot.da.SinkhornL1l2Transport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.Cost) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.cost_) assert n_unsup != n_semisup, "semisupervised mode not working" @@ -154,14 +154,14 @@ def test_sinkhorn_transport_class(): clf.fit(Xs=Xs, Xt=Xt) # test dimensions of coupling - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + 
assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = clf.transform(Xs=Xs) @@ -190,13 +190,13 @@ def test_sinkhorn_transport_class(): # test semi supervised mode clf = ot.da.SinkhornTransport() clf.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf.Cost) + n_unsup = np.sum(clf.cost_) # test semi supervised mode clf = ot.da.SinkhornTransport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.Cost) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.cost_) assert n_unsup != n_semisup, "semisupervised mode not working" @@ -217,14 +217,14 @@ def test_emd_transport_class(): clf.fit(Xs=Xs, Xt=Xt) # test dimensions of coupling - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = clf.transform(Xs=Xs) @@ -253,13 +253,13 @@ def test_emd_transport_class(): # test semi supervised mode clf = ot.da.EMDTransport() clf.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf.Cost) + n_unsup = np.sum(clf.cost_) # test semi supervised mode clf = ot.da.EMDTransport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.Cost) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.cost_) assert n_unsup != n_semisup, "semisupervised mode not working" @@ -326,9 +326,9 @@ def test_otda(): da_emd.predict(xs) # interpolation of source samples -if __name__ == "__main__": +# if __name__ == "__main__": - test_sinkhorn_transport_class() - test_emd_transport_class() - test_sinkhorn_l1l2_transport_class() - test_sinkhorn_lpl1_transport_class() +# test_sinkhorn_transport_class() +# test_emd_transport_class() +# test_sinkhorn_l1l2_transport_class() +# test_sinkhorn_lpl1_transport_class() -- cgit v1.2.3 From 326d163db029515c338a963978a5d95948f78c29 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Wed, 23 Aug 2017 14:11:13 +0200 Subject: test functions for MappingTransport Class --- ot/da.py | 18 ++++++--- test/test_da.py | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 125 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/ot/da.py b/ot/da.py index 0c83ae6..3ccb1b3 100644 --- a/ot/da.py +++ b/ot/da.py @@ -1665,8 +1665,14 @@ class MappingTransport(BaseEstimator): Attributes ---------- - coupling_ : the optimal coupling - mapping_ : the mapping associated + coupling_ : array-like, shape (n_source_samples, n_features) + The optimal coupling + mapping_ : array-like, shape (n_features (+ 1), n_features) + (if bias) for kernel == linear + The associated mapping + + array-like, shape (n_source_samples (+ 1), n_features) + (if bias) for kernel == gaussian References ---------- @@ -1679,20 +1685,22 @@ class MappingTransport(BaseEstimator): def __init__(self, mu=1, eta=0.001, bias=False, 
metric="sqeuclidean", kernel="linear", sigma=1, max_iter=100, tol=1e-5, - max_inner_iter=10, inner_tol=1e-6, log=False, verbose=False): + max_inner_iter=10, inner_tol=1e-6, log=False, verbose=False, + verbose2=False): self.metric = metric self.mu = mu self.eta = eta self.bias = bias self.kernel = kernel - self.sigma + self.sigma = sigma self.max_iter = max_iter self.tol = tol self.max_inner_iter = max_inner_iter self.inner_tol = inner_tol self.log = log self.verbose = verbose + self.verbose2 = verbose2 def fit(self, Xs=None, ys=None, Xt=None, yt=None): """Builds an optimal coupling and estimates the associated mapping @@ -1712,7 +1720,7 @@ class MappingTransport(BaseEstimator): Returns ------- self : object - Returns self. + Returns self """ self.Xs = Xs diff --git a/test/test_da.py b/test/test_da.py index 196f4c4..162f681 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -264,6 +264,112 @@ def test_emd_transport_class(): assert n_unsup != n_semisup, "semisupervised mode not working" +def test_mapping_transport_class(): + """test_mapping_transport + """ + + ns = 150 + nt = 200 + + Xs, ys = get_data_classif('3gauss', ns) + Xt, yt = get_data_classif('3gauss2', nt) + Xs_new, _ = get_data_classif('3gauss', ns + 1) + + ########################################################################## + # kernel == linear mapping tests + ########################################################################## + + # check computation and dimensions if bias == False + clf = ot.da.MappingTransport(kernel="linear", bias=False) + clf.fit(Xs=Xs, Xt=Xt) + + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.mapping_.shape, ((Xs.shape[1], Xt.shape[1]))) + + # test margin constraints + mu_s = unif(ns) + mu_t = unif(nt) + assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + + # test transform + transp_Xs = clf.transform(Xs=Xs) + assert_equal(transp_Xs.shape, Xs.shape) + + transp_Xs_new = clf.transform(Xs_new) + + # check that the oos method is working + assert_equal(transp_Xs_new.shape, Xs_new.shape) + + # check computation and dimensions if bias == True + clf = ot.da.MappingTransport(kernel="linear", bias=True) + clf.fit(Xs=Xs, Xt=Xt) + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.mapping_.shape, ((Xs.shape[1] + 1, Xt.shape[1]))) + + # test margin constraints + mu_s = unif(ns) + mu_t = unif(nt) + assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + + # test transform + transp_Xs = clf.transform(Xs=Xs) + assert_equal(transp_Xs.shape, Xs.shape) + + transp_Xs_new = clf.transform(Xs_new) + + # check that the oos method is working + assert_equal(transp_Xs_new.shape, Xs_new.shape) + + ########################################################################## + # kernel == gaussian mapping tests + ########################################################################## + + # check computation and dimensions if bias == False + clf = ot.da.MappingTransport(kernel="gaussian", bias=False) + clf.fit(Xs=Xs, Xt=Xt) + + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.mapping_.shape, ((Xs.shape[0], Xt.shape[1]))) + + # test margin constraints + mu_s = unif(ns) + mu_t = unif(nt) + assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, 
atol=1e-3) + + # test transform + transp_Xs = clf.transform(Xs=Xs) + assert_equal(transp_Xs.shape, Xs.shape) + + transp_Xs_new = clf.transform(Xs_new) + + # check that the oos method is working + assert_equal(transp_Xs_new.shape, Xs_new.shape) + + # check computation and dimensions if bias == True + clf = ot.da.MappingTransport(kernel="gaussian", bias=True) + clf.fit(Xs=Xs, Xt=Xt) + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.mapping_.shape, ((Xs.shape[0] + 1, Xt.shape[1]))) + + # test margin constraints + mu_s = unif(ns) + mu_t = unif(nt) + assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + + # test transform + transp_Xs = clf.transform(Xs=Xs) + assert_equal(transp_Xs.shape, Xs.shape) + + transp_Xs_new = clf.transform(Xs_new) + + # check that the oos method is working + assert_equal(transp_Xs_new.shape, Xs_new.shape) + + def test_otda(): n_samples = 150 # nb samples @@ -326,9 +432,10 @@ def test_otda(): da_emd.predict(xs) # interpolation of source samples -# if __name__ == "__main__": +if __name__ == "__main__": -# test_sinkhorn_transport_class() -# test_emd_transport_class() -# test_sinkhorn_l1l2_transport_class() -# test_sinkhorn_lpl1_transport_class() + # test_sinkhorn_transport_class() + # test_emd_transport_class() + # test_sinkhorn_l1l2_transport_class() + # test_sinkhorn_lpl1_transport_class() + test_mapping_transport_class() -- cgit v1.2.3 From 09302239b3e4e1a90c1a4e2d7a85b0af86b01365 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Wed, 23 Aug 2017 15:09:08 +0200 Subject: added deprecation warning on old classes --- ot/da.py | 22 ++++++++++-- ot/deprecation.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ test/test_da.py | 5 +-- 3 files changed, 126 insertions(+), 4 deletions(-) create mode 100644 ot/deprecation.py (limited to 'test') diff --git a/ot/da.py b/ot/da.py index 3ccb1b3..8fa1895 100644 --- a/ot/da.py +++ b/ot/da.py @@ -10,12 +10,14 @@ Domain adaptation with optimal transport # License: MIT License import numpy as np +import warnings + from .bregman import sinkhorn from .lp import emd from .utils import unif, dist, kernel from .optim import cg from .optim import gcg -import warnings +from .deprecation import deprecated def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, @@ -632,6 +634,9 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian', return G, L +@deprecated("The class OTDA is deprecated in 0.3.1 and will be " + "removed in 0.5" + "\n\tfor standard transport use class EMDTransport instead.") class OTDA(object): """Class for domain adaptation with optimal transport as proposed in [5] @@ -758,10 +763,15 @@ class OTDA(object): self.M = np.log(1 + np.log(1 + self.M)) +@deprecated("The class OTDA_sinkhorn is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class SinkhornTransport instead.") class OTDA_sinkhorn(OTDA): """Class for domain adaptation with optimal transport with entropic - regularization""" + regularization + + + """ def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs): """Fit regularized domain adaptation between samples is xs and xt @@ -783,6 +793,8 @@ class OTDA_sinkhorn(OTDA): self.computed = True +@deprecated("The class OTDA_lpl1 is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class SinkhornLpl1Transport instead.") class OTDA_lpl1(OTDA): """Class for domain adaptation with optimal transport with entropic and @@ -810,6 
+822,8 @@ class OTDA_lpl1(OTDA):
         self.computed = True


+@deprecated("The class OTDA_l1l2 is deprecated in 0.3.1 and will be"
+            " removed in 0.5 \nUse class SinkhornL1l2Transport instead.")
 class OTDA_l1l2(OTDA):

     """Class for domain adaptation with optimal transport with entropic
@@ -837,6 +851,8 @@ class OTDA_l1l2(OTDA):
         self.computed = True


+@deprecated("The class OTDA_mapping_linear is deprecated in 0.3.1 and will be"
+            " removed in 0.5 \nUse class MappingTransport instead.")
 class OTDA_mapping_linear(OTDA):

     """Class for optimal transport with joint linear mapping estimation as in
@@ -882,6 +898,8 @@ class OTDA_mapping_linear(OTDA):
             return None


+@deprecated("The class OTDA_mapping_kernel is deprecated in 0.3.1 and will be"
+            " removed in 0.5 \nUse class MappingTransport instead.")
 class OTDA_mapping_kernel(OTDA_mapping_linear):

     """Class for optimal transport with joint nonlinear mapping
diff --git a/ot/deprecation.py b/ot/deprecation.py
new file mode 100644
index 0000000..2b16427
--- /dev/null
+++ b/ot/deprecation.py
@@ -0,0 +1,103 @@
+"""
+    deprecated class from scikit-learn package
+    https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/deprecation.py
+"""
+
+import sys
+import warnings
+
+__all__ = ["deprecated", ]
+
+
+class deprecated(object):
+    """Decorator to mark a function or class as deprecated.
+    Issue a warning when the function is called/the class is instantiated and
+    add a warning to the docstring.
+    The optional extra argument will be appended to the deprecation message
+    and the docstring. Note: to use this with the default value for extra, put
+    in an empty set of parentheses:
+    >>> from ot.deprecation import deprecated
+    >>> @deprecated()
+    ... def some_function(): pass
+
+    Parameters
+    ----------
+    extra : string
+        to be added to the deprecation messages
+    """
+
+    # Adapted from http://wiki.python.org/moin/PythonDecoratorLibrary,
+    # but with many changes.
+
+    def __init__(self, extra=''):
+        self.extra = extra
+
+    def __call__(self, obj):
+        """Call method
+        Parameters
+        ----------
+        obj : object
+        """
+        if isinstance(obj, type):
+            return self._decorate_class(obj)
+        else:
+            return self._decorate_fun(obj)
+
+    def _decorate_class(self, cls):
+        msg = "Class %s is deprecated" % cls.__name__
+        if self.extra:
+            msg += "; %s" % self.extra
+
+        # FIXME: we should probably reset __new__ for full generality
+        init = cls.__init__
+
+        def wrapped(*args, **kwargs):
+            warnings.warn(msg, category=DeprecationWarning)
+            return init(*args, **kwargs)
+
+        cls.__init__ = wrapped
+
+        wrapped.__name__ = '__init__'
+        wrapped.__doc__ = self._update_doc(init.__doc__)
+        wrapped.deprecated_original = init
+
+        return cls
+
+    def _decorate_fun(self, fun):
+        """Decorate function fun"""
+
+        msg = "Function %s is deprecated" % fun.__name__
+        if self.extra:
+            msg += "; %s" % self.extra
+
+        def wrapped(*args, **kwargs):
+            warnings.warn(msg, category=DeprecationWarning)
+            return fun(*args, **kwargs)
+
+        wrapped.__name__ = fun.__name__
+        wrapped.__dict__ = fun.__dict__
+        wrapped.__doc__ = self._update_doc(fun.__doc__)
+
+        return wrapped
+
+    def _update_doc(self, olddoc):
+        newdoc = "DEPRECATED"
+        if self.extra:
+            newdoc = "%s: %s" % (newdoc, self.extra)
+        if olddoc:
+            newdoc = "%s\n\n%s" % (newdoc, olddoc)
+        return newdoc
+
+
+def _is_deprecated(func):
+    """Helper to check if func is wrapped by our deprecated decorator"""
+    if sys.version_info < (3, 5):
+        raise NotImplementedError("This is only available for python3.5 "
+                                  "or above")
+    closures = getattr(func, '__closure__', [])
+    if closures is None:
+        closures = []
+    is_deprecated = ('deprecated' in ''.join([c.cell_contents
+                                              for c in closures
+                                              if isinstance(c.cell_contents, str)]))
+    return is_deprecated
diff --git a/test/test_da.py b/test/test_da.py
index 162f681..9578b3d 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -432,10 +432,11 @@ def test_otda():
     da_emd.predict(xs)  # interpolation of source samples


-if __name__ == "__main__":
+# if __name__ == "__main__":

+    # test_otda()
     # test_sinkhorn_transport_class()
     # test_emd_transport_class()
     # test_sinkhorn_l1l2_transport_class()
     # test_sinkhorn_lpl1_transport_class()
-    test_mapping_transport_class()
+    # test_mapping_transport_class()
--
cgit v1.2.3


From 2d4d0b46f88c66ebc5502c840703ba6ce8910376 Mon Sep 17 00:00:00 2001
From: Slasnista
Date: Fri, 25 Aug 2017 10:29:41 +0200
Subject: solving log issues to avoid errors and adding further tests

---
 ot/da.py        | 57 ++++++++++++++++++++++++++++++++++++++++++---------------
 test/test_da.py | 39 +++++++++++++++++++++++++++++++++------
 2 files changed, 75 insertions(+), 21 deletions(-)

(limited to 'test')

diff --git a/ot/da.py b/ot/da.py
index 8fa1895..5a34979 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -1315,7 +1315,10 @@ class SinkhornTransport(BaseTransport):

     Attributes
     ----------
-    coupling_ : the optimal coupling
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
+        The optimal coupling
+    log_ : dictionary
+        The dictionary of log, empty dict if parameter log is not True

     References
     ----------
@@ -1367,11 +1370,18 @@ class SinkhornTransport(BaseTransport):
         super(SinkhornTransport, self).fit(Xs, ys, Xt, yt)

         # coupling estimation
-        self.coupling_ = sinkhorn(
+        returned_ = sinkhorn(
             a=self.mu_s, b=self.mu_t, M=self.cost_, reg=self.reg_e,
             numItermax=self.max_iter, stopThr=self.tol,
             verbose=self.verbose, log=self.log)

+        # deal with the value of log
+        if self.log:
+            self.coupling_, self.log_ = returned_
+        else:
+            self.coupling_ = returned_
+            self.log_ = dict()
+
         return self

@@ -1400,7 +1410,8 @@ class EMDTransport(BaseTransport):

     Attributes
     ----------
-    coupling_ : the optimal coupling
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
+        The optimal coupling

     References
     ----------
@@ -1475,15 +1486,14 @@ class SinkhornLpl1Transport(BaseTransport):
         The number of iteration in the inner loop
     verbose : int, optional (default=0)
         Controls the verbosity of the optimization algorithm
-    log : int, optional (default=0)
-        Controls the logs of the optimization algorithm
     limit_max: float, optional (default=np.infty)
         Controls the semi supervised mode. Transport between labeled source
         and target samples of different classes will exhibit an infinite cost

     Attributes
     ----------
-    coupling_ : the optimal coupling
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
+        The optimal coupling

     References
     ----------
@@ -1500,7 +1510,7 @@
     def __init__(self, reg_e=1., reg_cl=0.1,
                  max_iter=10, max_inner_iter=200,
-                 tol=10e-9, verbose=False, log=False,
+                 tol=10e-9, verbose=False,
                  metric="sqeuclidean",
                  distribution_estimation=distribution_estimation_uniform,
                  out_of_sample_map='ferradans', limit_max=np.infty):
@@ -1511,7 +1521,6 @@ class SinkhornLpl1Transport(BaseTransport):
         self.max_inner_iter = max_inner_iter
         self.tol = tol
         self.verbose = verbose
-        self.log = log
         self.metric = metric
         self.distribution_estimation = distribution_estimation
         self.out_of_sample_map = out_of_sample_map
@@ -1544,7 +1553,7 @@ class SinkhornLpl1Transport(BaseTransport):
             a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.cost_,
             reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
             numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
-            verbose=self.verbose, log=self.log)
+            verbose=self.verbose)

         return self

@@ -1584,7 +1593,10 @@ class SinkhornL1l2Transport(BaseTransport):

     Attributes
     ----------
-    coupling_ : the optimal coupling
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
+        The optimal coupling
+    log_ : dictionary
+        The dictionary of log, empty dict if parameter log is not True

     References
     ----------
@@ -1641,12 +1653,19 @@ class SinkhornL1l2Transport(BaseTransport):

         super(SinkhornL1l2Transport, self).fit(Xs, ys, Xt, yt)

-        self.coupling_ = sinkhorn_l1l2_gl(
+        returned_ = sinkhorn_l1l2_gl(
             a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.cost_,
             reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
             numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
             verbose=self.verbose, log=self.log)

+        # deal with the value of log
+        if self.log:
+            self.coupling_, self.log_ = returned_
+        else:
+            self.coupling_ = returned_
+            self.log_ = dict()
+
         return self

@@ -1683,14 +1702,15 @@ class MappingTransport(BaseEstimator):

     Attributes
     ----------
-    coupling_ : array-like, shape (n_source_samples, n_features)
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
         The optimal coupling
     mapping_ : array-like, shape (n_features (+ 1), n_features)
         (if bias) for kernel == linear
         The associated mapping
-
         array-like, shape (n_source_samples (+ 1), n_features)
         (if bias) for kernel == gaussian
+    log_ : dictionary
+        The dictionary of log, empty dict if parameter log is not True

     References
     ----------
@@ -1745,19 +1765,26 @@ class MappingTransport(BaseEstimator):
         self.Xt = Xt

         if self.kernel == "linear":
-            self.coupling_, self.mapping_ = joint_OT_mapping_linear(
+            returned_ = joint_OT_mapping_linear(
                 Xs, Xt, mu=self.mu, eta=self.eta, bias=self.bias,
verbose=self.verbose, verbose2=self.verbose2, numItermax=self.max_iter, numInnerItermax=self.max_inner_iter, stopThr=self.tol, stopInnerThr=self.inner_tol, log=self.log) elif self.kernel == "gaussian": - self.coupling_, self.mapping_ = joint_OT_mapping_kernel( + returned_ = joint_OT_mapping_kernel( Xs, Xt, mu=self.mu, eta=self.eta, bias=self.bias, sigma=self.sigma, verbose=self.verbose, verbose2=self.verbose, numItermax=self.max_iter, numInnerItermax=self.max_inner_iter, stopInnerThr=self.inner_tol, stopThr=self.tol, log=self.log) + # deal with the value of log + if self.log: + self.coupling_, self.mapping_, self.log_ = returned_ + else: + self.coupling_, self.mapping_ = returned_ + self.log_ = dict() + return self def transform(self, Xs): diff --git a/test/test_da.py b/test/test_da.py index 9578b3d..104a798 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -26,6 +26,8 @@ def test_sinkhorn_lpl1_transport_class(): # test its computed clf.fit(Xs=Xs, ys=ys, Xt=Xt) + assert hasattr(clf, "cost_") + assert hasattr(clf, "coupling_") # test dimensions of coupling assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) @@ -89,6 +91,9 @@ def test_sinkhorn_l1l2_transport_class(): # test its computed clf.fit(Xs=Xs, ys=ys, Xt=Xt) + assert hasattr(clf, "cost_") + assert hasattr(clf, "coupling_") + assert hasattr(clf, "log_") # test dimensions of coupling assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) @@ -137,6 +142,11 @@ def test_sinkhorn_l1l2_transport_class(): assert n_unsup != n_semisup, "semisupervised mode not working" + # check everything runs well with log=True + clf = ot.da.SinkhornL1l2Transport(log=True) + clf.fit(Xs=Xs, ys=ys, Xt=Xt) + assert len(clf.log_.keys()) != 0 + def test_sinkhorn_transport_class(): """test_sinkhorn_transport @@ -152,6 +162,9 @@ def test_sinkhorn_transport_class(): # test its computed clf.fit(Xs=Xs, Xt=Xt) + assert hasattr(clf, "cost_") + assert hasattr(clf, "coupling_") + assert hasattr(clf, "log_") # test dimensions of coupling assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) @@ -200,6 +213,11 @@ def test_sinkhorn_transport_class(): assert n_unsup != n_semisup, "semisupervised mode not working" + # check everything runs well with log=True + clf = ot.da.SinkhornTransport(log=True) + clf.fit(Xs=Xs, ys=ys, Xt=Xt) + assert len(clf.log_.keys()) != 0 + def test_emd_transport_class(): """test_sinkhorn_transport @@ -215,6 +233,8 @@ def test_emd_transport_class(): # test its computed clf.fit(Xs=Xs, Xt=Xt) + assert hasattr(clf, "cost_") + assert hasattr(clf, "coupling_") # test dimensions of coupling assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) @@ -282,6 +302,9 @@ def test_mapping_transport_class(): # check computation and dimensions if bias == False clf = ot.da.MappingTransport(kernel="linear", bias=False) clf.fit(Xs=Xs, Xt=Xt) + assert hasattr(clf, "coupling_") + assert hasattr(clf, "mapping_") + assert hasattr(clf, "log_") assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) assert_equal(clf.mapping_.shape, ((Xs.shape[1], Xt.shape[1]))) @@ -369,6 +392,11 @@ def test_mapping_transport_class(): # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) + # check everything runs well with log=True + clf = ot.da.MappingTransport(kernel="gaussian", log=True) + clf.fit(Xs=Xs, Xt=Xt) + assert len(clf.log_.keys()) != 0 + def test_otda(): @@ -434,9 +462,8 @@ def test_otda(): # if __name__ == "__main__": - # test_otda() - # test_sinkhorn_transport_class() - # test_emd_transport_class() - # 
test_sinkhorn_l1l2_transport_class() - # test_sinkhorn_lpl1_transport_class() - # test_mapping_transport_class() +# test_sinkhorn_transport_class() +# test_emd_transport_class() +# test_sinkhorn_l1l2_transport_class() +# test_sinkhorn_lpl1_transport_class() +# test_mapping_transport_class() -- cgit v1.2.3
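
Taken together, these commits finish the move of ot.da to a scikit-learn style API: fitted attributes follow the trailing underscore convention (cost_, coupling_, log_), semi supervised fitting is triggered simply by passing both ys and yt to fit, out of sample points are mapped by batch with the nearest neighbor scheme of Ferradans et al., and the legacy OTDA classes survive only behind a deprecation shim until 0.5. A minimal end-to-end sketch of the resulting API, mirroring the fixtures of test/test_da.py (the sample sizes, regularization value and batch size below are illustrative, not taken from the patches):

import numpy as np
import ot
from ot.datasets import get_data_classif

ns, nt = 150, 200
Xs, ys = get_data_classif('3gauss', ns)
Xt, yt = get_data_classif('3gauss2', nt)

# unsupervised fit: only the sample positions enter the cost matrix
clf = ot.da.SinkhornTransport(reg_e=1.)
clf.fit(Xs=Xs, Xt=Xt)

# semi supervised fit: with both ys and yt given, entries of cost_ that
# couple labeled samples of different classes are raised to limit_max,
# so mass flows preferentially within each class
clf_semi = ot.da.SinkhornTransport(reg_e=1.)
clf_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt)
assert np.sum(clf.cost_) != np.sum(clf_semi.cost_)

# barycentric mapping of the training samples
transp_Xs = clf.transform(Xs=Xs)

# out of sample mapping: a new point is shifted by the displacement of
# its nearest neighbor among the training sources, processed by batch
Xs_new, _ = get_data_classif('3gauss', ns + 1)
transp_Xs_new = clf.transform(Xs=Xs_new, batch_size=64)
assert transp_Xs_new.shape == Xs_new.shape

The deprecation shim only rewires __init__ (for classes) or wraps the function body, so legacy scripts keep running while emitting a warning. A short sketch of the behaviour one would expect from it (a hypothetical check, not part of the test suite; DeprecationWarning is silenced by default in Python, hence the filter):

import warnings
import ot

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    da = ot.da.OTDA_mapping_kernel()  # legacy API, to be removed in 0.5
assert any(issubclass(w.category, DeprecationWarning) for w in caught)

Rewiring __init__ instead of substituting the class keeps isinstance checks and subclassing intact, which is why the scikit-learn recipe is reused nearly verbatim; its FIXME about resetting __new__ marks the one known gap.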