From aaf80bbef65c1b8cee9bdec512ab81f00e8329e1 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Jul 2017 09:21:12 +0200 Subject: add slack and mailing list --- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index 22c1b50..c6a83c8 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,10 @@ sremove : clean : $(PYTHON) setup.py clean - + +pep8 : + flake8 examples/ ot/ test/ + test: pytest -- cgit v1.2.3 From 5a6b5de9b2f28c93bef1a9db2e3b94693c05ff4f Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Jul 2017 11:15:33 +0200 Subject: add proper testing --- .travis.yml | 2 +- Makefile | 13 ++++++---- docs/source/readme.rst | 52 +++++++++++++++++++++++++++++-------- test/test_emd_multi.py | 47 --------------------------------- test/test_gpu.py | 59 ++++++++++++++++++++++++++++++++++++++++++ test/test_gpu_sinkhorn.py | 28 -------------------- test/test_gpu_sinkhorn_lpl1.py | 29 --------------------- test/test_load_module.py | 10 ------- test/test_ot.py | 55 +++++++++++++++++++++++++++++++++++++++ 9 files changed, 164 insertions(+), 131 deletions(-) delete mode 100644 test/test_emd_multi.py create mode 100644 test/test_gpu.py delete mode 100644 test/test_gpu_sinkhorn.py delete mode 100644 test/test_gpu_sinkhorn_lpl1.py delete mode 100644 test/test_load_module.py create mode 100644 test/test_ot.py (limited to 'Makefile') diff --git a/.travis.yml b/.travis.yml index 8a95d7c..1c3a18c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,6 @@ install: - python setup.py install # command to run tests + check syntax style script: - - python test/test_load_module.py -v - flake8 examples/ ot/ test/ + - python -m py.test -v # - py.test ot test diff --git a/Makefile b/Makefile index c6a83c8..ff03a63 100644 --- a/Makefile +++ b/Makefile @@ -31,22 +31,25 @@ sremove : tr '\n' '\0' < files.txt | sudo xargs -0 rm -f -- rm files.txt -clean : +clean : FORCE $(PYTHON) setup.py clean pep8 : flake8 examples/ ot/ test/ -test: - pytest +test : FORCE pep8 + python -m py.test -v -uploadpypi: +uploadpypi : #python setup.py register python setup.py sdist upload -r pypi -rdoc: +rdoc : pandoc --from=markdown --to=rst --output=docs/source/readme.rst README.md notebook : ipython notebook --matplotlib=inline --notebook-dir=notebooks/ + + +FORCE : diff --git a/docs/source/readme.rst b/docs/source/readme.rst index 611001b..c1e0017 100644 --- a/docs/source/readme.rst +++ b/docs/source/readme.rst @@ -28,8 +28,8 @@ available in the examples folder. Installation ------------ -The Library has been tested on Linux and MacOSX. It requires a C++ -compiler for using the EMD solver and rely on the following Python +The library has been tested on Linux, MacOSX and Windows. It requires a +C++ compiler for using the EMD solver and relies on the following Python modules: - Numpy (>=1.11) @@ -37,25 +37,34 @@ modules: - Cython (>=0.23) - Matplotlib (>=1.5) -Under debian based linux the dependencies can be installed with +Pip installation +^^^^^^^^^^^^^^^^ + +You can install the toolbox through PyPI with: :: - sudo apt-get install python-numpy python-scipy python-matplotlib cython + pip install POT -To install the library, you can install it locally (after downloading -it) on you machine using +or get the very latest version by downloading it and then running: :: python setup.py install --user # for user install (no root) -The toolbox is also available on PyPI with a possibly slightly older -version. You can install it with: +Anaconda installation with conda-forge +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you use the Anaconda python distribution, POT is available in +`conda-forge `__. To install it and the +required dependencies: :: - pip install POT + conda install -c conda-forge pot + +Post installation check +^^^^^^^^^^^^^^^^^^^^^^^ After a correct installation, you should be able to import the module without errors: @@ -109,6 +118,7 @@ Short examples # a,b are 1D histograms (sum to 1 and positive) # M is the ground cost matrix Wd=ot.emd2(a,b,M) # exact linear program + Wd_reg=ot.sinkhorn2(a,b,M,reg) # entropic regularized OT # if b is a matrix compute all distances to a and return a vector - Compute OT matrix @@ -117,8 +127,8 @@ Short examples # a,b are 1D histograms (sum to 1 and positive) # M is the ground cost matrix - Totp=ot.emd(a,b,M) # exact linear program - Totp_reg=ot.sinkhorn(a,b,M,reg) # entropic regularized OT + T=ot.emd(a,b,M) # exact linear program + T_reg=ot.sinkhorn(a,b,M,reg) # entropic regularized OT - Compute Wasserstein barycenter @@ -172,6 +182,7 @@ The contributors to this library are: - `Rémi Flamary `__ - `Nicolas Courty `__ +- `Alexandre Gramfort `__ - `Laetitia Chapel `__ - `Michael Perrot `__ (Mapping estimation) @@ -189,6 +200,25 @@ languages): - `Marco Cuturi `__ (Sinkhorn Knopp in Matlab/Cuda) +Contributions and code of conduct +--------------------------------- + +Every contribution is welcome and should respect the `contribution +guidelines `__. Each member of the project is expected +to follow the `code of conduct `__. + +Support +------- + +You can ask questions and join the development discussion: + +- On the `POT Slack channel `__ +- On the POT `mailing + list `__ + +You can also post bug reports and feature requests in Github issues. +Make sure to read our `guidelines `__ first. + References ---------- diff --git a/test/test_emd_multi.py b/test/test_emd_multi.py deleted file mode 100644 index 2eef242..0000000 --- a/test/test_emd_multi.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- -""" -Created on Fri Mar 10 09:56:06 2017 - -@author: rflamary -""" - -import numpy as np - -import ot -from ot.datasets import get_1D_gauss as gauss -# reload(ot.lp) - -#%% parameters - -n = 5000 # nb bins - -# bin positions -x = np.arange(n, dtype=np.float64) - -# Gaussian distributions -a = gauss(n, m=20, s=5) # m= mean, s= std - -ls = np.arange(20, 1000, 10) -nb = len(ls) -b = np.zeros((n, nb)) -for i in range(nb): - b[:, i] = gauss(n, m=ls[i], s=10) - -# loss matrix -M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1))) -# M/=M.max() - -#%% - -print('Computing {} EMD '.format(nb)) - -# emd loss 1 proc -ot.tic() -emd_loss4 = ot.emd2(a, b, M, 1) -ot.toc('1 proc : {} s') - -# emd loss multipro proc -ot.tic() -emd_loss4 = ot.emd2(a, b, M) -ot.toc('multi proc : {} s') diff --git a/test/test_gpu.py b/test/test_gpu.py new file mode 100644 index 0000000..312a2d4 --- /dev/null +++ b/test/test_gpu.py @@ -0,0 +1,59 @@ +import ot +import numpy as np +import time +import pytest + + +@pytest.mark.skip(reason="No way to test GPU on travis yet") +def test_gpu_sinkhorn(): + import ot.gpu + + def describeRes(r): + print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format( + np.min(r), np.max(r), np.mean(r), np.std(r))) + + for n in [5000]: + print(n) + a = np.random.rand(n // 4, 100) + b = np.random.rand(n, 100) + time1 = time.time() + transport = ot.da.OTDA_sinkhorn() + transport.fit(a, b) + G1 = transport.G + time2 = time.time() + transport = ot.gpu.da.OTDA_sinkhorn() + transport.fit(a, b) + G2 = transport.G + time3 = time.time() + print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1)) + describeRes(G1) + print(" GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2)) + describeRes(G2) + + +@pytest.mark.skip(reason="No way to test GPU on travis yet") +def test_gpu_sinkhorn_lpl1(): + def describeRes(r): + print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}" + .format(np.min(r), np.max(r), np.mean(r), np.std(r))) + + for n in [5000]: + print(n) + a = np.random.rand(n // 4, 100) + labels_a = np.random.randint(10, size=(n // 4)) + b = np.random.rand(n, 100) + time1 = time.time() + transport = ot.da.OTDA_lpl1() + transport.fit(a, labels_a, b) + G1 = transport.G + time2 = time.time() + transport = ot.gpu.da.OTDA_lpl1() + transport.fit(a, labels_a, b) + G2 = transport.G + time3 = time.time() + print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format( + time2 - time1)) + describeRes(G1) + print(" GPU sinkhorn lpl1, time: {:6.2f} sec ".format( + time3 - time2)) + describeRes(G2) diff --git a/test/test_gpu_sinkhorn.py b/test/test_gpu_sinkhorn.py deleted file mode 100644 index 841f062..0000000 --- a/test/test_gpu_sinkhorn.py +++ /dev/null @@ -1,28 +0,0 @@ -import ot -import numpy as np -import time -import ot.gpu - - -def describeRes(r): - print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format( - np.min(r), np.max(r), np.mean(r), np.std(r))) - - -for n in [5000, 10000, 15000, 20000]: - print(n) - a = np.random.rand(n // 4, 100) - b = np.random.rand(n, 100) - time1 = time.time() - transport = ot.da.OTDA_sinkhorn() - transport.fit(a, b) - G1 = transport.G - time2 = time.time() - transport = ot.gpu.da.OTDA_sinkhorn() - transport.fit(a, b) - G2 = transport.G - time3 = time.time() - print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1)) - describeRes(G1) - print(" GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2)) - describeRes(G2) diff --git a/test/test_gpu_sinkhorn_lpl1.py b/test/test_gpu_sinkhorn_lpl1.py deleted file mode 100644 index f0eb7e6..0000000 --- a/test/test_gpu_sinkhorn_lpl1.py +++ /dev/null @@ -1,29 +0,0 @@ -import ot -import numpy as np -import time -import ot.gpu - - -def describeRes(r): - print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}" - .format(np.min(r), np.max(r), np.mean(r), np.std(r))) - - -for n in [5000, 10000, 15000, 20000]: - print(n) - a = np.random.rand(n // 4, 100) - labels_a = np.random.randint(10, size=(n // 4)) - b = np.random.rand(n, 100) - time1 = time.time() - transport = ot.da.OTDA_lpl1() - transport.fit(a, labels_a, b) - G1 = transport.G - time2 = time.time() - transport = ot.gpu.da.OTDA_lpl1() - transport.fit(a, labels_a, b) - G2 = transport.G - time3 = time.time() - print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format(time2 - time1)) - describeRes(G1) - print(" GPU sinkhorn lpl1, time: {:6.2f} sec ".format(time3 - time2)) - describeRes(G2) diff --git a/test/test_load_module.py b/test/test_load_module.py deleted file mode 100644 index d77261e..0000000 --- a/test/test_load_module.py +++ /dev/null @@ -1,10 +0,0 @@ - - -import ot -import doctest - -# test lp solver -doctest.testmod(ot.lp, verbose=True) - -# test bregman solver -doctest.testmod(ot.bregman, verbose=True) diff --git a/test/test_ot.py b/test/test_ot.py new file mode 100644 index 0000000..51ee510 --- /dev/null +++ b/test/test_ot.py @@ -0,0 +1,55 @@ + + +import ot +import numpy as np + +#import pytest + + +def test_doctest(): + + import doctest + + # test lp solver + doctest.testmod(ot.lp, verbose=True) + + # test bregman solver + doctest.testmod(ot.bregman, verbose=True) + + +#@pytest.mark.skip(reason="Seems to be a conflict between pytest and multiprocessing") +def test_emd_multi(): + + from ot.datasets import get_1D_gauss as gauss + + n = 1000 # nb bins + + # bin positions + x = np.arange(n, dtype=np.float64) + + # Gaussian distributions + a = gauss(n, m=20, s=5) # m= mean, s= std + + ls = np.arange(20, 1000, 10) + nb = len(ls) + b = np.zeros((n, nb)) + for i in range(nb): + b[:, i] = gauss(n, m=ls[i], s=10) + + # loss matrix + M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1))) + # M/=M.max() + + print('Computing {} EMD '.format(nb)) + + # emd loss 1 proc + ot.tic() + emd1 = ot.emd2(a, b, M, 1) + ot.toc('1 proc : {} s') + + # emd loss multipro proc + ot.tic() + emdn = ot.emd2(a, b, M) + ot.toc('multi proc : {} s') + + assert np.allclose(emd1, emdn) -- cgit v1.2.3 From beed8f49ee8d0bf828fc0b63f23254561d7566b0 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Jul 2017 11:21:00 +0200 Subject: update test --- .travis.yml | 2 +- Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'Makefile') diff --git a/.travis.yml b/.travis.yml index 1c3a18c..c5ca62b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,5 +21,5 @@ install: # command to run tests + check syntax style script: - flake8 examples/ ot/ test/ - - python -m py.test -v + - python -m py.test -v test/ # - py.test ot test diff --git a/Makefile b/Makefile index ff03a63..cabe6a9 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ pep8 : flake8 examples/ ot/ test/ test : FORCE pep8 - python -m py.test -v + python -m py.test -v test/ uploadpypi : #python setup.py register -- cgit v1.2.3 From 75492827c89a47cbc6807d4859be178d255c49bc Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Jul 2017 12:09:15 +0200 Subject: add test sinkhorn --- Makefile | 3 +++ ot/gpu/bregman.py | 2 +- test/test_ot.py | 46 +++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 45 insertions(+), 6 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index cabe6a9..577bbbe 100644 --- a/Makefile +++ b/Makefile @@ -39,6 +39,9 @@ pep8 : test : FORCE pep8 python -m py.test -v test/ + +pytest : FORCE + python -m py.test -v test/ uploadpypi : #python setup.py register diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 7881c65..2302f80 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -9,7 +9,7 @@ import cudamat def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, returnAsGPU=False): - """ + r""" Solve the entropic regularization optimal transport problem on GPU The function solves the following optimization problem: diff --git a/test/test_ot.py b/test/test_ot.py index 6976818..b69d080 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -18,8 +18,9 @@ def test_doctest(): def test_emd_emd2(): - # test emd + # test emd and emd2 for simple identity n = 100 + np.random.seed(0) x = np.random.randn(n, 2) u = ot.utils.unif(n) @@ -35,14 +36,13 @@ def test_emd_emd2(): # check loss=0 assert np.allclose(w, 0) - - -#@pytest.mark.skip(reason="Seems to be a conflict between pytest and multiprocessing") + def test_emd2_multi(): from ot.datasets import get_1D_gauss as gauss n = 1000 # nb bins + np.random.seed(0) # bin positions x = np.arange(n, dtype=np.float64) @@ -72,4 +72,40 @@ def test_emd2_multi(): emdn = ot.emd2(a, b, M) ot.toc('multi proc : {} s') - assert np.allclose(emd1, emdn) + assert np.allclose(emd1, emdn) + + +def test_sinkhorn(): + # test sinkhorn + n = 100 + np.random.seed(0) + + x = np.random.randn(n, 2) + u = ot.utils.unif(n) + + M = ot.dist(x, x) + + G = ot.sinkhorn(u, u, M,1,stopThr=1e-10) + + # check constratints + assert np.allclose(u, G.sum(1), atol=1e-05) # cf convergence sinkhorn + assert np.allclose(u, G.sum(0), atol=1e-05) # cf convergence sinkhorn + +def test_sinkhorn_variants(): + # test sinkhorn + n = 100 + np.random.seed(0) + + x = np.random.randn(n, 2) + u = ot.utils.unif(n) + + M = ot.dist(x, x) + + G0 = ot.sinkhorn(u, u, M,1, method='sinkhorn',stopThr=1e-10) + Gs = ot.sinkhorn(u, u, M,1, method='sinkhorn_stabilized',stopThr=1e-10) + Ges = ot.sinkhorn(u, u, M,1, method='sinkhorn_epsilon_scaling',stopThr=1e-10) + + # check constratints + assert np.allclose(G0, Gs, atol=1e-05) + assert np.allclose(G0, Ges, atol=1e-05) # + -- cgit v1.2.3 From b2f91f24796a996a82db41e91f56ba6a51989159 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Jul 2017 14:26:25 +0200 Subject: full coveragre utils --- Makefile | 4 ++-- test/test_gpu.py | 18 ++++++++++++++---- test/test_ot.py | 4 +++- test/test_utils.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 7 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index 577bbbe..98f5614 100644 --- a/Makefile +++ b/Makefile @@ -38,10 +38,10 @@ pep8 : flake8 examples/ ot/ test/ test : FORCE pep8 - python -m py.test -v test/ + python -m py.test -v test/ --cov=ot --cov-report html:cov_html pytest : FORCE - python -m py.test -v test/ + python -m py.test -v test/ --cov=ot uploadpypi : #python setup.py register diff --git a/test/test_gpu.py b/test/test_gpu.py index 312a2d4..49b98d0 100644 --- a/test/test_gpu.py +++ b/test/test_gpu.py @@ -3,8 +3,14 @@ import numpy as np import time import pytest +try: # test if cudamat installed + import ot.gpu + nogpu = False +except ImportError: + nogpu = True + -@pytest.mark.skip(reason="No way to test GPU on travis yet") +@pytest.mark.skipif(nogpu, reason="No GPU available") def test_gpu_sinkhorn(): import ot.gpu @@ -12,7 +18,7 @@ def test_gpu_sinkhorn(): print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format( np.min(r), np.max(r), np.mean(r), np.std(r))) - for n in [5000]: + for n in [50, 100, 500, 1000]: print(n) a = np.random.rand(n // 4, 100) b = np.random.rand(n, 100) @@ -30,14 +36,16 @@ def test_gpu_sinkhorn(): print(" GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2)) describeRes(G2) + assert np.allclose(G1, G2, rtol=1e-5, atol=1e-5) -@pytest.mark.skip(reason="No way to test GPU on travis yet") + +@pytest.mark.skipif(nogpu, reason="No GPU available") def test_gpu_sinkhorn_lpl1(): def describeRes(r): print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}" .format(np.min(r), np.max(r), np.mean(r), np.std(r))) - for n in [5000]: + for n in [50, 100, 500, 1000]: print(n) a = np.random.rand(n // 4, 100) labels_a = np.random.randint(10, size=(n // 4)) @@ -57,3 +65,5 @@ def test_gpu_sinkhorn_lpl1(): print(" GPU sinkhorn lpl1, time: {:6.2f} sec ".format( time3 - time2)) describeRes(G2) + + assert np.allclose(G1, G2, rtol=1e-5, atol=1e-5) diff --git a/test/test_ot.py b/test/test_ot.py index 3fa1bc4..16fd510 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -31,9 +31,11 @@ def test_emd_emd2(): # check G is identity assert np.allclose(G, np.eye(n) / n) + # check constratints + assert np.allclose(u, G.sum(1)) # cf convergence sinkhorn + assert np.allclose(u, G.sum(0)) # cf convergence sinkhorn w = ot.emd2(u, u, M) - # check loss=0 assert np.allclose(w, 0) diff --git a/test/test_utils.py b/test/test_utils.py index 3219fce..e85e5b7 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -71,6 +71,52 @@ def test_dist(): D[i, j] = np.sum(np.square(x[i, :] - x[j, :])) D2 = ot.dist(x, x) + D3 = ot.dist(x) # dist shoul return squared euclidean assert np.allclose(D, D2) + assert np.allclose(D, D3) + + +def test_dist0(): + + n = 100 + M = ot.utils.dist0(n, method='lin_square') + + # dist0 default to linear sampling with quadratic loss + assert np.allclose(M[0, -1], (n - 1) * (n - 1)) + + +def test_dots(): + + n1, n2, n3, n4 = 100, 50, 200, 100 + + A = np.random.randn(n1, n2) + B = np.random.randn(n2, n3) + C = np.random.randn(n3, n4) + + X1 = ot.utils.dots(A, B, C) + + X2 = A.dot(B.dot(C)) + + assert np.allclose(X1, X2) + + +def test_clean_zeros(): + + n = 100 + nz = 50 + nz2 = 20 + u1 = ot.unif(n) + u1[:nz] = 0 + u1 = u1 / u1.sum() + u2 = ot.unif(n) + u2[:nz2] = 0 + u2 = u2 / u2.sum() + + M = ot.utils.dist0(n) + + a, b, M2 = ot.utils.clean_zeros(u1, u2, M) + + assert len(a) == n - nz + assert len(b) == n - nz2 -- cgit v1.2.3