From cd9909cff342bb46c4233a0ead348dabebe9efdf Mon Sep 17 00:00:00 2001 From: arolet Date: Fri, 14 Jul 2017 15:18:57 +0900 Subject: Added a test for single process EMD The multiprocess one does not seem to work on windows --- test/test_emd.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 test/test_emd.py (limited to 'test') diff --git a/test/test_emd.py b/test/test_emd.py new file mode 100644 index 0000000..3729d5d --- /dev/null +++ b/test/test_emd.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- + +import numpy as np +import pylab as pl +import ot + +from ot.datasets import get_1D_gauss as gauss +reload(ot.lp) + +#%% parameters + +n=5000 # nb bins + +# bin positions +x=np.arange(n,dtype=np.float64) + +# Gaussian distributions +a=gauss(n,m=20,s=5) # m= mean, s= std + +b=gauss(n,m=30,s=10) + +# loss matrix +M=ot.dist(x.reshape((n,1)),x.reshape((n,1))) +#M/=M.max() + +#%% + +print('Computing {} EMD '.format(1)) + +# emd loss 1 proc +ot.tic() +emd_loss4 = ot.emd(a,b,M) +ot.toc('1 proc : {} s') + -- cgit v1.2.3 From d59e91450272c78dd0fdd3c6bd9bf48776f10070 Mon Sep 17 00:00:00 2001 From: arolet Date: Fri, 14 Jul 2017 15:37:46 +0900 Subject: Added a test based on closed form solution for gaussians --- test/test_emd.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/test_emd.py b/test/test_emd.py index 3729d5d..eb1c5c5 100644 --- a/test/test_emd.py +++ b/test/test_emd.py @@ -11,17 +11,25 @@ reload(ot.lp) #%% parameters n=5000 # nb bins +m=6000 # nb bins + +mean1 = 1000 +mean2 = 1100 + +tol = 1e-6 # bin positions x=np.arange(n,dtype=np.float64) +y=np.arange(m,dtype=np.float64) # Gaussian distributions -a=gauss(n,m=20,s=5) # m= mean, s= std +a=gauss(n,m=mean1,s=5) # m= mean, s= std -b=gauss(n,m=30,s=10) +b=gauss(m,m=mean2,s=10) # loss matrix -M=ot.dist(x.reshape((n,1)),x.reshape((n,1))) +M=ot.dist(x.reshape((-1,1)), y.reshape((-1,1))) ** (1./2) +print M[0,:] #M/=M.max() #%% @@ -30,6 +38,16 @@ print('Computing {} EMD '.format(1)) # emd loss 1 proc ot.tic() -emd_loss4 = ot.emd(a,b,M) +G = ot.emd(a,b,M) ot.toc('1 proc : {} s') +cost1 = (G * M).sum() + +ot.tic() +G = ot.emd(b, a, np.ascontiguousarray(M.T)) +ot.toc('1 proc : {} s') + +cost2 = (G * M.T).sum() + +assert np.abs(cost1-cost2) < tol +assert np.abs(cost1-np.abs(mean1-mean2)) < tol -- cgit v1.2.3 From dc3bbd4134f0e2b80e0fe72368bdcf9966f434dc Mon Sep 17 00:00:00 2001 From: arolet Date: Fri, 21 Jul 2017 12:12:21 +0900 Subject: Cleaned optimal plan and optimal cost computation --- ot/lp/EMD_wrapper.cpp | 13 ++++++------- ot/lp/emd_wrap.pyx | 5 ----- test/test_emd.py | 10 ++++++++-- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'test') diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index d719c6e..cc13230 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -93,14 +93,13 @@ void EMD_wrap(int n1, int n2, double *X, double *Y, } } else { - for (node_id_type i=0; i a.data, b.data, M.data, G.data, &cost, maxiter) - - cost=0 - for i in range(n1): - for j in range(n2): - cost+=G[i,j]*M[i,j] return cost diff --git a/test/test_emd.py b/test/test_emd.py index eb1c5c5..4757cd1 100644 --- a/test/test_emd.py +++ b/test/test_emd.py @@ -43,11 +43,17 @@ ot.toc('1 proc : {} s') cost1 = (G * M).sum() +# emd loss 1 proc +ot.tic() +cost_emd2 = ot.emd2(a,b,M) +ot.toc('1 proc : {} s') + ot.tic() G = ot.emd(b, a, np.ascontiguousarray(M.T)) ot.toc('1 proc : {} s') cost2 = (G * M.T).sum() -assert np.abs(cost1-cost2) < tol -assert np.abs(cost1-np.abs(mean1-mean2)) < tol +assert np.abs(cost1-cost_emd2)/np.abs(cost1) < tol +assert np.abs(cost1-cost2)/np.abs(cost1) < tol +assert np.abs(cost1-np.abs(mean1-mean2))/np.abs(cost1) < tol -- cgit v1.2.3 From c1980a414c879dd1bc1d8904fd43426326741385 Mon Sep 17 00:00:00 2001 From: arolet Date: Fri, 21 Jul 2017 13:34:09 +0900 Subject: Added and passed tests for dual variables --- ot/lp/EMD_wrapper.cpp | 2 +- ot/lp/emd_wrap.pyx | 4 ++-- test/test_emd.py | 28 +++++++++++++++++++--------- 3 files changed, 22 insertions(+), 12 deletions(-) (limited to 'test') diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index c6cbb04..0977e75 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -101,7 +101,7 @@ void EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, double flow = net.flow(a); *cost += flow * (*(D+indI[i]*n2+indJ[j-n])); *(G+indI[i]*n2+indJ[j-n]) = flow; - *(alpha + indI[i]) = net.potential(i); + *(alpha + indI[i]) = -net.potential(i); *(beta + indJ[j-n]) = net.potential(j); } diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 813596f..435a270 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -57,7 +57,7 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod cdef int n1= M.shape[0] cdef int n2= M.shape[1] - cdef float cost=0 + cdef double cost=0 cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) cdef np.ndarray[double, ndim=1, mode="c"] alpha=np.zeros(n1) cdef np.ndarray[double, ndim=1, mode="c"] beta=np.zeros(n2) @@ -116,7 +116,7 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo cdef int n1= M.shape[0] cdef int n2= M.shape[1] - cdef float cost=0 + cdef double cost=0 cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) cdef np.ndarray[double, ndim = 1, mode = "c"] alpha = np.zeros([n1]) diff --git a/test/test_emd.py b/test/test_emd.py index 4757cd1..3bf6fa2 100644 --- a/test/test_emd.py +++ b/test/test_emd.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import numpy as np -import pylab as pl import ot from ot.datasets import get_1D_gauss as gauss @@ -16,8 +15,6 @@ m=6000 # nb bins mean1 = 1000 mean2 = 1100 -tol = 1e-6 - # bin positions x=np.arange(n,dtype=np.float64) y=np.arange(m,dtype=np.float64) @@ -38,10 +35,11 @@ print('Computing {} EMD '.format(1)) # emd loss 1 proc ot.tic() -G = ot.emd(a,b,M) +G, alpha, beta = ot.emd(a,b,M, dual_variables=True) ot.toc('1 proc : {} s') cost1 = (G * M).sum() +cost_dual = np.vdot(a, alpha) + np.vdot(b, beta) # emd loss 1 proc ot.tic() @@ -49,11 +47,23 @@ cost_emd2 = ot.emd2(a,b,M) ot.toc('1 proc : {} s') ot.tic() -G = ot.emd(b, a, np.ascontiguousarray(M.T)) +G2 = ot.emd(b, a, np.ascontiguousarray(M.T)) ot.toc('1 proc : {} s') -cost2 = (G * M.T).sum() +cost2 = (G2 * M.T).sum() + +M_reduced = M - alpha.reshape(-1,1) - beta.reshape(1, -1) + +# Check that both cost computations are equivalent +np.testing.assert_almost_equal(cost1, cost_emd2) +# Check that dual and primal cost are equal +np.testing.assert_almost_equal(cost1, cost_dual) +# Check symmetry +np.testing.assert_almost_equal(cost1, cost2) +# Check with closed-form solution for gaussians +np.testing.assert_almost_equal(cost1, np.abs(mean1-mean2)) + +[ind1, ind2] = np.nonzero(G) -assert np.abs(cost1-cost_emd2)/np.abs(cost1) < tol -assert np.abs(cost1-cost2)/np.abs(cost1) < tol -assert np.abs(cost1-np.abs(mean1-mean2))/np.abs(cost1) < tol +# Check that reduced cost is zero on transport arcs +np.testing.assert_array_almost_equal((M - alpha.reshape(-1, 1) - beta.reshape(1, -1))[ind1, ind2], np.zeros(ind1.size)) \ No newline at end of file -- cgit v1.2.3 From b12edc59c0a94e1f426ae314baa006e06c062923 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Tue, 5 Sep 2017 09:42:32 +0200 Subject: integrated test for semi supervised case --- test/test_da.py | 96 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 36 deletions(-) (limited to 'test') diff --git a/test/test_da.py b/test/test_da.py index 104a798..a757d0a 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -63,19 +63,25 @@ def test_sinkhorn_lpl1_transport_class(): transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) - # test semi supervised mode - clf = ot.da.SinkhornLpl1Transport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - n_unsup = np.sum(clf.cost_) + # test unsupervised vs semi-supervised mode + clf_unsup = ot.da.SinkhornTransport() + clf_unsup.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(clf_unsup.cost_) - # test semi supervised mode - clf = ot.da.SinkhornLpl1Transport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.cost_) + clf_semi = ot.da.SinkhornTransport() + clf_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(clf_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf_semi.cost_) + # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" + # check that the coupling forbids mass transport between labeled source + # and labeled target samples + mass_semi = np.sum( + clf_semi.coupling_[clf_semi.cost_ == clf_semi.limit_max]) + assert mass_semi == 0, "semisupervised mode not working" + def test_sinkhorn_l1l2_transport_class(): """test_sinkhorn_transport @@ -129,19 +135,25 @@ def test_sinkhorn_l1l2_transport_class(): transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) - # test semi supervised mode - clf = ot.da.SinkhornL1l2Transport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - n_unsup = np.sum(clf.cost_) + # test unsupervised vs semi-supervised mode + clf_unsup = ot.da.SinkhornTransport() + clf_unsup.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(clf_unsup.cost_) - # test semi supervised mode - clf = ot.da.SinkhornL1l2Transport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.cost_) + clf_semi = ot.da.SinkhornTransport() + clf_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(clf_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf_semi.cost_) + # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" + # check that the coupling forbids mass transport between labeled source + # and labeled target samples + mass_semi = np.sum( + clf_semi.coupling_[clf_semi.cost_ == clf_semi.limit_max]) + assert mass_semi == 0, "semisupervised mode not working" + # check everything runs well with log=True clf = ot.da.SinkhornL1l2Transport(log=True) clf.fit(Xs=Xs, ys=ys, Xt=Xt) @@ -200,19 +212,25 @@ def test_sinkhorn_transport_class(): transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) - # test semi supervised mode - clf = ot.da.SinkhornTransport() - clf.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf.cost_) + # test unsupervised vs semi-supervised mode + clf_unsup = ot.da.SinkhornTransport() + clf_unsup.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(clf_unsup.cost_) - # test semi supervised mode - clf = ot.da.SinkhornTransport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.cost_) + clf_semi = ot.da.SinkhornTransport() + clf_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(clf_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf_semi.cost_) + # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" + # check that the coupling forbids mass transport between labeled source + # and labeled target samples + mass_semi = np.sum( + clf_semi.coupling_[clf_semi.cost_ == clf_semi.limit_max]) + assert mass_semi == 0, "semisupervised mode not working" + # check everything runs well with log=True clf = ot.da.SinkhornTransport(log=True) clf.fit(Xs=Xs, ys=ys, Xt=Xt) @@ -270,19 +288,25 @@ def test_emd_transport_class(): transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) - # test semi supervised mode - clf = ot.da.EMDTransport() - clf.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf.cost_) + # test unsupervised vs semi-supervised mode + clf_unsup = ot.da.SinkhornTransport() + clf_unsup.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(clf_unsup.cost_) - # test semi supervised mode - clf = ot.da.EMDTransport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.cost_) + clf_semi = ot.da.SinkhornTransport() + clf_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(clf_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf_semi.cost_) + # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" + # check that the coupling forbids mass transport between labeled source + # and labeled target samples + mass_semi = np.sum( + clf_semi.coupling_[clf_semi.cost_ == clf_semi.limit_max]) + assert mass_semi == 0, "semisupervised mode not working" + def test_mapping_transport_class(): """test_mapping_transport -- cgit v1.2.3 From 8e4a7930cf1ff80edeb30021acaf7337a02d18a5 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Tue, 5 Sep 2017 09:47:24 +0200 Subject: change name of otda object in test script: clf => otda --- test/test_da.py | 260 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 130 insertions(+), 130 deletions(-) (limited to 'test') diff --git a/test/test_da.py b/test/test_da.py index a757d0a..9fc42a3 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -22,56 +22,56 @@ def test_sinkhorn_lpl1_transport_class(): Xs, ys = get_data_classif('3gauss', ns) Xt, yt = get_data_classif('3gauss2', nt) - clf = ot.da.SinkhornLpl1Transport() + otda = ot.da.SinkhornLpl1Transport() # test its computed - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - assert hasattr(clf, "cost_") - assert hasattr(clf, "coupling_") + otda.fit(Xs=Xs, ys=ys, Xt=Xt) + assert hasattr(otda, "cost_") + assert hasattr(otda, "coupling_") # test dimensions of coupling - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) Xs_new, _ = get_data_classif('3gauss', ns + 1) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # test inverse transform - transp_Xt = clf.inverse_transform(Xt=Xt) + transp_Xt = otda.inverse_transform(Xt=Xt) assert_equal(transp_Xt.shape, Xt.shape) Xt_new, _ = get_data_classif('3gauss2', nt + 1) - transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + transp_Xt_new = otda.inverse_transform(Xt=Xt_new) # check that the oos method is working assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform - transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) + transp_Xs = otda.fit_transform(Xs=Xs, ys=ys, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) # test unsupervised vs semi-supervised mode - clf_unsup = ot.da.SinkhornTransport() - clf_unsup.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf_unsup.cost_) + otda_unsup = ot.da.SinkhornTransport() + otda_unsup.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(otda_unsup.cost_) - clf_semi = ot.da.SinkhornTransport() - clf_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf_semi.cost_) + otda_semi = ot.da.SinkhornTransport() + otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(otda_semi.cost_) # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" @@ -79,7 +79,7 @@ def test_sinkhorn_lpl1_transport_class(): # check that the coupling forbids mass transport between labeled source # and labeled target samples mass_semi = np.sum( - clf_semi.coupling_[clf_semi.cost_ == clf_semi.limit_max]) + otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]) assert mass_semi == 0, "semisupervised mode not working" @@ -93,57 +93,57 @@ def test_sinkhorn_l1l2_transport_class(): Xs, ys = get_data_classif('3gauss', ns) Xt, yt = get_data_classif('3gauss2', nt) - clf = ot.da.SinkhornL1l2Transport() + otda = ot.da.SinkhornL1l2Transport() # test its computed - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - assert hasattr(clf, "cost_") - assert hasattr(clf, "coupling_") - assert hasattr(clf, "log_") + otda.fit(Xs=Xs, ys=ys, Xt=Xt) + assert hasattr(otda, "cost_") + assert hasattr(otda, "coupling_") + assert hasattr(otda, "log_") # test dimensions of coupling - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) Xs_new, _ = get_data_classif('3gauss', ns + 1) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # test inverse transform - transp_Xt = clf.inverse_transform(Xt=Xt) + transp_Xt = otda.inverse_transform(Xt=Xt) assert_equal(transp_Xt.shape, Xt.shape) Xt_new, _ = get_data_classif('3gauss2', nt + 1) - transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + transp_Xt_new = otda.inverse_transform(Xt=Xt_new) # check that the oos method is working assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform - transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) + transp_Xs = otda.fit_transform(Xs=Xs, ys=ys, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) # test unsupervised vs semi-supervised mode - clf_unsup = ot.da.SinkhornTransport() - clf_unsup.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf_unsup.cost_) + otda_unsup = ot.da.SinkhornTransport() + otda_unsup.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(otda_unsup.cost_) - clf_semi = ot.da.SinkhornTransport() - clf_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf_semi.cost_) + otda_semi = ot.da.SinkhornTransport() + otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(otda_semi.cost_) # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" @@ -151,13 +151,13 @@ def test_sinkhorn_l1l2_transport_class(): # check that the coupling forbids mass transport between labeled source # and labeled target samples mass_semi = np.sum( - clf_semi.coupling_[clf_semi.cost_ == clf_semi.limit_max]) + otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]) assert mass_semi == 0, "semisupervised mode not working" # check everything runs well with log=True - clf = ot.da.SinkhornL1l2Transport(log=True) - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - assert len(clf.log_.keys()) != 0 + otda = ot.da.SinkhornL1l2Transport(log=True) + otda.fit(Xs=Xs, ys=ys, Xt=Xt) + assert len(otda.log_.keys()) != 0 def test_sinkhorn_transport_class(): @@ -170,57 +170,57 @@ def test_sinkhorn_transport_class(): Xs, ys = get_data_classif('3gauss', ns) Xt, yt = get_data_classif('3gauss2', nt) - clf = ot.da.SinkhornTransport() + otda = ot.da.SinkhornTransport() # test its computed - clf.fit(Xs=Xs, Xt=Xt) - assert hasattr(clf, "cost_") - assert hasattr(clf, "coupling_") - assert hasattr(clf, "log_") + otda.fit(Xs=Xs, Xt=Xt) + assert hasattr(otda, "cost_") + assert hasattr(otda, "coupling_") + assert hasattr(otda, "log_") # test dimensions of coupling - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) Xs_new, _ = get_data_classif('3gauss', ns + 1) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # test inverse transform - transp_Xt = clf.inverse_transform(Xt=Xt) + transp_Xt = otda.inverse_transform(Xt=Xt) assert_equal(transp_Xt.shape, Xt.shape) Xt_new, _ = get_data_classif('3gauss2', nt + 1) - transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + transp_Xt_new = otda.inverse_transform(Xt=Xt_new) # check that the oos method is working assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform - transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt) + transp_Xs = otda.fit_transform(Xs=Xs, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) # test unsupervised vs semi-supervised mode - clf_unsup = ot.da.SinkhornTransport() - clf_unsup.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf_unsup.cost_) + otda_unsup = ot.da.SinkhornTransport() + otda_unsup.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(otda_unsup.cost_) - clf_semi = ot.da.SinkhornTransport() - clf_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf_semi.cost_) + otda_semi = ot.da.SinkhornTransport() + otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(otda_semi.cost_) # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" @@ -228,13 +228,13 @@ def test_sinkhorn_transport_class(): # check that the coupling forbids mass transport between labeled source # and labeled target samples mass_semi = np.sum( - clf_semi.coupling_[clf_semi.cost_ == clf_semi.limit_max]) + otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]) assert mass_semi == 0, "semisupervised mode not working" # check everything runs well with log=True - clf = ot.da.SinkhornTransport(log=True) - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - assert len(clf.log_.keys()) != 0 + otda = ot.da.SinkhornTransport(log=True) + otda.fit(Xs=Xs, ys=ys, Xt=Xt) + assert len(otda.log_.keys()) != 0 def test_emd_transport_class(): @@ -247,56 +247,56 @@ def test_emd_transport_class(): Xs, ys = get_data_classif('3gauss', ns) Xt, yt = get_data_classif('3gauss2', nt) - clf = ot.da.EMDTransport() + otda = ot.da.EMDTransport() # test its computed - clf.fit(Xs=Xs, Xt=Xt) - assert hasattr(clf, "cost_") - assert hasattr(clf, "coupling_") + otda.fit(Xs=Xs, Xt=Xt) + assert hasattr(otda, "cost_") + assert hasattr(otda, "coupling_") # test dimensions of coupling - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) Xs_new, _ = get_data_classif('3gauss', ns + 1) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # test inverse transform - transp_Xt = clf.inverse_transform(Xt=Xt) + transp_Xt = otda.inverse_transform(Xt=Xt) assert_equal(transp_Xt.shape, Xt.shape) Xt_new, _ = get_data_classif('3gauss2', nt + 1) - transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + transp_Xt_new = otda.inverse_transform(Xt=Xt_new) # check that the oos method is working assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform - transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt) + transp_Xs = otda.fit_transform(Xs=Xs, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) # test unsupervised vs semi-supervised mode - clf_unsup = ot.da.SinkhornTransport() - clf_unsup.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf_unsup.cost_) + otda_unsup = ot.da.SinkhornTransport() + otda_unsup.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(otda_unsup.cost_) - clf_semi = ot.da.SinkhornTransport() - clf_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf_semi.cost_) + otda_semi = ot.da.SinkhornTransport() + otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(otda_semi.cost_) # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" @@ -304,7 +304,7 @@ def test_emd_transport_class(): # check that the coupling forbids mass transport between labeled source # and labeled target samples mass_semi = np.sum( - clf_semi.coupling_[clf_semi.cost_ == clf_semi.limit_max]) + otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]) assert mass_semi == 0, "semisupervised mode not working" @@ -324,47 +324,47 @@ def test_mapping_transport_class(): ########################################################################## # check computation and dimensions if bias == False - clf = ot.da.MappingTransport(kernel="linear", bias=False) - clf.fit(Xs=Xs, Xt=Xt) - assert hasattr(clf, "coupling_") - assert hasattr(clf, "mapping_") - assert hasattr(clf, "log_") + otda = ot.da.MappingTransport(kernel="linear", bias=False) + otda.fit(Xs=Xs, Xt=Xt) + assert hasattr(otda, "coupling_") + assert hasattr(otda, "mapping_") + assert hasattr(otda, "log_") - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.mapping_.shape, ((Xs.shape[1], Xt.shape[1]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.mapping_.shape, ((Xs.shape[1], Xt.shape[1]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # check computation and dimensions if bias == True - clf = ot.da.MappingTransport(kernel="linear", bias=True) - clf.fit(Xs=Xs, Xt=Xt) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.mapping_.shape, ((Xs.shape[1] + 1, Xt.shape[1]))) + otda = ot.da.MappingTransport(kernel="linear", bias=True) + otda.fit(Xs=Xs, Xt=Xt) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.mapping_.shape, ((Xs.shape[1] + 1, Xt.shape[1]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) @@ -374,52 +374,52 @@ def test_mapping_transport_class(): ########################################################################## # check computation and dimensions if bias == False - clf = ot.da.MappingTransport(kernel="gaussian", bias=False) - clf.fit(Xs=Xs, Xt=Xt) + otda = ot.da.MappingTransport(kernel="gaussian", bias=False) + otda.fit(Xs=Xs, Xt=Xt) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.mapping_.shape, ((Xs.shape[0], Xt.shape[1]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.mapping_.shape, ((Xs.shape[0], Xt.shape[1]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # check computation and dimensions if bias == True - clf = ot.da.MappingTransport(kernel="gaussian", bias=True) - clf.fit(Xs=Xs, Xt=Xt) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.mapping_.shape, ((Xs.shape[0] + 1, Xt.shape[1]))) + otda = ot.da.MappingTransport(kernel="gaussian", bias=True) + otda.fit(Xs=Xs, Xt=Xt) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.mapping_.shape, ((Xs.shape[0] + 1, Xt.shape[1]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # check everything runs well with log=True - clf = ot.da.MappingTransport(kernel="gaussian", log=True) - clf.fit(Xs=Xs, Xt=Xt) - assert len(clf.log_.keys()) != 0 + otda = ot.da.MappingTransport(kernel="gaussian", log=True) + otda.fit(Xs=Xs, Xt=Xt) + assert len(otda.log_.keys()) != 0 def test_otda(): -- cgit v1.2.3 From d43ce6fdca486fdb0fe049ab3cae4daf8652f5d0 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Tue, 5 Sep 2017 16:58:10 +0900 Subject: Removed print --- test/test_emd.py | 1 - 1 file changed, 1 deletion(-) (limited to 'test') diff --git a/test/test_emd.py b/test/test_emd.py index 3bf6fa2..0025583 100644 --- a/test/test_emd.py +++ b/test/test_emd.py @@ -26,7 +26,6 @@ b=gauss(m,m=mean2,s=10) # loss matrix M=ot.dist(x.reshape((-1,1)), y.reshape((-1,1))) ** (1./2) -print M[0,:] #M/=M.max() #%% -- cgit v1.2.3 From 49c100de34583329058b39d414d2aa49b7fd15bf Mon Sep 17 00:00:00 2001 From: Slasnista Date: Tue, 5 Sep 2017 10:00:01 +0200 Subject: test semi supervised mode ok written for all class | need different tolerance for EMDTransport MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test_da.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'test') diff --git a/test/test_da.py b/test/test_da.py index 9fc42a3..3602db9 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -64,11 +64,11 @@ def test_sinkhorn_lpl1_transport_class(): assert_equal(transp_Xs.shape, Xs.shape) # test unsupervised vs semi-supervised mode - otda_unsup = ot.da.SinkhornTransport() - otda_unsup.fit(Xs=Xs, Xt=Xt) + otda_unsup = ot.da.SinkhornLpl1Transport() + otda_unsup.fit(Xs=Xs, ys=ys, Xt=Xt) n_unsup = np.sum(otda_unsup.cost_) - otda_semi = ot.da.SinkhornTransport() + otda_semi = ot.da.SinkhornLpl1Transport() otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) n_semisup = np.sum(otda_semi.cost_) @@ -136,11 +136,11 @@ def test_sinkhorn_l1l2_transport_class(): assert_equal(transp_Xs.shape, Xs.shape) # test unsupervised vs semi-supervised mode - otda_unsup = ot.da.SinkhornTransport() - otda_unsup.fit(Xs=Xs, Xt=Xt) + otda_unsup = ot.da.SinkhornL1l2Transport() + otda_unsup.fit(Xs=Xs, ys=ys, Xt=Xt) n_unsup = np.sum(otda_unsup.cost_) - otda_semi = ot.da.SinkhornTransport() + otda_semi = ot.da.SinkhornL1l2Transport() otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) n_semisup = np.sum(otda_semi.cost_) @@ -152,7 +152,9 @@ def test_sinkhorn_l1l2_transport_class(): # and labeled target samples mass_semi = np.sum( otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]) - assert mass_semi == 0, "semisupervised mode not working" + mass_semi = otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max] + assert_allclose(mass_semi, np.zeros_like(mass_semi), + rtol=1e-9, atol=1e-9) # check everything runs well with log=True otda = ot.da.SinkhornL1l2Transport(log=True) @@ -289,11 +291,11 @@ def test_emd_transport_class(): assert_equal(transp_Xs.shape, Xs.shape) # test unsupervised vs semi-supervised mode - otda_unsup = ot.da.SinkhornTransport() - otda_unsup.fit(Xs=Xs, Xt=Xt) + otda_unsup = ot.da.EMDTransport() + otda_unsup.fit(Xs=Xs, ys=ys, Xt=Xt) n_unsup = np.sum(otda_unsup.cost_) - otda_semi = ot.da.SinkhornTransport() + otda_semi = ot.da.EMDTransport() otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) n_semisup = np.sum(otda_semi.cost_) @@ -305,7 +307,11 @@ def test_emd_transport_class(): # and labeled target samples mass_semi = np.sum( otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]) - assert mass_semi == 0, "semisupervised mode not working" + mass_semi = otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max] + + # we need to use a small tolerance here, otherwise the test breaks + assert_allclose(mass_semi, np.zeros_like(mass_semi), + rtol=1e-2, atol=1e-2) def test_mapping_transport_class(): @@ -491,3 +497,4 @@ def test_otda(): # test_sinkhorn_l1l2_transport_class() # test_sinkhorn_lpl1_transport_class() # test_mapping_transport_class() + -- cgit v1.2.3 From 2097116c7db725a88876d617e20a94f32627f7c9 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Tue, 5 Sep 2017 10:09:55 +0200 Subject: solving pb --- test/test_da.py | 61 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 27 deletions(-) (limited to 'test') diff --git a/test/test_da.py b/test/test_da.py index 3602db9..593dc53 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -36,8 +36,10 @@ def test_sinkhorn_lpl1_transport_class(): # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = otda.transform(Xs=Xs) @@ -108,8 +110,10 @@ def test_sinkhorn_l1l2_transport_class(): # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = otda.transform(Xs=Xs) @@ -187,8 +191,10 @@ def test_sinkhorn_transport_class(): # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = otda.transform(Xs=Xs) @@ -263,8 +269,10 @@ def test_emd_transport_class(): # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = otda.transform(Xs=Xs) @@ -342,8 +350,10 @@ def test_mapping_transport_class(): # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = otda.transform(Xs=Xs) @@ -363,8 +373,10 @@ def test_mapping_transport_class(): # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = otda.transform(Xs=Xs) @@ -389,8 +401,10 @@ def test_mapping_transport_class(): # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = otda.transform(Xs=Xs) @@ -410,8 +424,10 @@ def test_mapping_transport_class(): # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = otda.transform(Xs=Xs) @@ -454,7 +470,8 @@ def test_otda(): da_entrop.interp() da_entrop.predict(xs) - np.testing.assert_allclose(a, np.sum(da_entrop.G, 1), rtol=1e-3, atol=1e-3) + np.testing.assert_allclose( + a, np.sum(da_entrop.G, 1), rtol=1e-3, atol=1e-3) np.testing.assert_allclose(b, np.sum(da_entrop.G, 0), rtol=1e-3, atol=1e-3) # non-convex Group lasso regularization @@ -488,13 +505,3 @@ def test_otda(): da_emd = ot.da.OTDA_mapping_kernel() # init class da_emd.fit(xs, xt, numItermax=10) # fit distributions da_emd.predict(xs) # interpolation of source samples - - -# if __name__ == "__main__": - -# test_sinkhorn_transport_class() -# test_emd_transport_class() -# test_sinkhorn_l1l2_transport_class() -# test_sinkhorn_lpl1_transport_class() -# test_mapping_transport_class() - -- cgit v1.2.3 From d52b4ea415d9bb669be04ccd0940f9b3d258d0e1 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Tue, 5 Sep 2017 17:15:45 +0900 Subject: Fixed typo and merged emd tests --- ot/lp/__init__.py | 2 +- test/test_emd.py | 68 ------------------------------------------------------- test/test_ot.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 64 insertions(+), 72 deletions(-) delete mode 100644 test/test_emd.py (limited to 'test') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index a14d4e4..6048f60 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -168,6 +168,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)] def f(b): - return emd2_c(a,b,M, max_iter)[0] + return emd2_c(a,b,M, numItermax)[0] res= parmap(f, [b[:,i] for i in range(nb)],processes) return np.array(res) diff --git a/test/test_emd.py b/test/test_emd.py deleted file mode 100644 index 0025583..0000000 --- a/test/test_emd.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- - -import numpy as np -import ot - -from ot.datasets import get_1D_gauss as gauss -reload(ot.lp) - -#%% parameters - -n=5000 # nb bins -m=6000 # nb bins - -mean1 = 1000 -mean2 = 1100 - -# bin positions -x=np.arange(n,dtype=np.float64) -y=np.arange(m,dtype=np.float64) - -# Gaussian distributions -a=gauss(n,m=mean1,s=5) # m= mean, s= std - -b=gauss(m,m=mean2,s=10) - -# loss matrix -M=ot.dist(x.reshape((-1,1)), y.reshape((-1,1))) ** (1./2) -#M/=M.max() - -#%% - -print('Computing {} EMD '.format(1)) - -# emd loss 1 proc -ot.tic() -G, alpha, beta = ot.emd(a,b,M, dual_variables=True) -ot.toc('1 proc : {} s') - -cost1 = (G * M).sum() -cost_dual = np.vdot(a, alpha) + np.vdot(b, beta) - -# emd loss 1 proc -ot.tic() -cost_emd2 = ot.emd2(a,b,M) -ot.toc('1 proc : {} s') - -ot.tic() -G2 = ot.emd(b, a, np.ascontiguousarray(M.T)) -ot.toc('1 proc : {} s') - -cost2 = (G2 * M.T).sum() - -M_reduced = M - alpha.reshape(-1,1) - beta.reshape(1, -1) - -# Check that both cost computations are equivalent -np.testing.assert_almost_equal(cost1, cost_emd2) -# Check that dual and primal cost are equal -np.testing.assert_almost_equal(cost1, cost_dual) -# Check symmetry -np.testing.assert_almost_equal(cost1, cost2) -# Check with closed-form solution for gaussians -np.testing.assert_almost_equal(cost1, np.abs(mean1-mean2)) - -[ind1, ind2] = np.nonzero(G) - -# Check that reduced cost is zero on transport arcs -np.testing.assert_array_almost_equal((M - alpha.reshape(-1, 1) - beta.reshape(1, -1))[ind1, ind2], np.zeros(ind1.size)) \ No newline at end of file diff --git a/test/test_ot.py b/test/test_ot.py index acd8718..ded6c9f 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -6,6 +6,8 @@ import numpy as np import ot + +from ot.datasets import get_1D_gauss as gauss def test_doctest(): @@ -66,9 +68,6 @@ def test_emd_empty(): def test_emd2_multi(): - - from ot.datasets import get_1D_gauss as gauss - n = 1000 # nb bins # bin positions @@ -100,3 +99,64 @@ def test_emd2_multi(): ot.toc('multi proc : {} s') np.testing.assert_allclose(emd1, emdn) + +def test_dual_variables(): + #%% parameters + + n=5000 # nb bins + m=6000 # nb bins + + mean1 = 1000 + mean2 = 1100 + + # bin positions + x=np.arange(n,dtype=np.float64) + y=np.arange(m,dtype=np.float64) + + # Gaussian distributions + a=gauss(n,m=mean1,s=5) # m= mean, s= std + + b=gauss(m,m=mean2,s=10) + + # loss matrix + M=ot.dist(x.reshape((-1,1)), y.reshape((-1,1))) ** (1./2) + #M/=M.max() + + #%% + + print('Computing {} EMD '.format(1)) + + # emd loss 1 proc + ot.tic() + G, alpha, beta = ot.emd(a,b,M, dual_variables=True) + ot.toc('1 proc : {} s') + + cost1 = (G * M).sum() + cost_dual = np.vdot(a, alpha) + np.vdot(b, beta) + + # emd loss 1 proc + ot.tic() + cost_emd2 = ot.emd2(a,b,M) + ot.toc('1 proc : {} s') + + ot.tic() + G2 = ot.emd(b, a, np.ascontiguousarray(M.T)) + ot.toc('1 proc : {} s') + + cost2 = (G2 * M.T).sum() + + M_reduced = M - alpha.reshape(-1,1) - beta.reshape(1, -1) + + # Check that both cost computations are equivalent + np.testing.assert_almost_equal(cost1, cost_emd2) + # Check that dual and primal cost are equal + np.testing.assert_almost_equal(cost1, cost_dual) + # Check symmetry + np.testing.assert_almost_equal(cost1, cost2) + # Check with closed-form solution for gaussians + np.testing.assert_almost_equal(cost1, np.abs(mean1-mean2)) + + [ind1, ind2] = np.nonzero(G) + + # Check that reduced cost is zero on transport arcs + np.testing.assert_array_almost_equal((M - alpha.reshape(-1, 1) - beta.reshape(1, -1))[ind1, ind2], np.zeros(ind1.size)) -- cgit v1.2.3 From a3497b123b4802c7960a07a899ac7ce4525c5995 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Tue, 5 Sep 2017 17:51:58 +0900 Subject: Reformat --- test/test_ot.py | 67 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 34 deletions(-) (limited to 'test') diff --git a/test/test_ot.py b/test/test_ot.py index ded6c9f..6f0f7c9 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -5,13 +5,12 @@ # License: MIT License import numpy as np + import ot - from ot.datasets import get_1D_gauss as gauss def test_doctest(): - import doctest # test lp solver @@ -100,53 +99,52 @@ def test_emd2_multi(): np.testing.assert_allclose(emd1, emdn) + def test_dual_variables(): - #%% parameters - - n=5000 # nb bins - m=6000 # nb bins - + # %% parameters + + n = 5000 # nb bins + m = 6000 # nb bins + mean1 = 1000 mean2 = 1100 - + # bin positions - x=np.arange(n,dtype=np.float64) - y=np.arange(m,dtype=np.float64) - + x = np.arange(n, dtype=np.float64) + y = np.arange(m, dtype=np.float64) + # Gaussian distributions - a=gauss(n,m=mean1,s=5) # m= mean, s= std - - b=gauss(m,m=mean2,s=10) - + a = gauss(n, m=mean1, s=5) # m= mean, s= std + + b = gauss(m, m=mean2, s=10) + # loss matrix - M=ot.dist(x.reshape((-1,1)), y.reshape((-1,1))) ** (1./2) - #M/=M.max() - - #%% - + M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2) + # M/=M.max() + + # %% + print('Computing {} EMD '.format(1)) - + # emd loss 1 proc ot.tic() - G, alpha, beta = ot.emd(a,b,M, dual_variables=True) + G, alpha, beta = ot.emd(a, b, M, dual_variables=True) ot.toc('1 proc : {} s') - + cost1 = (G * M).sum() cost_dual = np.vdot(a, alpha) + np.vdot(b, beta) - + # emd loss 1 proc ot.tic() - cost_emd2 = ot.emd2(a,b,M) + cost_emd2 = ot.emd2(a, b, M) ot.toc('1 proc : {} s') - + ot.tic() G2 = ot.emd(b, a, np.ascontiguousarray(M.T)) ot.toc('1 proc : {} s') - + cost2 = (G2 * M.T).sum() - - M_reduced = M - alpha.reshape(-1,1) - beta.reshape(1, -1) - + # Check that both cost computations are equivalent np.testing.assert_almost_equal(cost1, cost_emd2) # Check that dual and primal cost are equal @@ -154,9 +152,10 @@ def test_dual_variables(): # Check symmetry np.testing.assert_almost_equal(cost1, cost2) # Check with closed-form solution for gaussians - np.testing.assert_almost_equal(cost1, np.abs(mean1-mean2)) - + np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2)) + [ind1, ind2] = np.nonzero(G) - + # Check that reduced cost is zero on transport arcs - np.testing.assert_array_almost_equal((M - alpha.reshape(-1, 1) - beta.reshape(1, -1))[ind1, ind2], np.zeros(ind1.size)) + np.testing.assert_array_almost_equal((M - alpha.reshape(-1, 1) - beta.reshape(1, -1))[ind1, ind2], + np.zeros(ind1.size)) -- cgit v1.2.3 From f8c1c8740f9974dcf4aaf191851d62149dceb91c Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Thu, 7 Sep 2017 13:29:46 +0900 Subject: Added MAX_ITER_REACHED flag and warning --- ot/lp/EMD.h | 3 ++- ot/lp/EMD_wrapper.cpp | 21 +++++++---------- ot/lp/emd_wrap.pyx | 29 +++++++++++++---------- ot/lp/network_simplex_simple.h | 46 ++++++++++++++++++++----------------- test/test_ot.py | 52 ++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 102 insertions(+), 49 deletions(-) (limited to 'test') diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index 15e9115..bb486de 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -26,7 +26,8 @@ typedef unsigned int node_id_type; enum ProblemType { INFEASIBLE, OPTIMAL, - UNBOUNDED + UNBOUNDED, + MAX_ITER_REACHED }; int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int max_iter); diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index 8e74462..92663dc 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -29,14 +29,18 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, double val=*(X+i); if (val>0) { n++; - } + }else if(val<0){ + return INFEASIBLE; + } } m=0; for (int i=0; i0) { m++; - } + }else if(val<0){ + return INFEASIBLE; + } } // Define the graph @@ -83,16 +87,7 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, // Solve the problem with the network simplex algorithm int ret=net.run(); - if (ret!=(int)net.OPTIMAL) { - if (ret==(int)net.INFEASIBLE) { - std::cout << "Infeasible problem"; - } - if (ret==(int)net.UNBOUNDED) - { - std::cout << "Unbounded problem"; - } - } else - { + if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) { *cost = 0; Arc a; di.first(a); for (; a != INVALID; di.next(a)) { @@ -105,7 +100,7 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, *(beta + indJ[j-n]) = net.potential(j); } - }; + } return ret; diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 7056e0e..9bea154 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -7,6 +7,7 @@ Cython linker with C solver # # License: MIT License +import warnings import numpy as np cimport numpy as np @@ -15,14 +16,14 @@ cimport cython cdef extern from "EMD.h": - int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int max_iter) - cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED + int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int numItermax) + cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED @cython.boundscheck(False) @cython.wraparound(False) -def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter): +def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): """ Solves the Earth Movers distance problem and returns the optimal transport matrix @@ -49,7 +50,7 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod target histogram M : (ns,nt) ndarray, float64 loss matrix - max_iter : int + numItermax : int The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -76,18 +77,20 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod b=np.ones((n2,))/n2 # calling the function - cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, max_iter) + cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, numItermax) if resultSolver != OPTIMAL: if resultSolver == INFEASIBLE: - print("Problem infeasible. Try to increase numItermax.") + warnings.warn("Problem infeasible. Check that a and b are in the simplex") elif resultSolver == UNBOUNDED: - print("Problem unbounded") + warnings.warn("Problem unbounded") + elif resultSolver == MAX_ITER_REACHED: + warnings.warn("numItermax reached before optimality. Try to increase numItermax.") return G, alpha, beta @cython.boundscheck(False) @cython.wraparound(False) -def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter): +def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): """ Solves the Earth Movers distance problem and returns the optimal transport loss @@ -114,7 +117,7 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo target histogram M : (ns,nt) ndarray, float64 loss matrix - max_iter : int + numItermax : int The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -140,12 +143,14 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo if not len(b): b=np.ones((n2,))/n2 # calling the function - cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, max_iter) + cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, numItermax) if resultSolver != OPTIMAL: if resultSolver == INFEASIBLE: - print("Problem infeasible. Try to inscrease numItermax.") + warnings.warn("Problem infeasible. Check that a and b are in the simplex") elif resultSolver == UNBOUNDED: - print("Problem unbounded") + warnings.warn("Problem unbounded") + elif resultSolver == MAX_ITER_REACHED: + warnings.warn("numItermax reached before optimality. Try to increase numItermax.") return cost, alpha, beta diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h index a7743ee..7c6a4ce 100644 --- a/ot/lp/network_simplex_simple.h +++ b/ot/lp/network_simplex_simple.h @@ -34,7 +34,8 @@ #endif -#define EPSILON 10*2.2204460492503131e-016 +#define EPSILON 2.2204460492503131e-15 +#define _EPSILON 1e-8 #define MAX_DEBUG_ITER 100000 @@ -260,7 +261,9 @@ namespace lemon { /// The objective function of the problem is unbounded, i.e. /// there is a directed cycle having negative total cost and /// infinite upper bound. - UNBOUNDED + UNBOUNDED, + /// The maximum number of iteration has been reached + MAX_ITER_REACHED }; /// \brief Constants for selecting the type of the supply constraints. @@ -683,7 +686,7 @@ namespace lemon { /// \see resetParams(), reset() ProblemType run() { #if DEBUG_LVL>0 - std::cout << "OPTIMAL = " << OPTIMAL << "\nINFEASIBLE = " << INFEASIBLE << "nUNBOUNDED = " << UNBOUNDED << "\n"; + std::cout << "OPTIMAL = " << OPTIMAL << "\nINFEASIBLE = " << INFEASIBLE << "\nUNBOUNDED = " << UNBOUNDED << "\nMAX_ITER_REACHED" << MAX_ITER_REACHED\n"; #endif if (!init()) return INFEASIBLE; @@ -941,15 +944,15 @@ namespace lemon { // Initialize internal data structures bool init() { if (_node_num == 0) return false; - /* + // Check the sum of supply values _sum_supply = 0; for (int i = 0; i != _node_num; ++i) { _sum_supply += _supply[i]; } - if ( !((_stype == GEQ && _sum_supply <= _epsilon ) || - (_stype == LEQ && _sum_supply >= -_epsilon )) ) return false; - */ + if ( fabs(_sum_supply) > _EPSILON ) return false; + + _sum_supply = 0; // Initialize artifical cost Cost ART_COST; @@ -1416,13 +1419,11 @@ namespace lemon { ProblemType start() { PivotRuleImpl pivot(*this); double prevCost=-1; + ProblemType retVal = OPTIMAL; // Perform heuristic initial pivots if (!initialPivots()) return UNBOUNDED; -#if DEBUG_LVL>0 - int niter=0; -#endif int iter_number=0; //pivot.setDantzig(true); // Execute the Network Simplex algorithm @@ -1431,12 +1432,13 @@ namespace lemon { char errMess[1000]; sprintf( errMess, "RESULT MIGHT BE INACURATE\nMax number of iteration reached, currently \%d. Sometimes iterations go on in cycle even though the solution has been reached, to check if it's the case here have a look at the minimal reduced cost. If it is very close to machine precision, you might actually have the correct solution, if not try setting the maximum number of iterations a bit higher\n",iter_number ); std::cerr << errMess; + retVal = MAX_ITER_REACHED; break; } #if DEBUG_LVL>0 - if(niter>MAX_DEBUG_ITER) + if(iter_number>MAX_DEBUG_ITER) break; - if(++niter%1000==0||niter%1000==1){ + if(iter_number%1000==0||iter_number%1000==1){ double curCost=totalCost(); double sumFlow=0; double a; @@ -1445,7 +1447,7 @@ namespace lemon { for (int i=0; i<_flow.size(); i++) { sumFlow+=_state[i]*_flow[i]; } - std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << niter << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n"; + std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << iter_number << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n"; std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n"; std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n"; std::cout << _cost[in_arc] << "\n"; @@ -1503,15 +1505,17 @@ namespace lemon { std::cout << "Sum of the flow " << sumFlow << "\n"<< niter <<" iterations, current cost=" << totalCost() << "\n"; #endif // Check feasibility - for (int e = _search_arc_num; e != _all_arc_num; ++e) { - if (_flow[e] != 0){ - if (abs(_flow[e]) > EPSILON) - return INFEASIBLE; - else - _flow[e]=0; + if( retVal == OPTIMAL){ + for (int e = _search_arc_num; e != _all_arc_num; ++e) { + if (_flow[e] != 0){ + if (abs(_flow[e]) > EPSILON) + return INFEASIBLE; + else + _flow[e]=0; + } } - } + } // Shift potentials to meet the requirements of the GEQ/LEQ type // optimality conditions @@ -1537,7 +1541,7 @@ namespace lemon { } } - return OPTIMAL; + return retVal; } }; //class NetworkSimplexSimple diff --git a/test/test_ot.py b/test/test_ot.py index 6f0f7c9..8a19cf6 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -8,6 +8,7 @@ import numpy as np import ot from ot.datasets import get_1D_gauss as gauss +import warnings def test_doctest(): @@ -100,9 +101,56 @@ def test_emd2_multi(): np.testing.assert_allclose(emd1, emdn) -def test_dual_variables(): - # %% parameters +def test_warnings(): + n = 100 # nb bins + m = 100 # nb bins + + mean1 = 30 + mean2 = 50 + + # bin positions + x = np.arange(n, dtype=np.float64) + y = np.arange(m, dtype=np.float64) + + # Gaussian distributions + a = gauss(n, m=mean1, s=5) # m= mean, s= std + + b = gauss(m, m=mean2, s=10) + # loss matrix + M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2) + # M/=M.max() + + # %% + + print('Computing {} EMD '.format(1)) + G, alpha, beta = ot.emd(a, b, M, dual_variables=True) + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + # Trigger a warning. + print('Computing {} EMD '.format(1)) + G, alpha, beta = ot.emd(a, b, M, dual_variables=True, numItermax=1) + # Verify some things + assert "numItermax" in str(w[-1].message) + assert len(w) == 1 + # Trigger a warning. + a[0]=100 + print('Computing {} EMD '.format(2)) + G, alpha, beta = ot.emd(a, b, M, dual_variables=True) + # Verify some things + assert "infeasible" in str(w[-1].message) + assert len(w) == 2 + # Trigger a warning. + a[0]=-1 + print('Computing {} EMD '.format(2)) + G, alpha, beta = ot.emd(a, b, M, dual_variables=True) + # Verify some things + assert "infeasible" in str(w[-1].message) + assert len(w) == 3 + + +def test_dual_variables(): n = 5000 # nb bins m = 6000 # nb bins -- cgit v1.2.3 From 12d9b3ff72e9669ccc0162e82b7a33beb51d3e25 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Thu, 7 Sep 2017 13:50:41 +0900 Subject: Return dual variables in an optional dictionary Also removed some code duplication --- ot/lp/__init__.py | 24 +++++++++++++------ ot/lp/emd_wrap.pyx | 69 +----------------------------------------------------- test/test_ot.py | 20 ++++++---------- 3 files changed, 25 insertions(+), 88 deletions(-) (limited to 'test') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 6048f60..c15e6b9 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -9,12 +9,12 @@ Solvers for the original linear program OT problem import numpy as np # import compiled emd -from .emd_wrap import emd_c, emd2_c +from .emd_wrap import emd_c from ..utils import parmap import multiprocessing -def emd(a, b, M, numItermax=100000, dual_variables=False): +def emd(a, b, M, numItermax=100000, log=False): """Solves the Earth Movers distance problem and returns the OT matrix @@ -42,11 +42,17 @@ def emd(a, b, M, numItermax=100000, dual_variables=False): numItermax : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. + log: boolean, optional (default=False) + If True, returns a dictionary containing the cost and dual + variables. Otherwise returns only the optimal transportation matrix. Returns ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters + log: dict + If input log is true, a dictionary containing the cost and dual + variables Examples @@ -86,9 +92,13 @@ def emd(a, b, M, numItermax=100000, dual_variables=False): if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - G, alpha, beta = emd_c(a, b, M, numItermax) - if dual_variables: - return G, alpha, beta + G, cost, u, v = emd_c(a, b, M, numItermax) + if log: + log = {} + log['cost'] = cost + log['u'] = u + log['v'] = v + return G, log return G def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): @@ -163,11 +173,11 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] if len(b.shape)==1: - return emd2_c(a, b, M, numItermax)[0] + return emd_c(a, b, M, numItermax)[1] nb = b.shape[1] # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)] def f(b): - return emd2_c(a,b,M, numItermax)[0] + return emd_c(a,b,M, numItermax)[1] res= parmap(f, [b[:,i] for i in range(nb)],processes) return np.array(res) diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 9bea154..5618dfc 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -86,71 +86,4 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod elif resultSolver == MAX_ITER_REACHED: warnings.warn("numItermax reached before optimality. Try to increase numItermax.") - return G, alpha, beta - -@cython.boundscheck(False) -@cython.wraparound(False) -def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): - """ - Solves the Earth Movers distance problem and returns the optimal transport loss - - gamm=emd(a,b,M) - - .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F - - s.t. \gamma 1 = a - - \gamma^T 1= b - - \gamma\geq 0 - where : - - - M is the metric cost matrix - - a and b are the sample weights - - Parameters - ---------- - a : (ns,) ndarray, float64 - source histogram - b : (nt,) ndarray, float64 - target histogram - M : (ns,nt) ndarray, float64 - loss matrix - numItermax : int - The maximum number of iterations before stopping the optimization - algorithm if it has not converged. - - - Returns - ------- - gamma: (ns x nt) ndarray - Optimal transportation matrix for the given parameters - - """ - cdef int n1= M.shape[0] - cdef int n2= M.shape[1] - - cdef double cost=0 - cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) - - cdef np.ndarray[double, ndim = 1, mode = "c"] alpha = np.zeros([n1]) - cdef np.ndarray[double, ndim = 1, mode = "c"] beta = np.zeros([n2]) - - if not len(a): - a=np.ones((n1,))/n1 - - if not len(b): - b=np.ones((n2,))/n2 - # calling the function - cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, numItermax) - if resultSolver != OPTIMAL: - if resultSolver == INFEASIBLE: - warnings.warn("Problem infeasible. Check that a and b are in the simplex") - elif resultSolver == UNBOUNDED: - warnings.warn("Problem unbounded") - elif resultSolver == MAX_ITER_REACHED: - warnings.warn("numItermax reached before optimality. Try to increase numItermax.") - - return cost, alpha, beta - + return G, cost, alpha, beta diff --git a/test/test_ot.py b/test/test_ot.py index 8a19cf6..78f64ab 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -124,27 +124,26 @@ def test_warnings(): # %% print('Computing {} EMD '.format(1)) - G, alpha, beta = ot.emd(a, b, M, dual_variables=True) with warnings.catch_warnings(record=True) as w: # Cause all warnings to always be triggered. warnings.simplefilter("always") # Trigger a warning. print('Computing {} EMD '.format(1)) - G, alpha, beta = ot.emd(a, b, M, dual_variables=True, numItermax=1) + G = ot.emd(a, b, M, numItermax=1) # Verify some things assert "numItermax" in str(w[-1].message) assert len(w) == 1 # Trigger a warning. a[0]=100 print('Computing {} EMD '.format(2)) - G, alpha, beta = ot.emd(a, b, M, dual_variables=True) + G = ot.emd(a, b, M) # Verify some things assert "infeasible" in str(w[-1].message) assert len(w) == 2 # Trigger a warning. a[0]=-1 print('Computing {} EMD '.format(2)) - G, alpha, beta = ot.emd(a, b, M, dual_variables=True) + G = ot.emd(a, b, M) # Verify some things assert "infeasible" in str(w[-1].message) assert len(w) == 3 @@ -176,16 +175,11 @@ def test_dual_variables(): # emd loss 1 proc ot.tic() - G, alpha, beta = ot.emd(a, b, M, dual_variables=True) + G, log = ot.emd(a, b, M, log=True) ot.toc('1 proc : {} s') cost1 = (G * M).sum() - cost_dual = np.vdot(a, alpha) + np.vdot(b, beta) - - # emd loss 1 proc - ot.tic() - cost_emd2 = ot.emd2(a, b, M) - ot.toc('1 proc : {} s') + cost_dual = np.vdot(a, log['u']) + np.vdot(b, log['v']) ot.tic() G2 = ot.emd(b, a, np.ascontiguousarray(M.T)) @@ -194,7 +188,7 @@ def test_dual_variables(): cost2 = (G2 * M.T).sum() # Check that both cost computations are equivalent - np.testing.assert_almost_equal(cost1, cost_emd2) + np.testing.assert_almost_equal(cost1, log['cost']) # Check that dual and primal cost are equal np.testing.assert_almost_equal(cost1, cost_dual) # Check symmetry @@ -205,5 +199,5 @@ def test_dual_variables(): [ind1, ind2] = np.nonzero(G) # Check that reduced cost is zero on transport arcs - np.testing.assert_array_almost_equal((M - alpha.reshape(-1, 1) - beta.reshape(1, -1))[ind1, ind2], + np.testing.assert_array_almost_equal((M - log['u'].reshape(-1, 1) - log['v'].reshape(1, -1))[ind1, ind2], np.zeros(ind1.size)) -- cgit v1.2.3 From ab65f86304b03a967054eeeaf73b8c8277618d65 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Thu, 7 Sep 2017 14:35:35 +0900 Subject: Added log option to muliprocess emd --- ot/lp/__init__.py | 39 ++++++++++++++++++++++++------------- test/test_ot.py | 57 ++++++++++++++++++++++++++++++------------------------- 2 files changed, 57 insertions(+), 39 deletions(-) (limited to 'test') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index c15e6b9..8edd8ec 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -7,11 +7,13 @@ Solvers for the original linear program OT problem # # License: MIT License +import multiprocessing + import numpy as np + # import compiled emd from .emd_wrap import emd_c from ..utils import parmap -import multiprocessing def emd(a, b, M, numItermax=100000, log=False): @@ -88,9 +90,9 @@ def emd(a, b, M, numItermax=100000, log=False): # if empty array given then use unifor distributions if len(a) == 0: - a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0] + a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] if len(b) == 0: - b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] + b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] G, cost, u, v = emd_c(a, b, M, numItermax) if log: @@ -101,7 +103,8 @@ def emd(a, b, M, numItermax=100000, log=False): return G, log return G -def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): + +def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log=False): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -168,16 +171,26 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): # if empty array given then use unifor distributions if len(a) == 0: - a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0] + a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] if len(b) == 0: - b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - - if len(b.shape)==1: - return emd_c(a, b, M, numItermax)[1] + b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] + + if log: + def f(b): + G, cost, u, v = emd_c(a, b, M, numItermax) + log = {} + log['G'] = G + log['u'] = u + log['v'] = v + return [cost, log] + else: + def f(b): + return emd_c(a, b, M, numItermax)[1] + + if len(b.shape) == 1: + return f(b) nb = b.shape[1] # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)] - def f(b): - return emd_c(a,b,M, numItermax)[1] - res= parmap(f, [b[:,i] for i in range(nb)],processes) - return np.array(res) + res = parmap(f, [b[:, i] for i in range(nb)], processes) + return res diff --git a/test/test_ot.py b/test/test_ot.py index 78f64ab..feadef4 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -4,11 +4,12 @@ # # License: MIT License +import warnings + import numpy as np import ot from ot.datasets import get_1D_gauss as gauss -import warnings def test_doctest(): @@ -100,6 +101,21 @@ def test_emd2_multi(): np.testing.assert_allclose(emd1, emdn) + # emd loss multipro proc with log + ot.tic() + emdn = ot.emd2(a, b, M, log=True) + ot.toc('multi proc : {} s') + + for i in range(len(emdn)): + emd = emdn[i] + log = emd[1] + cost = emd[0] + check_duality_gap(a, b[:, i], M, log['G'], log['u'], log['v'], cost) + emdn[i] = cost + + emdn = np.array(emdn) + np.testing.assert_allclose(emd1, emdn) + def test_warnings(): n = 100 # nb bins @@ -119,32 +135,22 @@ def test_warnings(): # loss matrix M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2) - # M/=M.max() - - # %% print('Computing {} EMD '.format(1)) with warnings.catch_warnings(record=True) as w: - # Cause all warnings to always be triggered. warnings.simplefilter("always") - # Trigger a warning. print('Computing {} EMD '.format(1)) G = ot.emd(a, b, M, numItermax=1) - # Verify some things assert "numItermax" in str(w[-1].message) assert len(w) == 1 - # Trigger a warning. - a[0]=100 + a[0] = 100 print('Computing {} EMD '.format(2)) G = ot.emd(a, b, M) - # Verify some things assert "infeasible" in str(w[-1].message) assert len(w) == 2 - # Trigger a warning. - a[0]=-1 + a[0] = -1 print('Computing {} EMD '.format(2)) G = ot.emd(a, b, M) - # Verify some things assert "infeasible" in str(w[-1].message) assert len(w) == 3 @@ -167,9 +173,6 @@ def test_dual_variables(): # loss matrix M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2) - # M/=M.max() - - # %% print('Computing {} EMD '.format(1)) @@ -178,26 +181,28 @@ def test_dual_variables(): G, log = ot.emd(a, b, M, log=True) ot.toc('1 proc : {} s') - cost1 = (G * M).sum() - cost_dual = np.vdot(a, log['u']) + np.vdot(b, log['v']) - ot.tic() G2 = ot.emd(b, a, np.ascontiguousarray(M.T)) ot.toc('1 proc : {} s') - cost2 = (G2 * M.T).sum() + cost1 = (G * M).sum() + # Check symmetry + np.testing.assert_array_almost_equal(cost1, (M * G2.T).sum()) + # Check with closed-form solution for gaussians + np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2)) # Check that both cost computations are equivalent np.testing.assert_almost_equal(cost1, log['cost']) + check_duality_gap(a, b, M, G, log['u'], log['v'], log['cost']) + + +def check_duality_gap(a, b, M, G, u, v, cost): + cost_dual = np.vdot(a, u) + np.vdot(b, v) # Check that dual and primal cost are equal - np.testing.assert_almost_equal(cost1, cost_dual) - # Check symmetry - np.testing.assert_almost_equal(cost1, cost2) - # Check with closed-form solution for gaussians - np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2)) + np.testing.assert_almost_equal(cost_dual, cost) [ind1, ind2] = np.nonzero(G) # Check that reduced cost is zero on transport arcs - np.testing.assert_array_almost_equal((M - log['u'].reshape(-1, 1) - log['v'].reshape(1, -1))[ind1, ind2], + np.testing.assert_array_almost_equal((M - u.reshape(-1, 1) - v.reshape(1, -1))[ind1, ind2], np.zeros(ind1.size)) -- cgit v1.2.3 From a37e52e64f300fa0165a58932d5ac0ef1dd8c6f7 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Thu, 7 Sep 2017 14:38:53 +0900 Subject: Removed unused variable declaration --- test/test_ot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/test/test_ot.py b/test/test_ot.py index feadef4..cf5839e 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -140,17 +140,17 @@ def test_warnings(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") print('Computing {} EMD '.format(1)) - G = ot.emd(a, b, M, numItermax=1) + ot.emd(a, b, M, numItermax=1) assert "numItermax" in str(w[-1].message) assert len(w) == 1 a[0] = 100 print('Computing {} EMD '.format(2)) - G = ot.emd(a, b, M) + ot.emd(a, b, M) assert "infeasible" in str(w[-1].message) assert len(w) == 2 a[0] = -1 print('Computing {} EMD '.format(2)) - G = ot.emd(a, b, M) + ot.emd(a, b, M) assert "infeasible" in str(w[-1].message) assert len(w) == 3 -- cgit v1.2.3 From 85c56d96f609c4ad458f0963a068386cc910c66c Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Sat, 9 Sep 2017 17:28:38 +0900 Subject: Renamed variables --- ot/da.py | 2 +- ot/lp/__init__.py | 31 +++++++++++++++++-------------- ot/lp/emd_wrap.pyx | 18 +++++++++--------- test/test_ot.py | 2 +- 4 files changed, 28 insertions(+), 25 deletions(-) (limited to 'test') diff --git a/ot/da.py b/ot/da.py index 1d3d0ba..eb70305 100644 --- a/ot/da.py +++ b/ot/da.py @@ -1370,7 +1370,7 @@ class EMDTransport(BaseTransport): # coupling estimation self.coupling_ = emd( - a=self.mu_s, b=self.mu_t, M=self.cost_, numItermax=self.max_iter + a=self.mu_s, b=self.mu_t, M=self.cost_, num_iter_max=self.max_iter ) return self diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 0f40c19..ab7cb97 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -12,11 +12,11 @@ import multiprocessing import numpy as np # import compiled emd -from .emd_wrap import emd_c, checkResult +from .emd_wrap import emd_c, check_result from ..utils import parmap -def emd(a, b, M, numItermax=100000, log=False): +def emd(a, b, M, num_iter_max=100000, log=False): """Solves the Earth Movers distance problem and returns the OT matrix @@ -41,7 +41,7 @@ def emd(a, b, M, numItermax=100000, log=False): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - numItermax : int, optional (default=100000) + num_iter_max : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. log: boolean, optional (default=False) @@ -54,7 +54,7 @@ def emd(a, b, M, numItermax=100000, log=False): Optimal transportation matrix for the given parameters log: dict If input log is true, a dictionary containing the cost and dual - variables + variables and exit status Examples @@ -94,20 +94,20 @@ def emd(a, b, M, numItermax=100000, log=False): if len(b) == 0: b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] - G, cost, u, v, resultCode = emd_c(a, b, M, numItermax) - resultCodeString = checkResult(resultCode) + G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) + resultCodeString = check_result(result_code) if log: log = {} log['cost'] = cost log['u'] = u log['v'] = v log['warning'] = resultCodeString - log['resultCode'] = resultCode + log['result_code'] = result_code return G, log return G -def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log=False): +def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, log=False): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -131,7 +131,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log= Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - numItermax : int, optional (default=100000) + num_iter_max : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -139,6 +139,9 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log= ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters + log: dict + If input log is true, a dictionary containing the cost and dual + variables and exit status Examples @@ -180,19 +183,19 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log= if log: def f(b): - G, cost, u, v, resultCode = emd_c(a, b, M, numItermax) - resultCodeString = checkResult(resultCode) + G, cost, u, v, resultCode = emd_c(a, b, M, num_iter_max) + resultCodeString = check_result(resultCode) log = {} log['G'] = G log['u'] = u log['v'] = v log['warning'] = resultCodeString - log['resultCode'] = resultCode + log['result_code'] = resultCode return [cost, log] else: def f(b): - G, cost, u, v, resultCode = emd_c(a, b, M, numItermax) - checkResult(resultCode) + G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) + check_result(result_code) return cost if len(b.shape) == 1: diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 19bcdd8..7ebdd2a 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -20,15 +20,15 @@ cdef extern from "EMD.h": cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED -def checkResult(resultCode): - if resultCode == OPTIMAL: +def check_result(result_code): + if result_code == OPTIMAL: return None - if resultCode == INFEASIBLE: + if result_code == INFEASIBLE: message = "Problem infeasible. Check that a and b are in the simplex" - elif resultCode == UNBOUNDED: + elif result_code == UNBOUNDED: message = "Problem unbounded" - elif resultCode == MAX_ITER_REACHED: + elif result_code == MAX_ITER_REACHED: message = "numItermax reached before optimality. Try to increase numItermax." warnings.warn(message) return message @@ -36,7 +36,7 @@ def checkResult(resultCode): @cython.boundscheck(False) @cython.wraparound(False) -def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): +def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int num_iter_max): """ Solves the Earth Movers distance problem and returns the optimal transport matrix @@ -63,7 +63,7 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod target histogram M : (ns,nt) ndarray, float64 loss matrix - numItermax : int + num_iter_max : int The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -90,6 +90,6 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod b=np.ones((n2,))/n2 # calling the function - cdef int resultCode = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, numItermax) + cdef int result_code = EMD_wrap(n1, n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, num_iter_max) - return G, cost, alpha, beta, resultCode + return G, cost, alpha, beta, result_code diff --git a/test/test_ot.py b/test/test_ot.py index cf5839e..c9b5154 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -140,7 +140,7 @@ def test_warnings(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") print('Computing {} EMD '.format(1)) - ot.emd(a, b, M, numItermax=1) + ot.emd(a, b, M, num_iter_max=1) assert "numItermax" in str(w[-1].message) assert len(w) == 1 a[0] = 100 -- cgit v1.2.3 From cd8c04246b6d1f15b68d6433741e8c808fd517d8 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Sat, 9 Sep 2017 17:38:31 +0900 Subject: Renamed variable --- ot/da.py | 2 +- ot/lp/EMD.h | 2 +- ot/lp/EMD_wrapper.cpp | 4 ++-- ot/lp/__init__.py | 14 +++++++------- ot/lp/emd_wrap.pyx | 8 ++++---- test/test_ot.py | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) (limited to 'test') diff --git a/ot/da.py b/ot/da.py index eb70305..f3e7433 100644 --- a/ot/da.py +++ b/ot/da.py @@ -1370,7 +1370,7 @@ class EMDTransport(BaseTransport): # coupling estimation self.coupling_ = emd( - a=self.mu_s, b=self.mu_t, M=self.cost_, num_iter_max=self.max_iter + a=self.mu_s, b=self.mu_t, M=self.cost_, max_iter=self.max_iter ) return self diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index bb486de..f42e222 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -30,6 +30,6 @@ enum ProblemType { MAX_ITER_REACHED }; -int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int max_iter); +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter); #endif diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index 92663dc..fc7ca63 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -16,7 +16,7 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, - double* alpha, double* beta, double *cost, int max_iter) { + double* alpha, double* beta, double *cost, int maxIter) { // beware M and C anre strored in row major C style!!! int n, m, i, cur; @@ -48,7 +48,7 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, std::vector indI(n), indJ(m); std::vector weights1(n), weights2(m); Digraph di(n, m); - NetworkSimplexSimple net(di, true, n+m, n*m, max_iter); + NetworkSimplexSimple net(di, true, n+m, n*m, maxIter); // Set supply and demand, don't account for 0 values (faster) diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 1238cdb..9a0cb1c 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -16,7 +16,7 @@ from .emd_wrap import emd_c, check_result from ..utils import parmap -def emd(a, b, M, num_iter_max=100000, log=False): +def emd(a, b, M, max_iter=100000, log=False): """Solves the Earth Movers distance problem and returns the OT matrix @@ -41,7 +41,7 @@ def emd(a, b, M, num_iter_max=100000, log=False): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - num_iter_max : int, optional (default=100000) + max_iter : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. log: boolean, optional (default=False) @@ -94,7 +94,7 @@ def emd(a, b, M, num_iter_max=100000, log=False): if len(b) == 0: b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] - G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) + G, cost, u, v, result_code = emd_c(a, b, M, max_iter) result_code_string = check_result(result_code) if log: log = {} @@ -107,7 +107,7 @@ def emd(a, b, M, num_iter_max=100000, log=False): return G -def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, log=False): +def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000, log=False): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -131,7 +131,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - num_iter_max : int, optional (default=100000) + max_iter : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -183,7 +183,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo if log: def f(b): - G, cost, u, v, resultCode = emd_c(a, b, M, num_iter_max) + G, cost, u, v, resultCode = emd_c(a, b, M, max_iter) result_code_string = check_result(resultCode) log = {} log['G'] = G @@ -194,7 +194,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo return [cost, log] else: def f(b): - G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) + G, cost, u, v, result_code = emd_c(a, b, M, max_iter) check_result(result_code) return cost diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 7ebdd2a..83ee6aa 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -16,7 +16,7 @@ import warnings cdef extern from "EMD.h": - int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int numItermax) + int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter) cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED @@ -36,7 +36,7 @@ def check_result(result_code): @cython.boundscheck(False) @cython.wraparound(False) -def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int num_iter_max): +def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter): """ Solves the Earth Movers distance problem and returns the optimal transport matrix @@ -63,7 +63,7 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod target histogram M : (ns,nt) ndarray, float64 loss matrix - num_iter_max : int + max_iter : int The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -90,6 +90,6 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod b=np.ones((n2,))/n2 # calling the function - cdef int result_code = EMD_wrap(n1, n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, num_iter_max) + cdef int result_code = EMD_wrap(n1, n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, max_iter) return G, cost, alpha, beta, result_code diff --git a/test/test_ot.py b/test/test_ot.py index c9b5154..ca921c5 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -140,7 +140,7 @@ def test_warnings(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") print('Computing {} EMD '.format(1)) - ot.emd(a, b, M, num_iter_max=1) + ot.emd(a, b, M, max_iter=1) assert "numItermax" in str(w[-1].message) assert len(w) == 1 a[0] = 100 -- cgit v1.2.3 From 8cc04ef5ae8806c81811b2081b1880b46ca063a3 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Sat, 9 Sep 2017 18:05:12 +0900 Subject: Renamed variable in string --- ot/lp/__init__.py | 1 - ot/lp/emd_wrap.pyx | 2 +- test/test_ot.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'test') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 9a0cb1c..ae5b08c 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -201,7 +201,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000, log=Fa if len(b.shape) == 1: return f(b) nb = b.shape[1] - # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)] res = parmap(f, [b[:, i] for i in range(nb)], processes) return res diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 83ee6aa..2fcc0e4 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -29,7 +29,7 @@ def check_result(result_code): elif result_code == UNBOUNDED: message = "Problem unbounded" elif result_code == MAX_ITER_REACHED: - message = "numItermax reached before optimality. Try to increase numItermax." + message = "max_iter reached before optimality. Try to increase max_iter." warnings.warn(message) return message diff --git a/test/test_ot.py b/test/test_ot.py index ca921c5..46fc634 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -141,7 +141,7 @@ def test_warnings(): warnings.simplefilter("always") print('Computing {} EMD '.format(1)) ot.emd(a, b, M, max_iter=1) - assert "numItermax" in str(w[-1].message) + assert "max_iter" in str(w[-1].message) assert len(w) == 1 a[0] = 100 print('Computing {} EMD '.format(2)) -- cgit v1.2.3 From 06429e5a34790ec51eb1c921293b24c37b81b952 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Sat, 9 Sep 2017 18:23:05 +0900 Subject: Returned to old variable name to follow repo convention --- ot/da.py | 2 +- ot/lp/__init__.py | 12 ++++++------ ot/lp/emd_wrap.pyx | 2 +- test/test_ot.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/ot/da.py b/ot/da.py index f3e7433..eb70305 100644 --- a/ot/da.py +++ b/ot/da.py @@ -1370,7 +1370,7 @@ class EMDTransport(BaseTransport): # coupling estimation self.coupling_ = emd( - a=self.mu_s, b=self.mu_t, M=self.cost_, max_iter=self.max_iter + a=self.mu_s, b=self.mu_t, M=self.cost_, num_iter_max=self.max_iter ) return self diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index ae5b08c..17f5bb4 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -16,7 +16,7 @@ from .emd_wrap import emd_c, check_result from ..utils import parmap -def emd(a, b, M, max_iter=100000, log=False): +def emd(a, b, M, num_iter_max=100000, log=False): """Solves the Earth Movers distance problem and returns the OT matrix @@ -41,7 +41,7 @@ def emd(a, b, M, max_iter=100000, log=False): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - max_iter : int, optional (default=100000) + num_iter_max : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. log: boolean, optional (default=False) @@ -94,7 +94,7 @@ def emd(a, b, M, max_iter=100000, log=False): if len(b) == 0: b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] - G, cost, u, v, result_code = emd_c(a, b, M, max_iter) + G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) result_code_string = check_result(result_code) if log: log = {} @@ -107,7 +107,7 @@ def emd(a, b, M, max_iter=100000, log=False): return G -def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000, log=False): +def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, log=False): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -183,7 +183,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000, log=Fa if log: def f(b): - G, cost, u, v, resultCode = emd_c(a, b, M, max_iter) + G, cost, u, v, resultCode = emd_c(a, b, M, num_iter_max) result_code_string = check_result(resultCode) log = {} log['G'] = G @@ -194,7 +194,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000, log=Fa return [cost, log] else: def f(b): - G, cost, u, v, result_code = emd_c(a, b, M, max_iter) + G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) check_result(result_code) return cost diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 2fcc0e4..45fc988 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -29,7 +29,7 @@ def check_result(result_code): elif result_code == UNBOUNDED: message = "Problem unbounded" elif result_code == MAX_ITER_REACHED: - message = "max_iter reached before optimality. Try to increase max_iter." + message = "num_iter_max reached before optimality. Try to increase num_iter_max." warnings.warn(message) return message diff --git a/test/test_ot.py b/test/test_ot.py index 46fc634..e05e8aa 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -140,8 +140,8 @@ def test_warnings(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") print('Computing {} EMD '.format(1)) - ot.emd(a, b, M, max_iter=1) - assert "max_iter" in str(w[-1].message) + ot.emd(a, b, M, num_iter_max=1) + assert "num_iter_max" in str(w[-1].message) assert len(w) == 1 a[0] = 100 print('Computing {} EMD '.format(2)) -- cgit v1.2.3 From dd6f8260d01ce173ef3fe0c900112f0ed5288950 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Tue, 12 Sep 2017 19:58:46 +0900 Subject: Made the return of the matrix optional in emd2 --- ot/lp/__init__.py | 12 +++++++++--- test/test_ot.py | 6 +++--- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'test') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index f2eaa2b..d0f682b 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -107,7 +107,7 @@ def emd(a, b, M, num_iter_max=100000, log=False): return G -def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, log=False): +def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, log=False, return_matrix=False): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -134,6 +134,11 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo num_iter_max : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. + log: boolean, optional (default=False) + If True, returns a dictionary containing the cost and dual + variables. Otherwise returns only the optimal transportation cost. + return_matrix: boolean, optional (default=False) + If True, returns the optimal transportation matrix in the log. Returns ------- @@ -181,12 +186,13 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo if len(b) == 0: b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] - if log: + if log or return_matrix: def f(b): G, cost, u, v, resultCode = emd_c(a, b, M, num_iter_max) result_code_string = check_result(resultCode) log = {} - log['G'] = G + if return_matrix: + log['G'] = G log['u'] = u log['v'] = v log['warning'] = result_code_string diff --git a/test/test_ot.py b/test/test_ot.py index e05e8aa..ea6d9dc 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -103,7 +103,7 @@ def test_emd2_multi(): # emd loss multipro proc with log ot.tic() - emdn = ot.emd2(a, b, M, log=True) + emdn = ot.emd2(a, b, M, log=True, return_matrix=True) ot.toc('multi proc : {} s') for i in range(len(emdn)): @@ -140,8 +140,8 @@ def test_warnings(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") print('Computing {} EMD '.format(1)) - ot.emd(a, b, M, num_iter_max=1) - assert "num_iter_max" in str(w[-1].message) + ot.emd(a, b, M, numItermax=1) + assert "numItermax" in str(w[-1].message) assert len(w) == 1 a[0] = 100 print('Computing {} EMD '.format(2)) -- cgit v1.2.3