From ac7917ab2cbece048e554e32cc653c14440dbcc0 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Sun, 3 May 2020 20:43:11 +0200 Subject: Fewer copies and no GIL for hera Now the input arrays are not copied as long as they use a float64 data type, even if they are not contiguous. That's not important here, but I wanted an example of how to do it. More importantly, no need to hold the GIL. I was too lazy to benchmark to see if that changed anything... --- src/python/gudhi/hera.cc | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera.cc index 0d562b4c..50d49c77 100644 --- a/src/python/gudhi/hera.cc +++ b/src/python/gudhi/hera.cc @@ -11,14 +11,24 @@ #include #include -#include +#include +#include #include // Hera -#include +#include namespace py = pybind11; -typedef py::array_t Dgm; +typedef py::array_t Dgm; + +// Get m[i,0] and m[i,1] as a pair +auto pairify(void* p, ssize_t h, ssize_t w) { + return [=](ssize_t i){ + char* birth = (char*)p + i * h; + char* death = birth + w; + return std::make_pair(*(double*)birth, *(double*)death); + }; +} double wasserstein_distance( Dgm d1, Dgm d2, @@ -27,16 +37,18 @@ double wasserstein_distance( { py::buffer_info buf1 = d1.request(); py::buffer_info buf2 = d2.request(); + + py::gil_scoped_release release; + // shape (n,2) or (0) for empty if((buf1.ndim!=2 || buf1.shape[1]!=2) && (buf1.ndim!=1 || buf1.shape[0]!=0)) throw std::runtime_error("Diagram 1 must be an array of size n x 2"); if((buf2.ndim!=2 || buf2.shape[1]!=2) && (buf2.ndim!=1 || buf2.shape[0]!=0)) throw std::runtime_error("Diagram 2 must be an array of size n x 2"); - typedef std::array Point; - auto p1 = (Point*)buf1.ptr; - auto p2 = (Point*)buf2.ptr; - auto diag1 = boost::make_iterator_range(p1, p1+buf1.shape[0]); - auto diag2 = boost::make_iterator_range(p2, p2+buf2.shape[0]); + auto cnt1 = boost::counting_range(0, buf1.shape[0]); + auto diag1 = boost::adaptors::transform(cnt1, pairify(buf1.ptr, buf1.strides[0], buf1.strides[1])); + auto cnt2 = boost::counting_range(0, buf2.shape[0]); + auto diag2 = boost::adaptors::transform(cnt2, pairify(buf2.ptr, buf2.strides[0], buf2.strides[1])); hera::AuctionParams params; params.wasserstein_power = wasserstein_power; -- cgit v1.2.3 From dac92c5ae9da6aa21fdcd261737e08d6898dbbdc Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 6 May 2020 12:54:21 +0200 Subject: Avoid reading outside of allocated region The result was unused, but better be safe. --- src/python/gudhi/hera.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera.cc index 50d49c77..63bbb075 100644 --- a/src/python/gudhi/hera.cc +++ b/src/python/gudhi/hera.cc @@ -45,10 +45,12 @@ double wasserstein_distance( throw std::runtime_error("Diagram 1 must be an array of size n x 2"); if((buf2.ndim!=2 || buf2.shape[1]!=2) && (buf2.ndim!=1 || buf2.shape[0]!=0)) throw std::runtime_error("Diagram 2 must be an array of size n x 2"); + ssize_t stride11 = buf1.ndim == 2 ? buf1.strides[1] : 0; + ssize_t stride21 = buf2.ndim == 2 ? buf2.strides[1] : 0; auto cnt1 = boost::counting_range(0, buf1.shape[0]); - auto diag1 = boost::adaptors::transform(cnt1, pairify(buf1.ptr, buf1.strides[0], buf1.strides[1])); + auto diag1 = boost::adaptors::transform(cnt1, pairify(buf1.ptr, buf1.strides[0], stride11)); auto cnt2 = boost::counting_range(0, buf2.shape[0]); - auto diag2 = boost::adaptors::transform(cnt2, pairify(buf2.ptr, buf2.strides[0], buf2.strides[1])); + auto diag2 = boost::adaptors::transform(cnt2, pairify(buf2.ptr, buf2.strides[0], stride21)); hera::AuctionParams params; params.wasserstein_power = wasserstein_power; -- cgit v1.2.3 From 5c5e2c3075235079fda94fc6a159cc5275f85a0c Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Wed, 6 May 2020 14:13:14 +0200 Subject: Refactor the numpy -> C++ range conversion If we want to reuse it for bottleneck... --- src/python/gudhi/hera.cc | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera.cc index 63bbb075..5aec1806 100644 --- a/src/python/gudhi/hera.cc +++ b/src/python/gudhi/hera.cc @@ -22,7 +22,7 @@ namespace py = pybind11; typedef py::array_t Dgm; // Get m[i,0] and m[i,1] as a pair -auto pairify(void* p, ssize_t h, ssize_t w) { +static auto pairify(void* p, ssize_t h, ssize_t w) { return [=](ssize_t i){ char* birth = (char*)p + i * h; char* death = birth + w; @@ -30,28 +30,29 @@ auto pairify(void* p, ssize_t h, ssize_t w) { }; } +inline auto numpy_to_range_of_pairs(py::array_t dgm) { + py::buffer_info buf = dgm.request(); + // shape (n,2) or (0) for empty + if((buf.ndim!=2 || buf.shape[1]!=2) && (buf.ndim!=1 || buf.shape[0]!=0)) + throw std::runtime_error("Diagram must be an array of size n x 2"); + // In the case of shape (0), avoid reading non-existing strides[1] even if we won't use it. + ssize_t stride1 = buf.ndim == 2 ? buf.strides[1] : 0; + auto cnt = boost::counting_range(0, buf.shape[0]); + return boost::adaptors::transform(cnt, pairify(buf.ptr, buf.strides[0], stride1)); + // Be careful that the returned range cannot contain references to dead temporaries. +} + double wasserstein_distance( Dgm d1, Dgm d2, double wasserstein_power, double internal_p, double delta) { - py::buffer_info buf1 = d1.request(); - py::buffer_info buf2 = d2.request(); + // I *think* the call to request() has to be before releasing the GIL. + auto diag1 = numpy_to_range_of_pairs(d1); + auto diag2 = numpy_to_range_of_pairs(d2); py::gil_scoped_release release; - // shape (n,2) or (0) for empty - if((buf1.ndim!=2 || buf1.shape[1]!=2) && (buf1.ndim!=1 || buf1.shape[0]!=0)) - throw std::runtime_error("Diagram 1 must be an array of size n x 2"); - if((buf2.ndim!=2 || buf2.shape[1]!=2) && (buf2.ndim!=1 || buf2.shape[0]!=0)) - throw std::runtime_error("Diagram 2 must be an array of size n x 2"); - ssize_t stride11 = buf1.ndim == 2 ? buf1.strides[1] : 0; - ssize_t stride21 = buf2.ndim == 2 ? buf2.strides[1] : 0; - auto cnt1 = boost::counting_range(0, buf1.shape[0]); - auto diag1 = boost::adaptors::transform(cnt1, pairify(buf1.ptr, buf1.strides[0], stride11)); - auto cnt2 = boost::counting_range(0, buf2.shape[0]); - auto diag2 = boost::adaptors::transform(cnt2, pairify(buf2.ptr, buf2.strides[0], stride21)); - hera::AuctionParams params; params.wasserstein_power = wasserstein_power; // hera encodes infinity as -1... -- cgit v1.2.3