From c7b82f49f01075519189f1fdb56cc485e4ad9f46 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Thu, 21 May 2020 11:08:25 +0200 Subject: Use unique_pointer and template alpha complex interface for python interface --- src/python/gudhi/alpha_complex.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx index d75e374a..2b7ce00d 100644 --- a/src/python/gudhi/alpha_complex.pyx +++ b/src/python/gudhi/alpha_complex.pyx @@ -26,7 +26,7 @@ __copyright__ = "Copyright (C) 2016 Inria" __license__ = "GPL v3" cdef extern from "Alpha_complex_interface.h" namespace "Gudhi": - cdef cppclass Alpha_complex_interface "Gudhi::alpha_complex::Alpha_complex_interface": + cdef cppclass Alpha_complex_interface "Gudhi::alpha_complex::Alpha_complex_interface": Alpha_complex_interface(vector[vector[double]] points) nogil except + # bool from_file is a workaround for cython to find the correct signature Alpha_complex_interface(string off_file, bool from_file) nogil except + -- cgit v1.2.3 From 4826b8bf49aafb23c57eb6983c886235e8f7c0b2 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Fri, 22 May 2020 18:04:33 +0200 Subject: brute force version - complexity is read only --- src/python/gudhi/alpha_complex.pyx | 69 ++++++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 17 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx index 2b7ce00d..dc2fcb01 100644 --- a/src/python/gudhi/alpha_complex.pyx +++ b/src/python/gudhi/alpha_complex.pyx @@ -26,10 +26,18 @@ __copyright__ = "Copyright (C) 2016 Inria" __license__ = "GPL v3" cdef extern from "Alpha_complex_interface.h" namespace "Gudhi": - cdef cppclass Alpha_complex_interface "Gudhi::alpha_complex::Alpha_complex_interface": - Alpha_complex_interface(vector[vector[double]] points) nogil except + + cdef cppclass Alpha_complex_exact_interface "Gudhi::alpha_complex::Alpha_complex_interface": + Alpha_complex_exact_interface(vector[vector[double]] points) nogil except + # bool from_file is a workaround for cython to find the correct signature - Alpha_complex_interface(string off_file, bool from_file) nogil except + + Alpha_complex_exact_interface(string off_file, bool from_file) nogil except + + vector[double] get_point(int vertex) nogil except + + void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square) nogil except + + +cdef extern from "Alpha_complex_interface.h" namespace "Gudhi": + cdef cppclass Alpha_complex_inexact_interface "Gudhi::alpha_complex::Alpha_complex_interface": + Alpha_complex_inexact_interface(vector[vector[double]] points) nogil except + + # bool from_file is a workaround for cython to find the correct signature + Alpha_complex_inexact_interface(string off_file, bool from_file) nogil except + vector[double] get_point(int vertex) nogil except + void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square) nogil except + @@ -53,10 +61,12 @@ cdef class AlphaComplex: """ - cdef Alpha_complex_interface * thisptr + cdef Alpha_complex_exact_interface * exact_ptr + cdef Alpha_complex_inexact_interface * inexact_ptr + complexity = 'safe' # Fake constructor that does nothing but documenting the constructor - def __init__(self, points=None, off_file=''): + def __init__(self, points=None, off_file='', complexity='safe'): """AlphaComplex constructor. :param points: A list of points in d-Dimension. @@ -66,15 +76,23 @@ cdef class AlphaComplex: :param off_file: An OFF file style name. :type off_file: string + + :param complexity: Alpha complex complexity can be 'fast', 'safe' or 'exact'. Default is 'safe'. + :type complexity: string """ # The real cython constructor - def __cinit__(self, points = None, off_file = ''): + def __cinit__(self, points = None, off_file = '', complexity = 'safe'): + assert complexity == 'fast' or complexity == 'safe' or complexity == 'exact', "Alpha complex complexity can be 'fast', 'safe' or 'exact'" + self.complexity = complexity + cdef vector[vector[double]] pts if off_file: if os.path.isfile(off_file): - self.thisptr = new Alpha_complex_interface( - off_file.encode('utf-8'), True) + if complexity == 'fast': + self.inexact_ptr = new Alpha_complex_inexact_interface(off_file.encode('utf-8'), True) + else: + self.exact_ptr = new Alpha_complex_exact_interface(off_file.encode('utf-8'), True) else: print("file " + off_file + " not found.") else: @@ -82,18 +100,28 @@ cdef class AlphaComplex: # Empty Alpha construction points=[] pts = points - with nogil: - self.thisptr = new Alpha_complex_interface(pts) - + if complexity == 'fast': + with nogil: + self.inexact_ptr = new Alpha_complex_inexact_interface(pts) + else: + with nogil: + self.exact_ptr = new Alpha_complex_exact_interface(pts) def __dealloc__(self): - if self.thisptr != NULL: - del self.thisptr + if self.complexity == 'fast': + if self.inexact_ptr != NULL: + del self.inexact_ptr + else: + if self.exact_ptr != NULL: + del self.exact_ptr def __is_defined(self): """Returns true if AlphaComplex pointer is not NULL. """ - return self.thisptr != NULL + if self.complexity == 'fast': + return self.inexact_ptr != NULL + else: + return self.exact_ptr != NULL def get_point(self, vertex): """This function returns the point corresponding to a given vertex. @@ -103,7 +131,10 @@ cdef class AlphaComplex: :rtype: list of float :returns: the point. """ - return self.thisptr.get_point(vertex) + if self.complexity == 'fast': + return self.inexact_ptr.get_point(vertex) + else: + return self.exact_ptr.get_point(vertex) def create_simplex_tree(self, max_alpha_square = float('inf')): """ @@ -118,6 +149,10 @@ cdef class AlphaComplex: stree = SimplexTree() cdef double mas = max_alpha_square cdef intptr_t stree_int_ptr=stree.thisptr - with nogil: - self.thisptr.create_simplex_tree(stree_int_ptr, mas) + if self.complexity == 'fast': + with nogil: + self.inexact_ptr.create_simplex_tree(stree_int_ptr, mas) + else: + with nogil: + self.exact_ptr.create_simplex_tree(stree_int_ptr, mas) return stree -- cgit v1.2.3 From 28a6889dc9865c75acc3d3be4edce01b3942e56f Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Sat, 23 May 2020 08:45:13 +0200 Subject: First working version --- src/python/gudhi/alpha_complex.pyx | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx index dc2fcb01..e2d3db9c 100644 --- a/src/python/gudhi/alpha_complex.pyx +++ b/src/python/gudhi/alpha_complex.pyx @@ -63,7 +63,8 @@ cdef class AlphaComplex: cdef Alpha_complex_exact_interface * exact_ptr cdef Alpha_complex_inexact_interface * inexact_ptr - complexity = 'safe' + cdef bool fast + cdef bool exact # Fake constructor that does nothing but documenting the constructor def __init__(self, points=None, off_file='', complexity='safe'): @@ -84,12 +85,13 @@ cdef class AlphaComplex: # The real cython constructor def __cinit__(self, points = None, off_file = '', complexity = 'safe'): assert complexity == 'fast' or complexity == 'safe' or complexity == 'exact', "Alpha complex complexity can be 'fast', 'safe' or 'exact'" - self.complexity = complexity + self.fast = complexity == 'fast' + self.exact = complexity == 'safe' cdef vector[vector[double]] pts if off_file: if os.path.isfile(off_file): - if complexity == 'fast': + if self.fast: self.inexact_ptr = new Alpha_complex_inexact_interface(off_file.encode('utf-8'), True) else: self.exact_ptr = new Alpha_complex_exact_interface(off_file.encode('utf-8'), True) @@ -100,7 +102,7 @@ cdef class AlphaComplex: # Empty Alpha construction points=[] pts = points - if complexity == 'fast': + if self.fast: with nogil: self.inexact_ptr = new Alpha_complex_inexact_interface(pts) else: @@ -108,7 +110,7 @@ cdef class AlphaComplex: self.exact_ptr = new Alpha_complex_exact_interface(pts) def __dealloc__(self): - if self.complexity == 'fast': + if self.fast: if self.inexact_ptr != NULL: del self.inexact_ptr else: @@ -118,7 +120,7 @@ cdef class AlphaComplex: def __is_defined(self): """Returns true if AlphaComplex pointer is not NULL. """ - if self.complexity == 'fast': + if self.fast: return self.inexact_ptr != NULL else: return self.exact_ptr != NULL @@ -131,7 +133,7 @@ cdef class AlphaComplex: :rtype: list of float :returns: the point. """ - if self.complexity == 'fast': + if self.fast: return self.inexact_ptr.get_point(vertex) else: return self.exact_ptr.get_point(vertex) @@ -149,7 +151,7 @@ cdef class AlphaComplex: stree = SimplexTree() cdef double mas = max_alpha_square cdef intptr_t stree_int_ptr=stree.thisptr - if self.complexity == 'fast': + if self.fast: with nogil: self.inexact_ptr.create_simplex_tree(stree_int_ptr, mas) else: -- cgit v1.2.3 From 50b460f867b5801ce3459d60fb86b02051eb4a7d Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Sat, 23 May 2020 09:52:24 +0200 Subject: Delaunay complex and tests all possibilities --- src/python/gudhi/alpha_complex.pyx | 22 ++++---- src/python/include/Alpha_complex_interface.h | 5 +- src/python/test/test_alpha_complex.py | 75 ++++++++++++++++++++++++---- 3 files changed, 82 insertions(+), 20 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx index e2d3db9c..700fa738 100644 --- a/src/python/gudhi/alpha_complex.pyx +++ b/src/python/gudhi/alpha_complex.pyx @@ -31,7 +31,7 @@ cdef extern from "Alpha_complex_interface.h" namespace "Gudhi": # bool from_file is a workaround for cython to find the correct signature Alpha_complex_exact_interface(string off_file, bool from_file) nogil except + vector[double] get_point(int vertex) nogil except + - void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square) nogil except + + void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square, bool exact_version, bool default_filtration_value) nogil except + cdef extern from "Alpha_complex_interface.h" namespace "Gudhi": cdef cppclass Alpha_complex_inexact_interface "Gudhi::alpha_complex::Alpha_complex_interface": @@ -39,7 +39,7 @@ cdef extern from "Alpha_complex_interface.h" namespace "Gudhi": # bool from_file is a workaround for cython to find the correct signature Alpha_complex_inexact_interface(string off_file, bool from_file) nogil except + vector[double] get_point(int vertex) nogil except + - void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square) nogil except + + void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square, bool exact_version, bool default_filtration_value) nogil except + # AlphaComplex python interface cdef class AlphaComplex: @@ -138,23 +138,27 @@ cdef class AlphaComplex: else: return self.exact_ptr.get_point(vertex) - def create_simplex_tree(self, max_alpha_square = float('inf')): + def create_simplex_tree(self, max_alpha_square = float('inf'), default_filtration_value = False): """ - :param max_alpha_square: The maximum alpha square threshold the - simplices shall not exceed. Default is set to infinity, and - there is very little point using anything else since it does - not save time. + :param max_alpha_square: The maximum alpha square threshold the simplices shall not exceed. Default is set to + infinity, and there is very little point using anything else since it does not save time. :type max_alpha_square: float + :param default_filtration_value: Set this value to `True` if filtration values are not needed to be computed + (will be set to `NaN`). Default value is `False` (which means compute the filtration values). + :type default_filtration_value: bool :returns: A simplex tree created from the Delaunay Triangulation. :rtype: SimplexTree """ stree = SimplexTree() cdef double mas = max_alpha_square cdef intptr_t stree_int_ptr=stree.thisptr + cdef bool compute_filtration = default_filtration_value == True if self.fast: with nogil: - self.inexact_ptr.create_simplex_tree(stree_int_ptr, mas) + self.inexact_ptr.create_simplex_tree(stree_int_ptr, + mas, False, compute_filtration) else: with nogil: - self.exact_ptr.create_simplex_tree(stree_int_ptr, mas) + self.exact_ptr.create_simplex_tree(stree_int_ptr, + mas, self.exact, compute_filtration) return stree diff --git a/src/python/include/Alpha_complex_interface.h b/src/python/include/Alpha_complex_interface.h index 5fb694cd..3dd01345 100644 --- a/src/python/include/Alpha_complex_interface.h +++ b/src/python/include/Alpha_complex_interface.h @@ -56,8 +56,9 @@ class Alpha_complex_interface { return vd; } - void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square) { - alpha_complex_->create_complex(*simplex_tree, max_alpha_square); + void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square, + bool exact_version, bool default_filtration_value) { + alpha_complex_->create_complex(*simplex_tree, max_alpha_square, exact_version, default_filtration_value); } private: diff --git a/src/python/test/test_alpha_complex.py b/src/python/test/test_alpha_complex.py index 77121302..913397dd 100755 --- a/src/python/test/test_alpha_complex.py +++ b/src/python/test/test_alpha_complex.py @@ -24,14 +24,18 @@ __copyright__ = "Copyright (C) 2016 Inria" __license__ = "MIT" -def test_empty_alpha(): - alpha_complex = AlphaComplex(points=[[0, 0]]) +def _empty_alpha(complexity): + alpha_complex = AlphaComplex(points=[[0, 0]], complexity = complexity) assert alpha_complex.__is_defined() == True +def test_empty_alpha(): + _empty_alpha('fast') + _empty_alpha('safe') + _empty_alpha('exact') -def test_infinite_alpha(): +def _infinite_alpha(complexity): point_list = [[0, 0], [1, 0], [0, 1], [1, 1]] - alpha_complex = AlphaComplex(points=point_list) + alpha_complex = AlphaComplex(points=point_list, complexity = complexity) assert alpha_complex.__is_defined() == True simplex_tree = alpha_complex.create_simplex_tree() @@ -79,10 +83,14 @@ def test_infinite_alpha(): else: assert False +def test_infinite_alpha(): + _infinite_alpha('fast') + _infinite_alpha('safe') + _infinite_alpha('exact') -def test_filtered_alpha(): +def _filtered_alpha(complexity): point_list = [[0, 0], [1, 0], [0, 1], [1, 1]] - filtered_alpha = AlphaComplex(points=point_list) + filtered_alpha = AlphaComplex(points=point_list, complexity = complexity) simplex_tree = filtered_alpha.create_simplex_tree(max_alpha_square=0.25) @@ -119,7 +127,12 @@ def test_filtered_alpha(): assert simplex_tree.get_star([0]) == [([0], 0.0), ([0, 1], 0.25), ([0, 2], 0.25)] assert simplex_tree.get_cofaces([0], 1) == [([0, 1], 0.25), ([0, 2], 0.25)] -def test_safe_alpha_persistence_comparison(): +def test_filtered_alpha(): + _filtered_alpha('fast') + _filtered_alpha('safe') + _filtered_alpha('exact') + +def _safe_alpha_persistence_comparison(complexity): #generate periodic signal time = np.arange(0, 10, 1) signal = [math.sin(x) for x in time] @@ -131,10 +144,10 @@ def test_safe_alpha_persistence_comparison(): embedding2 = [[signal[i], delayed[i]] for i in range(len(time))] #build alpha complex and simplex tree - alpha_complex1 = AlphaComplex(points=embedding1) + alpha_complex1 = AlphaComplex(points=embedding1, complexity = complexity) simplex_tree1 = alpha_complex1.create_simplex_tree() - alpha_complex2 = AlphaComplex(points=embedding2) + alpha_complex2 = AlphaComplex(points=embedding2, complexity = complexity) simplex_tree2 = alpha_complex2.create_simplex_tree() diag1 = simplex_tree1.persistence() @@ -143,3 +156,47 @@ def test_safe_alpha_persistence_comparison(): for (first_p, second_p) in zip_longest(diag1, diag2): assert first_p[0] == pytest.approx(second_p[0]) assert first_p[1] == pytest.approx(second_p[1]) + + +def test_safe_alpha_persistence_comparison(): + # Won't work for 'fast' version + _safe_alpha_persistence_comparison('safe') + _safe_alpha_persistence_comparison('exact') + +def _delaunay_complex(complexity): + point_list = [[0, 0], [1, 0], [0, 1], [1, 1]] + filtered_alpha = AlphaComplex(points=point_list, complexity = complexity) + + simplex_tree = filtered_alpha.create_simplex_tree(default_filtration_value = True) + + assert simplex_tree.num_simplices() == 11 + assert simplex_tree.num_vertices() == 4 + + assert point_list[0] == filtered_alpha.get_point(0) + assert point_list[1] == filtered_alpha.get_point(1) + assert point_list[2] == filtered_alpha.get_point(2) + assert point_list[3] == filtered_alpha.get_point(3) + try: + filtered_alpha.get_point(4) == [] + except IndexError: + pass + else: + assert False + try: + filtered_alpha.get_point(125) == [] + except IndexError: + pass + else: + assert False + + for filtered_value in simplex_tree.get_filtration(): + assert math.isnan(filtered_value[1]) + for filtered_value in simplex_tree.get_star([0]): + assert math.isnan(filtered_value[1]) + for filtered_value in simplex_tree.get_cofaces([0], 1): + assert math.isnan(filtered_value[1]) + +def test_delaunay_complex(): + _delaunay_complex('fast') + _delaunay_complex('safe') + _delaunay_complex('exact') -- cgit v1.2.3 From 78fb7ccd413ca655bdbe4adc9b4b256f20e11fe5 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Sun, 24 May 2020 10:16:58 +0200 Subject: c++ version to trigger exact/inexact kernel --- src/python/gudhi/alpha_complex.pyx | 59 ++++++---------------- src/python/include/Alpha_complex_interface.h | 73 ++++++++++++++++++++-------- 2 files changed, 68 insertions(+), 64 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx index 700fa738..5bc9ebc4 100644 --- a/src/python/gudhi/alpha_complex.pyx +++ b/src/python/gudhi/alpha_complex.pyx @@ -26,18 +26,10 @@ __copyright__ = "Copyright (C) 2016 Inria" __license__ = "GPL v3" cdef extern from "Alpha_complex_interface.h" namespace "Gudhi": - cdef cppclass Alpha_complex_exact_interface "Gudhi::alpha_complex::Alpha_complex_interface": - Alpha_complex_exact_interface(vector[vector[double]] points) nogil except + + cdef cppclass Alpha_complex_interface "Gudhi::alpha_complex::Alpha_complex_interface": + Alpha_complex_interface(vector[vector[double]] points, bool fast_version) nogil except + # bool from_file is a workaround for cython to find the correct signature - Alpha_complex_exact_interface(string off_file, bool from_file) nogil except + - vector[double] get_point(int vertex) nogil except + - void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square, bool exact_version, bool default_filtration_value) nogil except + - -cdef extern from "Alpha_complex_interface.h" namespace "Gudhi": - cdef cppclass Alpha_complex_inexact_interface "Gudhi::alpha_complex::Alpha_complex_interface": - Alpha_complex_inexact_interface(vector[vector[double]] points) nogil except + - # bool from_file is a workaround for cython to find the correct signature - Alpha_complex_inexact_interface(string off_file, bool from_file) nogil except + + Alpha_complex_interface(string off_file, bool fast_version, bool from_file) nogil except + vector[double] get_point(int vertex) nogil except + void create_simplex_tree(Simplex_tree_interface_full_featured* simplex_tree, double max_alpha_square, bool exact_version, bool default_filtration_value) nogil except + @@ -61,8 +53,7 @@ cdef class AlphaComplex: """ - cdef Alpha_complex_exact_interface * exact_ptr - cdef Alpha_complex_inexact_interface * inexact_ptr + cdef Alpha_complex_interface * this_ptr cdef bool fast cdef bool exact @@ -91,10 +82,7 @@ cdef class AlphaComplex: cdef vector[vector[double]] pts if off_file: if os.path.isfile(off_file): - if self.fast: - self.inexact_ptr = new Alpha_complex_inexact_interface(off_file.encode('utf-8'), True) - else: - self.exact_ptr = new Alpha_complex_exact_interface(off_file.encode('utf-8'), True) + self.this_ptr = new Alpha_complex_interface(off_file.encode('utf-8'), self.fast, True) else: print("file " + off_file + " not found.") else: @@ -102,28 +90,17 @@ cdef class AlphaComplex: # Empty Alpha construction points=[] pts = points - if self.fast: - with nogil: - self.inexact_ptr = new Alpha_complex_inexact_interface(pts) - else: - with nogil: - self.exact_ptr = new Alpha_complex_exact_interface(pts) + with nogil: + self.this_ptr = new Alpha_complex_interface(pts, self.fast) def __dealloc__(self): - if self.fast: - if self.inexact_ptr != NULL: - del self.inexact_ptr - else: - if self.exact_ptr != NULL: - del self.exact_ptr + if self.this_ptr != NULL: + del self.this_ptr def __is_defined(self): """Returns true if AlphaComplex pointer is not NULL. """ - if self.fast: - return self.inexact_ptr != NULL - else: - return self.exact_ptr != NULL + return self.this_ptr != NULL def get_point(self, vertex): """This function returns the point corresponding to a given vertex. @@ -133,10 +110,7 @@ cdef class AlphaComplex: :rtype: list of float :returns: the point. """ - if self.fast: - return self.inexact_ptr.get_point(vertex) - else: - return self.exact_ptr.get_point(vertex) + return self.this_ptr.get_point(vertex) def create_simplex_tree(self, max_alpha_square = float('inf'), default_filtration_value = False): """ @@ -153,12 +127,7 @@ cdef class AlphaComplex: cdef double mas = max_alpha_square cdef intptr_t stree_int_ptr=stree.thisptr cdef bool compute_filtration = default_filtration_value == True - if self.fast: - with nogil: - self.inexact_ptr.create_simplex_tree(stree_int_ptr, - mas, False, compute_filtration) - else: - with nogil: - self.exact_ptr.create_simplex_tree(stree_int_ptr, - mas, self.exact, compute_filtration) + with nogil: + self.this_ptr.create_simplex_tree(stree_int_ptr, + mas, self.exact, compute_filtration) return stree diff --git a/src/python/include/Alpha_complex_interface.h b/src/python/include/Alpha_complex_interface.h index 3dd01345..46f2ba03 100644 --- a/src/python/include/Alpha_complex_interface.h +++ b/src/python/include/Alpha_complex_interface.h @@ -29,40 +29,75 @@ namespace Gudhi { namespace alpha_complex { -using Exact_kernel = CGAL::Epeck_d< CGAL::Dynamic_dimension_tag >; -using Inexact_kernel = CGAL::Epick_d< CGAL::Dynamic_dimension_tag >; - -template class Alpha_complex_interface { - using Point_d = typename Kernel::Point_d; + private: + using Exact_kernel = CGAL::Epeck_d< CGAL::Dynamic_dimension_tag >; + using Inexact_kernel = CGAL::Epick_d< CGAL::Dynamic_dimension_tag >; + using Point_exact_kernel = typename Exact_kernel::Point_d; + using Point_inexact_kernel = typename Inexact_kernel::Point_d; + + template + std::vector pt_cgal_to_cython(CgalPointType& ph) { + std::vector vd; + for (auto coord = ph.cartesian_begin(); coord != ph.cartesian_end(); coord++) + vd.push_back(CGAL::to_double(*coord)); + return vd; + } + + template + CgalPointType pt_cython_to_cgal(std::vector const& vec) { + return CgalPointType(vec.size(), vec.begin(), vec.end()); + } public: - Alpha_complex_interface(const std::vector>& points) { - auto mkpt = [](std::vector const& vec){ - return Point_d(vec.size(), vec.begin(), vec.end()); - }; - alpha_complex_ = std::make_unique>(boost::adaptors::transform(points, mkpt)); + Alpha_complex_interface(const std::vector>& points, bool fast_version) + : fast_version_(fast_version) { + auto pt = pt_cython_to_cgal(points[0]); + if (fast_version_) { + auto mkpt = [](std::vector const& vec) { + return Point_inexact_kernel(vec.size(), vec.begin(), vec.end()); + }; + ac_inexact_ptr_ = std::make_unique>(boost::adaptors::transform(points, mkpt)); + //ac_inexact_ptr_ = std::make_unique>(boost::adaptors::transform(points, pt_cython_to_cgal)); + } else { + auto mkpt = [](std::vector const& vec) { + return Point_exact_kernel(vec.size(), vec.begin(), vec.end()); + }; + ac_exact_ptr_ = std::make_unique>(boost::adaptors::transform(points, mkpt)); + //ac_exact_ptr_ = std::make_unique>(boost::adaptors::transform(points, pt_cython_to_cgal)); + } } - Alpha_complex_interface(const std::string& off_file_name, bool from_file = true) { - alpha_complex_ = std::make_unique>(off_file_name); + Alpha_complex_interface(const std::string& off_file_name, bool fast_version, bool from_file = true) + : fast_version_(fast_version) { + if (fast_version_) + ac_inexact_ptr_ = std::make_unique>(off_file_name); + else + ac_exact_ptr_ = std::make_unique>(off_file_name); } std::vector get_point(int vh) { - std::vector vd; - Point_d const& ph = alpha_complex_->get_point(vh); - for (auto coord = ph.cartesian_begin(); coord != ph.cartesian_end(); coord++) - vd.push_back(CGAL::to_double(*coord)); - return vd; + if (fast_version_) { + Point_inexact_kernel const& ph = ac_inexact_ptr_->get_point(vh); + return pt_cgal_to_cython(ph); + } else { + Point_exact_kernel const& ph = ac_exact_ptr_->get_point(vh); + return pt_cgal_to_cython(ph); + } } void create_simplex_tree(Simplex_tree_interface<>* simplex_tree, double max_alpha_square, bool exact_version, bool default_filtration_value) { - alpha_complex_->create_complex(*simplex_tree, max_alpha_square, exact_version, default_filtration_value); + if (fast_version_) + ac_inexact_ptr_->create_complex(*simplex_tree, max_alpha_square, exact_version, default_filtration_value); + else + ac_exact_ptr_->create_complex(*simplex_tree, max_alpha_square, exact_version, default_filtration_value); } private: - std::unique_ptr> alpha_complex_; + bool fast_version_; + std::unique_ptr> ac_exact_ptr_; + std::unique_ptr> ac_inexact_ptr_; }; } // namespace alpha_complex -- cgit v1.2.3 From 4aff8dc700a0790373d82ae24076359c09ee04c8 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 1 Jun 2020 15:24:28 +0200 Subject: Interface for hera's bottleneck_distance --- ext/hera | 2 +- .../modules/GUDHI_third_party_libraries.cmake | 1 + src/cmake/modules/GUDHI_user_version_target.cmake | 2 +- src/python/CMakeLists.txt | 8 +++- src/python/doc/bottleneck_distance_user.rst | 18 +++++-- src/python/gudhi/hera.cc | 56 ---------------------- src/python/gudhi/hera/__init__.py | 2 + src/python/gudhi/hera/bottleneck.cc | 45 +++++++++++++++++ src/python/gudhi/hera/wasserstein.cc | 56 ++++++++++++++++++++++ src/python/setup.py.in | 6 ++- src/python/test/test_bottleneck_distance.py | 6 ++- 11 files changed, 134 insertions(+), 68 deletions(-) delete mode 100644 src/python/gudhi/hera.cc create mode 100644 src/python/gudhi/hera/__init__.py create mode 100644 src/python/gudhi/hera/bottleneck.cc create mode 100644 src/python/gudhi/hera/wasserstein.cc (limited to 'src/python/gudhi') diff --git a/ext/hera b/ext/hera index 0019cae9..2c5e6c60 160000 --- a/ext/hera +++ b/ext/hera @@ -1 +1 @@ -Subproject commit 0019cae9dc1e9d11aa03bc59681435ba7f21eea8 +Subproject commit 2c5e6c606ee37cd68bbe9f9915dba99f7677dd87 diff --git a/src/cmake/modules/GUDHI_third_party_libraries.cmake b/src/cmake/modules/GUDHI_third_party_libraries.cmake index f92fe93e..d80283d2 100644 --- a/src/cmake/modules/GUDHI_third_party_libraries.cmake +++ b/src/cmake/modules/GUDHI_third_party_libraries.cmake @@ -69,6 +69,7 @@ endif() # For those who dislike bundled dependencies, this indicates where to find a preinstalled Hera. set(HERA_WASSERSTEIN_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/ext/hera/wasserstein/include CACHE PATH "Directory where one can find Hera's wasserstein.h") +set(HERA_BOTTLENECK_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/ext/hera/bottleneck/include CACHE PATH "Directory where one can find Hera's bottleneck.h") option(WITH_GUDHI_USE_TBB "Build with Intel TBB parallelization" ON) diff --git a/src/cmake/modules/GUDHI_user_version_target.cmake b/src/cmake/modules/GUDHI_user_version_target.cmake index e99bb42d..491fa459 100644 --- a/src/cmake/modules/GUDHI_user_version_target.cmake +++ b/src/cmake/modules/GUDHI_user_version_target.cmake @@ -68,7 +68,7 @@ add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/src/GudhUI ${GUDHI_USER_VERSION_DIR}/GudhUI) add_custom_command(TARGET user_version PRE_BUILD COMMAND ${CMAKE_COMMAND} -E - copy_directory ${CMAKE_SOURCE_DIR}/ext/hera/wasserstein/include ${GUDHI_USER_VERSION_DIR}/ext/hera/wasserstein/include) + copy_directory ${CMAKE_SOURCE_DIR}/ext/hera ${GUDHI_USER_VERSION_DIR}/ext/hera) set(GUDHI_DIRECTORIES "doc;example;concept;utilities") diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 96dd3f6f..1e81cac8 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -130,7 +130,8 @@ if(PYTHONINTERP_FOUND) set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'reader_utils', ") set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'witness_complex', ") set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'strong_witness_complex', ") - set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera', ") + set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera/wasserstein', ") + set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'hera/bottleneck', ") if (NOT CGAL_VERSION VERSION_LESS 4.11.0) set(GUDHI_PYBIND11_MODULES "${GUDHI_PYBIND11_MODULES}'bottleneck', ") set(GUDHI_CYTHON_MODULES "${GUDHI_CYTHON_MODULES}'nerve_gic', ") @@ -236,6 +237,7 @@ if(PYTHONINTERP_FOUND) file(COPY "gudhi/point_cloud" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") file(COPY "gudhi/weighted_rips_complex.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") file(COPY "gudhi/dtm_rips_complex.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi") + file(COPY "gudhi/hera/__init__.py" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/gudhi/hera") add_custom_command( OUTPUT gudhi.so @@ -355,7 +357,9 @@ if(PYTHONINTERP_FOUND) COMMAND ${CMAKE_COMMAND} -E env "PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}" ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/example/bottleneck_basic_example.py") - add_gudhi_py_test(test_bottleneck_distance) + if (PYBIND11_FOUND) + add_gudhi_py_test(test_bottleneck_distance) + endif() # Cover complex file(COPY ${CMAKE_SOURCE_DIR}/data/points/human.off DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/) diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst index 89da89d3..49bd3706 100644 --- a/src/python/doc/bottleneck_distance_user.rst +++ b/src/python/doc/bottleneck_distance_user.rst @@ -9,14 +9,22 @@ Definition .. include:: bottleneck_distance_sum.inc -This implementation is based on ideas from "Geometry Helps in Bottleneck Matching and Related Problems" -:cite:`DBLP:journals/algorithmica/EfratIK01`. Another relevant publication, although it was not used is -"Geometry Helps to Compare Persistence Diagrams" :cite:`Kerber:2017:GHC:3047249.3064175`. +This implementation by François Godi is based on ideas from "Geometry Helps in Bottleneck Matching and Related Problems" +:cite:`DBLP:journals/algorithmica/EfratIK01` and requires `CGAL `_. -Function --------- .. autofunction:: gudhi.bottleneck_distance +This other implementation comes from `Hera +`_ (BSD-3-Clause) which is +based on "Geometry Helps to Compare Persistence Diagrams" +:cite:`Kerber:2017:GHC:3047249.3064175` by Michael Kerber, Dmitriy +Morozov, and Arnur Nigmetov. + +Beware that its approximation allows for a multiplicative error, while the function above uses an additive error. + +.. autofunction:: gudhi.hera.bottleneck_distance + + Distance computation -------------------- diff --git a/src/python/gudhi/hera.cc b/src/python/gudhi/hera.cc deleted file mode 100644 index ea80a9a8..00000000 --- a/src/python/gudhi/hera.cc +++ /dev/null @@ -1,56 +0,0 @@ -/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. - * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. - * Author(s): Marc Glisse - * - * Copyright (C) 2020 Inria - * - * Modification(s): - * - YYYY/MM Author: Description of the modification - */ - -#include // Hera - -#include - -double wasserstein_distance( - Dgm d1, Dgm d2, - double wasserstein_power, double internal_p, - double delta) -{ - // I *think* the call to request() has to be before releasing the GIL. - auto diag1 = numpy_to_range_of_pairs(d1); - auto diag2 = numpy_to_range_of_pairs(d2); - - py::gil_scoped_release release; - - hera::AuctionParams params; - params.wasserstein_power = wasserstein_power; - // hera encodes infinity as -1... - if(std::isinf(internal_p)) internal_p = hera::get_infinity(); - params.internal_p = internal_p; - params.delta = delta; - // The extra parameters are purposedly not exposed for now. - return hera::wasserstein_dist(diag1, diag2, params); -} - -PYBIND11_MODULE(hera, m) { - m.def("wasserstein_distance", &wasserstein_distance, - py::arg("X"), py::arg("Y"), - py::arg("order") = 1, - py::arg("internal_p") = std::numeric_limits::infinity(), - py::arg("delta") = .01, - R"pbdoc( - Compute the Wasserstein distance between two diagrams. - Points at infinity are supported. - - Parameters: - X (n x 2 numpy array): First diagram - Y (n x 2 numpy array): Second diagram - order (float): Wasserstein exponent W_q - internal_p (float): Internal Minkowski norm L^p in R^2 - delta (float): Relative error 1+delta - - Returns: - float: Approximate Wasserstein distance W_q(X,Y) - )pbdoc"); -} diff --git a/src/python/gudhi/hera/__init__.py b/src/python/gudhi/hera/__init__.py new file mode 100644 index 00000000..044f81cd --- /dev/null +++ b/src/python/gudhi/hera/__init__.py @@ -0,0 +1,2 @@ +from .wasserstein import wasserstein_distance +from .bottleneck import bottleneck_distance diff --git a/src/python/gudhi/hera/bottleneck.cc b/src/python/gudhi/hera/bottleneck.cc new file mode 100644 index 00000000..e00b4682 --- /dev/null +++ b/src/python/gudhi/hera/bottleneck.cc @@ -0,0 +1,45 @@ +/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + * Author(s): Marc Glisse + * + * Copyright (C) 2020 Inria + * + * Modification(s): + * - YYYY/MM Author: Description of the modification + */ + +#include // Hera + +#include + +double bottleneck_distance(Dgm d1, Dgm d2, double delta) +{ + // I *think* the call to request() has to be before releasing the GIL. + auto diag1 = numpy_to_range_of_pairs(d1); + auto diag2 = numpy_to_range_of_pairs(d2); + + py::gil_scoped_release release; + + if (delta == 0) + return hera::bottleneckDistExact(diag1, diag2); + else + return hera::bottleneckDistApprox(diag1, diag2, delta); +} + +PYBIND11_MODULE(bottleneck, m) { + m.def("bottleneck_distance", &bottleneck_distance, + py::arg("X"), py::arg("Y"), + py::arg("delta") = .01, + R"pbdoc( + Compute the Bottleneck distance between two diagrams. + Points at infinity are supported. + + Parameters: + X (n x 2 numpy array): First diagram + Y (n x 2 numpy array): Second diagram + delta (float): Relative error 1+delta + + Returns: + float: (approximate) bottleneck distance d_B(X,Y) + )pbdoc"); +} diff --git a/src/python/gudhi/hera/wasserstein.cc b/src/python/gudhi/hera/wasserstein.cc new file mode 100644 index 00000000..1a21f02f --- /dev/null +++ b/src/python/gudhi/hera/wasserstein.cc @@ -0,0 +1,56 @@ +/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + * Author(s): Marc Glisse + * + * Copyright (C) 2020 Inria + * + * Modification(s): + * - YYYY/MM Author: Description of the modification + */ + +#include // Hera + +#include + +double wasserstein_distance( + Dgm d1, Dgm d2, + double wasserstein_power, double internal_p, + double delta) +{ + // I *think* the call to request() has to be before releasing the GIL. + auto diag1 = numpy_to_range_of_pairs(d1); + auto diag2 = numpy_to_range_of_pairs(d2); + + py::gil_scoped_release release; + + hera::AuctionParams params; + params.wasserstein_power = wasserstein_power; + // hera encodes infinity as -1... + if(std::isinf(internal_p)) internal_p = hera::get_infinity(); + params.internal_p = internal_p; + params.delta = delta; + // The extra parameters are purposedly not exposed for now. + return hera::wasserstein_dist(diag1, diag2, params); +} + +PYBIND11_MODULE(wasserstein, m) { + m.def("wasserstein_distance", &wasserstein_distance, + py::arg("X"), py::arg("Y"), + py::arg("order") = 1, + py::arg("internal_p") = std::numeric_limits::infinity(), + py::arg("delta") = .01, + R"pbdoc( + Compute the Wasserstein distance between two diagrams. + Points at infinity are supported. + + Parameters: + X (n x 2 numpy array): First diagram + Y (n x 2 numpy array): Second diagram + order (float): Wasserstein exponent W_q + internal_p (float): Internal Minkowski norm L^p in R^2 + delta (float): Relative error 1+delta + + Returns: + float: Approximate Wasserstein distance W_q(X,Y) + )pbdoc"); +} diff --git a/src/python/setup.py.in b/src/python/setup.py.in index b9f4e3f0..9b3c7521 100644 --- a/src/python/setup.py.in +++ b/src/python/setup.py.in @@ -48,10 +48,12 @@ ext_modules = cythonize(ext_modules) for module in pybind11_modules: my_include_dirs = include_dirs + [pybind11.get_include(False), pybind11.get_include(True)] - if module == 'hera': + if module == 'hera/wasserstein': my_include_dirs = ['@HERA_WASSERSTEIN_INCLUDE_DIR@'] + my_include_dirs + elif module == 'hera/bottleneck': + my_include_dirs = ['@HERA_BOTTLENECK_INCLUDE_DIR@'] + my_include_dirs ext_modules.append(Extension( - 'gudhi.' + module, + 'gudhi.' + module.replace('/', '.'), sources = [source_dir + module + '.cc'], language = 'c++', include_dirs = my_include_dirs, diff --git a/src/python/test/test_bottleneck_distance.py b/src/python/test/test_bottleneck_distance.py index 70b2abad..6915bea8 100755 --- a/src/python/test/test_bottleneck_distance.py +++ b/src/python/test/test_bottleneck_distance.py @@ -9,6 +9,8 @@ """ import gudhi +import gudhi.hera +import pytest __author__ = "Vincent Rouvreau" __copyright__ = "Copyright (C) 2016 Inria" @@ -19,5 +21,7 @@ def test_basic_bottleneck(): diag1 = [[2.7, 3.7], [9.6, 14.0], [34.2, 34.974], [3.0, float("Inf")]] diag2 = [[2.8, 4.45], [9.5, 14.1], [3.2, float("Inf")]] - assert gudhi.bottleneck_distance(diag1, diag2, 0.1) == 0.8081763781405569 assert gudhi.bottleneck_distance(diag1, diag2) == 0.75 + assert gudhi.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, abs=0.1) + assert gudhi.hera.bottleneck_distance(diag1, diag2, 0) == 0.75 + assert gudhi.hera.bottleneck_distance(diag1, diag2, 0.1) == pytest.approx(0.75, rel=0.1) -- cgit v1.2.3 From cc42bcdf3323f2eb6edeaca105d29b32b394ca66 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 2 Jun 2020 07:33:31 +0200 Subject: Parallelism in pairwise_distances --- src/python/gudhi/representations/kernel_methods.py | 14 +++++------ src/python/gudhi/representations/metrics.py | 27 +++++++++++++++++----- 2 files changed, 28 insertions(+), 13 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py index 596f4f07..c9bd9d01 100644 --- a/src/python/gudhi/representations/kernel_methods.py +++ b/src/python/gudhi/representations/kernel_methods.py @@ -10,7 +10,7 @@ import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances, pairwise_kernels -from .metrics import SlicedWassersteinDistance, PersistenceFisherDistance, _sklearn_wrapper, pairwise_persistence_diagram_distances, _sliced_wasserstein_distance, _persistence_fisher_distance +from .metrics import SlicedWassersteinDistance, PersistenceFisherDistance, _sklearn_wrapper, _pairwise, pairwise_persistence_diagram_distances, _sliced_wasserstein_distance, _persistence_fisher_distance from .preprocessing import Padding ############################################# @@ -60,7 +60,7 @@ def _persistence_scale_space_kernel(D1, D2, kernel_approx=None, bandwidth=1.): weight_pss = lambda x: 1 if x[1] >= x[0] else -1 return 0.5 * _persistence_weighted_gaussian_kernel(DD1, DD2, weight=weight_pss, kernel_approx=kernel_approx, bandwidth=bandwidth) -def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", **kwargs): +def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", n_jobs=None, **kwargs): """ This function computes the kernel matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2). @@ -76,15 +76,15 @@ def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", XX = np.reshape(np.arange(len(X)), [-1,1]) YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) if kernel == "sliced_wasserstein": - return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="sliced_wasserstein", num_directions=kwargs["num_directions"]) / kwargs["bandwidth"]) + return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="sliced_wasserstein", num_directions=kwargs["num_directions"], n_jobs=n_jobs) / kwargs["bandwidth"]) elif kernel == "persistence_fisher": - return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="persistence_fisher", kernel_approx=kwargs["kernel_approx"], bandwidth=kwargs["bandwidth"]) / kwargs["bandwidth_fisher"]) + return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="persistence_fisher", kernel_approx=kwargs["kernel_approx"], bandwidth=kwargs["bandwidth"], n_jobs=n_jobs) / kwargs["bandwidth_fisher"]) elif kernel == "persistence_scale_space": - return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(_persistence_scale_space_kernel, X, Y, **kwargs)) + return _pairwise(pairwise_kernels, False, XX, YY, metric=_sklearn_wrapper(_persistence_scale_space_kernel, X, Y, **kwargs), n_jobs=n_jobs) elif kernel == "persistence_weighted_gaussian": - return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(_persistence_weighted_gaussian_kernel, X, Y, **kwargs)) + return _pairwise(pairwise_kernels, False, XX, YY, metric=_sklearn_wrapper(_persistence_weighted_gaussian_kernel, X, Y, **kwargs), n_jobs=n_jobs) else: - return pairwise_kernels(XX, YY, metric=_sklearn_wrapper(metric, **kwargs)) + return _pairwise(pairwise_kernels, False, XX, YY, metric=_sklearn_wrapper(metric, **kwargs), n_jobs=n_jobs) class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): """ diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 8a32f7e9..23bccd68 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -12,6 +12,7 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances from gudhi.hera import wasserstein_distance as hera_wasserstein_distance from .preprocessing import Padding +from joblib import Parallel, delayed, effective_n_jobs ############################################# # Metrics ################################### @@ -116,6 +117,20 @@ def _persistence_fisher_distance(D1, D2, kernel_approx=None, bandwidth=1.): vectorj = vectorj/vectorj_sum return np.arccos( min(np.dot(np.sqrt(vectori), np.sqrt(vectorj)), 1.) ) +def _pairwise(fallback, skipdiag, X, Y, metric, n_jobs): + if Y is not None: + return fallback(X, Y, metric=metric, n_jobs=n_jobs) + triu = np.triu_indices(len(X), k=skipdiag) + tril = (triu[1], triu[0]) + par = Parallel(n_jobs=n_jobs, prefer="threads") + d = par(delayed(metric)([triu[0][i]], [triu[1][i]]) for i in range(len(triu[0]))) + m = np.empty((len(X), len(X))) + m[triu] = d + m[tril] = d + if skipdiag: + np.fill_diagonal(m, 0) + return m + def _sklearn_wrapper(metric, X, Y, **kwargs): """ This function is a wrapper for any metric between two persistence diagrams that takes two numpy arrays of shapes (nx2) and (mx2) as arguments. @@ -134,7 +149,7 @@ PAIRWISE_DISTANCE_FUNCTIONS = { "persistence_fisher": _persistence_fisher_distance, } -def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwargs): +def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", n_jobs=None, **kwargs): """ This function computes the distance matrix between two lists of persistence diagrams given as numpy arrays of shape (nx2). @@ -152,25 +167,25 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", **kwa if metric == "bottleneck": try: from .. import bottleneck_distance - return pairwise_distances(XX, YY, metric=_sklearn_wrapper(bottleneck_distance, X, Y, **kwargs)) + return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(bottleneck_distance, X, Y, **kwargs), n_jobs=n_jobs) except ImportError: print("Gudhi built without CGAL") raise elif metric == "pot_wasserstein": try: from gudhi.wasserstein import wasserstein_distance as pot_wasserstein_distance - return pairwise_distances(XX, YY, metric=_sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs)) + return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(pot_wasserstein_distance, X, Y, **kwargs), n_jobs=n_jobs) except ImportError: print("POT (Python Optimal Transport) is not installed. Please install POT or use metric='wasserstein' or metric='hera_wasserstein'") raise elif metric == "sliced_wasserstein": Xproj = _compute_persistence_diagram_projections(X, **kwargs) Yproj = None if Y is None else _compute_persistence_diagram_projections(Y, **kwargs) - return pairwise_distances(XX, YY, metric=_sklearn_wrapper(_sliced_wasserstein_distance_on_projections, Xproj, Yproj)) + return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(_sliced_wasserstein_distance_on_projections, Xproj, Yproj), n_jobs=n_jobs) elif type(metric) == str: - return pairwise_distances(XX, YY, metric=_sklearn_wrapper(PAIRWISE_DISTANCE_FUNCTIONS[metric], X, Y, **kwargs)) + return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(PAIRWISE_DISTANCE_FUNCTIONS[metric], X, Y, **kwargs), n_jobs=n_jobs) else: - return pairwise_distances(XX, YY, metric=_sklearn_wrapper(metric, X, Y, **kwargs)) + return _pairwise(pairwise_distances, True, XX, YY, metric=_sklearn_wrapper(metric, X, Y, **kwargs), n_jobs=n_jobs) class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): """ -- cgit v1.2.3 From aebfa07217f58943753967cfd3d7daa9f28d5650 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 2 Jun 2020 17:56:18 +0200 Subject: More n_jobs in metrics --- src/python/gudhi/representations/metrics.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 23bccd68..84907160 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -12,7 +12,7 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances from gudhi.hera import wasserstein_distance as hera_wasserstein_distance from .preprocessing import Padding -from joblib import Parallel, delayed, effective_n_jobs +from joblib import Parallel, delayed ############################################# # Metrics ################################### @@ -157,6 +157,7 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", n_job X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams. Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise distances are computed from the first list only. metric: distance to use. It can be either a string ("sliced_wasserstein", "wasserstein", "hera_wasserstein" (Wasserstein distance computed with Hera---note that Hera is also used for the default option "wasserstein"), "pot_wasserstein" (Wasserstein distance computed with POT), "bottleneck", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. If it is a function, make sure that it is symmetric and that it outputs 0 if called on the same two arrays. + n_jobs (int): number of jobs to use for the computation. This uses joblib.Parallel(prefer="threads"), so metrics that do not release the GIL may not scale unless run inside a `joblib.parallel_backend `_ block. **kwargs: optional keyword parameters. Any further parameters are passed directly to the distance function. See the docs of the various distance classes in this module. Returns: @@ -191,14 +192,16 @@ class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the sliced Wasserstein distance matrix from a list of persistence diagrams. The Sliced Wasserstein distance is computed by projecting the persistence diagrams onto lines, comparing the projections with the 1-norm, and finally integrating over all possible lines. See http://proceedings.mlr.press/v70/carriere17a.html for more details. """ - def __init__(self, num_directions=10): + def __init__(self, num_directions=10, n_jobs=None): """ Constructor for the SlicedWassersteinDistance class. Parameters: num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the distance computation (default 10). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details. """ self.num_directions = num_directions + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -221,7 +224,7 @@ class SlicedWassersteinDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise sliced Wasserstein distances. """ - return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="sliced_wasserstein", num_directions=self.num_directions) + return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="sliced_wasserstein", num_directions=self.num_directions, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ @@ -242,14 +245,16 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): :Requires: `CGAL `_ :math:`\geq` 4.11.0 """ - def __init__(self, epsilon=None): + def __init__(self, epsilon=None, n_jobs=None): """ Constructor for the BottleneckDistance class. Parameters: epsilon (double): absolute (additive) error tolerated on the distance (default is the smallest positive float), see :func:`gudhi.bottleneck_distance`. + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details. """ self.epsilon = epsilon + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -272,7 +277,7 @@ class BottleneckDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise bottleneck distances. """ - Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon) + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric="bottleneck", e=self.epsilon, n_jobs=self.n_jobs) return Xfit def __call__(self, diag1, diag2): @@ -297,15 +302,17 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the persistence Fisher distance matrix from a list of persistence diagrams. The persistence Fisher distance is obtained by computing the original Fisher distance between the probability distributions associated to the persistence diagrams given by convolving them with a Gaussian kernel. See http://papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. """ - def __init__(self, bandwidth=1., kernel_approx=None): + def __init__(self, bandwidth=1., kernel_approx=None, n_jobs=None): """ Constructor for the PersistenceFisherDistance class. Parameters: bandwidth (double): bandwidth of the Gaussian kernel used to turn persistence diagrams into probability distributions (default 1.). kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details. """ self.bandwidth, self.kernel_approx = bandwidth, kernel_approx + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -328,7 +335,7 @@ class PersistenceFisherDistance(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence Fisher distances. """ - return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) + return pairwise_persistence_diagram_distances(X, self.diagrams_, metric="persistence_fisher", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ @@ -347,7 +354,7 @@ class WassersteinDistance(BaseEstimator, TransformerMixin): """ This is a class for computing the Wasserstein distance matrix from a list of persistence diagrams. """ - def __init__(self, order=2, internal_p=2, mode="pot", delta=0.01): + def __init__(self, order=2, internal_p=2, mode="pot", delta=0.01, n_jobs=None): """ Constructor for the WassersteinDistance class. @@ -356,10 +363,12 @@ class WassersteinDistance(BaseEstimator, TransformerMixin): internal_p (int): ground metric on the (upper-half) plane (i.e. norm l_p in R^2), default value is 2 (euclidean norm), see :func:`gudhi.wasserstein.wasserstein_distance`. mode (str): method for computing Wasserstein distance. Either "pot" or "hera". delta (float): relative error 1+delta. Used only if mode == "hera". + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_distances` for details. """ self.order, self.internal_p, self.mode = order, internal_p, mode self.metric = "pot_wasserstein" if mode == "pot" else "hera_wasserstein" self.delta = delta + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -383,9 +392,9 @@ class WassersteinDistance(BaseEstimator, TransformerMixin): numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise Wasserstein distances. """ if self.metric == "hera_wasserstein": - Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, delta=self.delta) + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, delta=self.delta, n_jobs=self.n_jobs) else: - Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, matching=False) + Xfit = pairwise_persistence_diagram_distances(X, self.diagrams_, metric=self.metric, order=self.order, internal_p=self.internal_p, matching=False, n_jobs=self.n_jobs) return Xfit def __call__(self, diag1, diag2): -- cgit v1.2.3 From 7706056bb9c0396188201570f9399e636df63df7 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 2 Jun 2020 18:08:52 +0200 Subject: n_jobs for kernels --- src/python/gudhi/representations/kernel_methods.py | 25 +++++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py index c9bd9d01..6e4f0619 100644 --- a/src/python/gudhi/representations/kernel_methods.py +++ b/src/python/gudhi/representations/kernel_methods.py @@ -68,6 +68,7 @@ def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", X (list of n numpy arrays of shape (numx2)): first list of persistence diagrams. Y (list of m numpy arrays of shape (numx2)): second list of persistence diagrams (optional). If None, pairwise kernel values are computed from the first list only. kernel: kernel to use. It can be either a string ("sliced_wasserstein", "persistence_scale_space", "persistence_weighted_gaussian", "persistence_fisher") or a function taking two numpy arrays of shape (nx2) and (mx2) as inputs. If it is a function, make sure that it is symmetric. + n_jobs (int): number of jobs to use for the computation. This uses joblib.Parallel(prefer="threads"), so kernels that do not release the GIL may not scale unless run inside a `joblib.parallel_backend `_ block. **kwargs: optional keyword parameters. Any further parameters are passed directly to the kernel function. See the docs of the various kernel classes in this module. Returns: @@ -90,16 +91,18 @@ class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): """ This is a class for computing the sliced Wasserstein kernel matrix from a list of persistence diagrams. The sliced Wasserstein kernel is computed by exponentiating the corresponding sliced Wasserstein distance with a Gaussian kernel. See http://proceedings.mlr.press/v70/carriere17a.html for more details. """ - def __init__(self, num_directions=10, bandwidth=1.0): + def __init__(self, num_directions=10, bandwidth=1.0, n_jobs=None): """ Constructor for the SlicedWassersteinKernel class. Parameters: bandwidth (double): bandwidth of the Gaussian kernel applied to the sliced Wasserstein distance (default 1.). num_directions (int): number of lines evenly sampled from [-pi/2,pi/2] in order to approximate and speed up the kernel computation (default 10). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details. """ self.bandwidth = bandwidth self.num_directions = num_directions + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -122,7 +125,7 @@ class SlicedWassersteinKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise sliced Wasserstein kernel values. """ - return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="sliced_wasserstein", bandwidth=self.bandwidth, num_directions=self.num_directions) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="sliced_wasserstein", bandwidth=self.bandwidth, num_directions=self.num_directions, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ @@ -141,7 +144,7 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin): """ This is a class for computing the persistence weighted Gaussian kernel matrix from a list of persistence diagrams. The persistence weighted Gaussian kernel is computed by convolving the persistence diagram points with weighted Gaussian kernels. See http://proceedings.mlr.press/v48/kusano16.html for more details. """ - def __init__(self, bandwidth=1., weight=lambda x: 1, kernel_approx=None): + def __init__(self, bandwidth=1., weight=lambda x: 1, kernel_approx=None, n_jobs=None): """ Constructor for the PersistenceWeightedGaussianKernel class. @@ -149,9 +152,11 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin): bandwidth (double): bandwidth of the Gaussian kernel with which persistence diagrams will be convolved (default 1.) weight (function): weight function for the persistence diagram points (default constant function, ie lambda x: 1). This function must be defined on 2D points, ie lists or numpy arrays of the form [p_x,p_y]. kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details. """ self.bandwidth, self.weight = bandwidth, weight self.kernel_approx = kernel_approx + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -174,7 +179,7 @@ class PersistenceWeightedGaussianKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence weighted Gaussian kernel values. """ - return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_weighted_gaussian", bandwidth=self.bandwidth, weight=self.weight, kernel_approx=self.kernel_approx) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_weighted_gaussian", bandwidth=self.bandwidth, weight=self.weight, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ @@ -193,15 +198,17 @@ class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin): """ This is a class for computing the persistence scale space kernel matrix from a list of persistence diagrams. The persistence scale space kernel is computed by adding the symmetric to the diagonal of each point in each persistence diagram, with negative weight, and then convolving the points with a Gaussian kernel. See https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Reininghaus_A_Stable_Multi-Scale_2015_CVPR_paper.pdf for more details. """ - def __init__(self, bandwidth=1., kernel_approx=None): + def __init__(self, bandwidth=1., kernel_approx=None, n_jobs=None): """ Constructor for the PersistenceScaleSpaceKernel class. Parameters: bandwidth (double): bandwidth of the Gaussian kernel with which persistence diagrams will be convolved (default 1.) kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details. """ self.bandwidth, self.kernel_approx = bandwidth, kernel_approx + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -224,7 +231,7 @@ class PersistenceScaleSpaceKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence scale space kernel values. """ - return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_scale_space", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_scale_space", bandwidth=self.bandwidth, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ @@ -243,7 +250,7 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin): """ This is a class for computing the persistence Fisher kernel matrix from a list of persistence diagrams. The persistence Fisher kernel is computed by exponentiating the corresponding persistence Fisher distance with a Gaussian kernel. See papers.nips.cc/paper/8205-persistence-fisher-kernel-a-riemannian-manifold-kernel-for-persistence-diagrams for more details. """ - def __init__(self, bandwidth_fisher=1., bandwidth=1., kernel_approx=None): + def __init__(self, bandwidth_fisher=1., bandwidth=1., kernel_approx=None, n_jobs=None): """ Constructor for the PersistenceFisherKernel class. @@ -251,9 +258,11 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin): bandwidth (double): bandwidth of the Gaussian kernel applied to the persistence Fisher distance (default 1.). bandwidth_fisher (double): bandwidth of the Gaussian kernel used to turn persistence diagrams into probability distributions by PersistenceFisherDistance class (default 1.). kernel_approx (class): kernel approximation class used to speed up computation (default None). Common kernel approximations classes can be found in the scikit-learn library (such as RBFSampler for instance). + n_jobs (int): number of jobs to use for the computation. See :func:`pairwise_persistence_diagram_kernels` for details. """ self.bandwidth = bandwidth self.bandwidth_fisher, self.kernel_approx = bandwidth_fisher, kernel_approx + self.n_jobs = n_jobs def fit(self, X, y=None): """ @@ -276,7 +285,7 @@ class PersistenceFisherKernel(BaseEstimator, TransformerMixin): Returns: numpy array of shape (number of diagrams in **diagrams**) x (number of diagrams in X): matrix of pairwise persistence Fisher kernel values. """ - return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_fisher", bandwidth=self.bandwidth, bandwidth_fisher=self.bandwidth_fisher, kernel_approx=self.kernel_approx) + return pairwise_persistence_diagram_kernels(X, self.diagrams_, kernel="persistence_fisher", bandwidth=self.bandwidth, bandwidth_fisher=self.bandwidth_fisher, kernel_approx=self.kernel_approx, n_jobs=self.n_jobs) def __call__(self, diag1, diag2): """ -- cgit v1.2.3 From 69852030a6d1b68f3283b5727c6b944a9c7f5e73 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 2 Jun 2020 21:07:29 +0200 Subject: Some test --- src/python/gudhi/representations/kernel_methods.py | 2 +- src/python/gudhi/representations/metrics.py | 2 +- src/python/test/test_representations.py | 33 +++++++++++++++++++++- 3 files changed, 34 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/representations/kernel_methods.py b/src/python/gudhi/representations/kernel_methods.py index 6e4f0619..23fd23c7 100644 --- a/src/python/gudhi/representations/kernel_methods.py +++ b/src/python/gudhi/representations/kernel_methods.py @@ -75,7 +75,7 @@ def pairwise_persistence_diagram_kernels(X, Y=None, kernel="sliced_wasserstein", numpy array of shape (nxm): kernel matrix. """ XX = np.reshape(np.arange(len(X)), [-1,1]) - YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) + YY = None if Y is None or Y is X else np.reshape(np.arange(len(Y)), [-1,1]) if kernel == "sliced_wasserstein": return np.exp(-pairwise_persistence_diagram_distances(X, Y, metric="sliced_wasserstein", num_directions=kwargs["num_directions"], n_jobs=n_jobs) / kwargs["bandwidth"]) elif kernel == "persistence_fisher": diff --git a/src/python/gudhi/representations/metrics.py b/src/python/gudhi/representations/metrics.py index 84907160..cf2e0879 100644 --- a/src/python/gudhi/representations/metrics.py +++ b/src/python/gudhi/representations/metrics.py @@ -164,7 +164,7 @@ def pairwise_persistence_diagram_distances(X, Y=None, metric="bottleneck", n_job numpy array of shape (nxm): distance matrix """ XX = np.reshape(np.arange(len(X)), [-1,1]) - YY = None if Y is None else np.reshape(np.arange(len(Y)), [-1,1]) + YY = None if Y is None or Y is X else np.reshape(np.arange(len(Y)), [-1,1]) if metric == "bottleneck": try: from .. import bottleneck_distance diff --git a/src/python/test/test_representations.py b/src/python/test/test_representations.py index dba7f952..589cee00 100755 --- a/src/python/test/test_representations.py +++ b/src/python/test/test_representations.py @@ -1,12 +1,43 @@ import os import sys import matplotlib.pyplot as plt +import numpy as np +import pytest + def test_representations_examples(): # Disable graphics for testing purposes - plt.show = lambda:None + plt.show = lambda: None here = os.path.dirname(os.path.realpath(__file__)) sys.path.append(here + "/../example") import diagram_vectorizations_distances_kernels return None + + +from gudhi.representations.metrics import * +from gudhi.representations.kernel_methods import * + + +def _n_diags(n): + l = [] + for _ in range(n): + a = np.random.rand(50, 2) + a[:, 1] += a[:, 0] # So that y >= x + l.append(a) + return l + + +def test_multiple(): + l1 = _n_diags(9) + l2 = _n_diags(11) + l1b = l1.copy() + d1 = pairwise_persistence_diagram_distances(l1, e=0.00001, n_jobs=4) + d2 = BottleneckDistance(epsilon=0.00001).fit_transform(l1) + d3 = pairwise_persistence_diagram_distances(l1, l1b, e=0.00001, n_jobs=4) + assert d1 == pytest.approx(d2) + assert d3 == pytest.approx(d2, abs=1e-5) # Because of 0 entries (on the diagonal) + d1 = pairwise_persistence_diagram_distances(l1, l2, metric="wasserstein", order=2, internal_p=2) + d2 = WassersteinDistance(order=2, internal_p=2, n_jobs=4).fit(l2).transform(l1) + print(d1.shape, d2.shape) + assert d1 == pytest.approx(d2, rel=.02) -- cgit v1.2.3 From 25176e3ba75005f36cfe3ebc15e1fcba8cd3227b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Tue, 2 Jun 2020 21:58:48 +0200 Subject: Support e=None for bottleneck_distance --- src/python/gudhi/bottleneck.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/bottleneck.cc b/src/python/gudhi/bottleneck.cc index 732cb9a8..9337ce59 100644 --- a/src/python/gudhi/bottleneck.cc +++ b/src/python/gudhi/bottleneck.cc @@ -12,22 +12,26 @@ #include -double bottleneck(Dgm d1, Dgm d2, double epsilon) +// For compatibility with older versions, we want to support e=None. +// In C++17, the recommended way is std::optional. +double bottleneck(Dgm d1, Dgm d2, py::object epsilon) { + double e = (std::numeric_limits::min)(); + if (!epsilon.is_none()) e = epsilon.cast(); // I *think* the call to request() has to be before releasing the GIL. auto diag1 = numpy_to_range_of_pairs(d1); auto diag2 = numpy_to_range_of_pairs(d2); py::gil_scoped_release release; - return Gudhi::persistence_diagram::bottleneck_distance(diag1, diag2, epsilon); + return Gudhi::persistence_diagram::bottleneck_distance(diag1, diag2, e); } PYBIND11_MODULE(bottleneck, m) { m.attr("__license__") = "GPL v3"; m.def("bottleneck_distance", &bottleneck, py::arg("diagram_1"), py::arg("diagram_2"), - py::arg("e") = (std::numeric_limits::min)(), + py::arg("e") = py::none(), R"pbdoc( This function returns the point corresponding to a given vertex. -- cgit v1.2.3 From 18619efce47ef7fc44ba97e9b37b7f6162f5fe1b Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 4 Jun 2020 16:41:53 +0200 Subject: Workaround for ssize_t on windows --- src/python/gudhi/hera/bottleneck.cc | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/hera/bottleneck.cc b/src/python/gudhi/hera/bottleneck.cc index e00b4682..f543613e 100644 --- a/src/python/gudhi/hera/bottleneck.cc +++ b/src/python/gudhi/hera/bottleneck.cc @@ -8,6 +8,14 @@ * - YYYY/MM Author: Description of the modification */ +// https://github.com/grey-narn/hera/issues/3 +// ssize_t is a non-standard type (well, posix) +// BaseTsd.h provides SSIZE_T on windows, this one should be the same there. +#ifdef _MSC_VER +#include +typedef std::ptrdiff_t ssize_t; +#endif + #include // Hera #include -- cgit v1.2.3 From bea81f2d7bc53876a6f071c919663261314965ab Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Thu, 4 Jun 2020 17:21:05 +0200 Subject: Use ssize_t from pybind11 --- src/python/gudhi/hera/bottleneck.cc | 10 ++++------ src/python/include/pybind11_diagram_utils.h | 8 ++++---- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/hera/bottleneck.cc b/src/python/gudhi/hera/bottleneck.cc index f543613e..846a3525 100644 --- a/src/python/gudhi/hera/bottleneck.cc +++ b/src/python/gudhi/hera/bottleneck.cc @@ -8,18 +8,16 @@ * - YYYY/MM Author: Description of the modification */ +#include + +#ifdef _MSC_VER // https://github.com/grey-narn/hera/issues/3 // ssize_t is a non-standard type (well, posix) -// BaseTsd.h provides SSIZE_T on windows, this one should be the same there. -#ifdef _MSC_VER -#include -typedef std::ptrdiff_t ssize_t; +using py::ssize_t; #endif #include // Hera -#include - double bottleneck_distance(Dgm d1, Dgm d2, double delta) { // I *think* the call to request() has to be before releasing the GIL. diff --git a/src/python/include/pybind11_diagram_utils.h b/src/python/include/pybind11_diagram_utils.h index d9627258..2d5194f4 100644 --- a/src/python/include/pybind11_diagram_utils.h +++ b/src/python/include/pybind11_diagram_utils.h @@ -18,8 +18,8 @@ namespace py = pybind11; typedef py::array_t Dgm; // Get m[i,0] and m[i,1] as a pair -static auto pairify(void* p, ssize_t h, ssize_t w) { - return [=](ssize_t i){ +static auto pairify(void* p, py::ssize_t h, py::ssize_t w) { + return [=](py::ssize_t i){ char* birth = (char*)p + i * h; char* death = birth + w; return std::make_pair(*(double*)birth, *(double*)death); @@ -32,8 +32,8 @@ inline auto numpy_to_range_of_pairs(py::array_t dgm) { if((buf.ndim!=2 || buf.shape[1]!=2) && (buf.ndim!=1 || buf.shape[0]!=0)) throw std::runtime_error("Diagram must be an array of size n x 2"); // In the case of shape (0), avoid reading non-existing strides[1] even if we won't use it. - ssize_t stride1 = buf.ndim == 2 ? buf.strides[1] : 0; - auto cnt = boost::counting_range(0, buf.shape[0]); + py::ssize_t stride1 = buf.ndim == 2 ? buf.strides[1] : 0; + auto cnt = boost::counting_range(0, buf.shape[0]); return boost::adaptors::transform(cnt, pairify(buf.ptr, buf.strides[0], stride1)); // Be careful that the returned range cannot contain references to dead temporaries. } -- cgit v1.2.3 From a4598f043824db493369d3e78048139988bde9aa Mon Sep 17 00:00:00 2001 From: Vincent Rouvreau <10407034+VincentRouvreau@users.noreply.github.com> Date: Fri, 5 Jun 2020 17:59:37 +0200 Subject: Update src/python/gudhi/alpha_complex.pyx Co-authored-by: Marc Glisse --- src/python/gudhi/alpha_complex.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx index 5bc9ebc4..80e54da0 100644 --- a/src/python/gudhi/alpha_complex.pyx +++ b/src/python/gudhi/alpha_complex.pyx @@ -75,7 +75,7 @@ cdef class AlphaComplex: # The real cython constructor def __cinit__(self, points = None, off_file = '', complexity = 'safe'): - assert complexity == 'fast' or complexity == 'safe' or complexity == 'exact', "Alpha complex complexity can be 'fast', 'safe' or 'exact'" + assert complexity in ['fast', 'safe', 'exact'], "Alpha complex complexity can only be 'fast', 'safe' or 'exact'" self.fast = complexity == 'fast' self.exact = complexity == 'safe' -- cgit v1.2.3 From 58cb456a33b4eec8ecc926457668d7d77d6662c3 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Fri, 5 Jun 2020 18:23:02 +0200 Subject: Code review: 'fast' class attribute was not necessary and redundant with C++ class attribute --- src/python/gudhi/alpha_complex.pyx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx index 80e54da0..3855f1ac 100644 --- a/src/python/gudhi/alpha_complex.pyx +++ b/src/python/gudhi/alpha_complex.pyx @@ -54,7 +54,6 @@ cdef class AlphaComplex: """ cdef Alpha_complex_interface * this_ptr - cdef bool fast cdef bool exact # Fake constructor that does nothing but documenting the constructor @@ -76,13 +75,13 @@ cdef class AlphaComplex: # The real cython constructor def __cinit__(self, points = None, off_file = '', complexity = 'safe'): assert complexity in ['fast', 'safe', 'exact'], "Alpha complex complexity can only be 'fast', 'safe' or 'exact'" - self.fast = complexity == 'fast' + cdef bool fast = complexity == 'fast' self.exact = complexity == 'safe' cdef vector[vector[double]] pts if off_file: if os.path.isfile(off_file): - self.this_ptr = new Alpha_complex_interface(off_file.encode('utf-8'), self.fast, True) + self.this_ptr = new Alpha_complex_interface(off_file.encode('utf-8'), fast, True) else: print("file " + off_file + " not found.") else: @@ -91,7 +90,7 @@ cdef class AlphaComplex: points=[] pts = points with nogil: - self.this_ptr = new Alpha_complex_interface(pts, self.fast) + self.this_ptr = new Alpha_complex_interface(pts, fast) def __dealloc__(self): if self.this_ptr != NULL: -- cgit v1.2.3 From 4f02ba9c5a3233ff9d4554578fbe3ae456b9711f Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Fri, 5 Jun 2020 18:44:17 +0200 Subject: code review: rename complexity with precision --- src/python/doc/alpha_complex_user.rst | 8 ++++---- src/python/gudhi/alpha_complex.pyx | 14 +++++++------- src/python/test/test_alpha_complex.py | 22 +++++++++++----------- 3 files changed, 22 insertions(+), 22 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/doc/alpha_complex_user.rst b/src/python/doc/alpha_complex_user.rst index bc9fe513..e8b4f25e 100644 --- a/src/python/doc/alpha_complex_user.rst +++ b/src/python/doc/alpha_complex_user.rst @@ -23,14 +23,14 @@ Remarks equivalent to the `Čech complex `_ and much smaller if you do not bound the radii. `Čech complex `_ can still make sense in higher dimension precisely because you can bound the radii. -* Using the default :code:`complexity = 'safe'` makes the construction safe. - If you pass :code:`complexity = 'exact'` to :func:`~gudhi.AlphaComplex.__init__`, the filtration values are the exact +* Using the default :code:`precision = 'safe'` makes the construction safe. + If you pass :code:`precision = 'exact'` to :func:`~gudhi.AlphaComplex.__init__`, the filtration values are the exact ones converted to float. This can be very slow. - If you pass :code:`complexity = 'safe'` (the default), the filtration values are only + If you pass :code:`precision = 'safe'` (the default), the filtration values are only guaranteed to have a small multiplicative error compared to the exact value. A drawback, when computing persistence, is that an empty exact interval [10^12,10^12] may become a non-empty approximate interval [10^12,10^12+10^6]. - Using :code:`complexity = 'fast'` makes the computations slightly faster, and the combinatorics are still exact, but + Using :code:`precision = 'fast'` makes the computations slightly faster, and the combinatorics are still exact, but the computation of filtration values can exceptionally be arbitrarily bad. In all cases, we still guarantee that the output is a valid filtration (faces have a filtration value no larger than their cofaces). * For performances reasons, it is advised to use Alpha_complex with `CGAL `_ :math:`\geq` 5.0.0. diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx index 3855f1ac..d9c2be81 100644 --- a/src/python/gudhi/alpha_complex.pyx +++ b/src/python/gudhi/alpha_complex.pyx @@ -57,7 +57,7 @@ cdef class AlphaComplex: cdef bool exact # Fake constructor that does nothing but documenting the constructor - def __init__(self, points=None, off_file='', complexity='safe'): + def __init__(self, points=None, off_file='', precision='safe'): """AlphaComplex constructor. :param points: A list of points in d-Dimension. @@ -68,15 +68,15 @@ cdef class AlphaComplex: :param off_file: An OFF file style name. :type off_file: string - :param complexity: Alpha complex complexity can be 'fast', 'safe' or 'exact'. Default is 'safe'. - :type complexity: string + :param precision: Alpha complex precision can be 'fast', 'safe' or 'exact'. Default is 'safe'. + :type precision: string """ # The real cython constructor - def __cinit__(self, points = None, off_file = '', complexity = 'safe'): - assert complexity in ['fast', 'safe', 'exact'], "Alpha complex complexity can only be 'fast', 'safe' or 'exact'" - cdef bool fast = complexity == 'fast' - self.exact = complexity == 'safe' + def __cinit__(self, points = None, off_file = '', precision = 'safe'): + assert precision in ['fast', 'safe', 'exact'], "Alpha complex precision can only be 'fast', 'safe' or 'exact'" + cdef bool fast = precision == 'fast' + self.exact = precision == 'safe' cdef vector[vector[double]] pts if off_file: diff --git a/src/python/test/test_alpha_complex.py b/src/python/test/test_alpha_complex.py index 913397dd..943ad2c4 100755 --- a/src/python/test/test_alpha_complex.py +++ b/src/python/test/test_alpha_complex.py @@ -24,8 +24,8 @@ __copyright__ = "Copyright (C) 2016 Inria" __license__ = "MIT" -def _empty_alpha(complexity): - alpha_complex = AlphaComplex(points=[[0, 0]], complexity = complexity) +def _empty_alpha(precision): + alpha_complex = AlphaComplex(points=[[0, 0]], precision = precision) assert alpha_complex.__is_defined() == True def test_empty_alpha(): @@ -33,9 +33,9 @@ def test_empty_alpha(): _empty_alpha('safe') _empty_alpha('exact') -def _infinite_alpha(complexity): +def _infinite_alpha(precision): point_list = [[0, 0], [1, 0], [0, 1], [1, 1]] - alpha_complex = AlphaComplex(points=point_list, complexity = complexity) + alpha_complex = AlphaComplex(points=point_list, precision = precision) assert alpha_complex.__is_defined() == True simplex_tree = alpha_complex.create_simplex_tree() @@ -88,9 +88,9 @@ def test_infinite_alpha(): _infinite_alpha('safe') _infinite_alpha('exact') -def _filtered_alpha(complexity): +def _filtered_alpha(precision): point_list = [[0, 0], [1, 0], [0, 1], [1, 1]] - filtered_alpha = AlphaComplex(points=point_list, complexity = complexity) + filtered_alpha = AlphaComplex(points=point_list, precision = precision) simplex_tree = filtered_alpha.create_simplex_tree(max_alpha_square=0.25) @@ -132,7 +132,7 @@ def test_filtered_alpha(): _filtered_alpha('safe') _filtered_alpha('exact') -def _safe_alpha_persistence_comparison(complexity): +def _safe_alpha_persistence_comparison(precision): #generate periodic signal time = np.arange(0, 10, 1) signal = [math.sin(x) for x in time] @@ -144,10 +144,10 @@ def _safe_alpha_persistence_comparison(complexity): embedding2 = [[signal[i], delayed[i]] for i in range(len(time))] #build alpha complex and simplex tree - alpha_complex1 = AlphaComplex(points=embedding1, complexity = complexity) + alpha_complex1 = AlphaComplex(points=embedding1, precision = precision) simplex_tree1 = alpha_complex1.create_simplex_tree() - alpha_complex2 = AlphaComplex(points=embedding2, complexity = complexity) + alpha_complex2 = AlphaComplex(points=embedding2, precision = precision) simplex_tree2 = alpha_complex2.create_simplex_tree() diag1 = simplex_tree1.persistence() @@ -163,9 +163,9 @@ def test_safe_alpha_persistence_comparison(): _safe_alpha_persistence_comparison('safe') _safe_alpha_persistence_comparison('exact') -def _delaunay_complex(complexity): +def _delaunay_complex(precision): point_list = [[0, 0], [1, 0], [0, 1], [1, 1]] - filtered_alpha = AlphaComplex(points=point_list, complexity = complexity) + filtered_alpha = AlphaComplex(points=point_list, precision = precision) simplex_tree = filtered_alpha.create_simplex_tree(default_filtration_value = True) -- cgit v1.2.3 From 4a437177cb2b10b6462380c39739a923c08dc121 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 5 Jun 2020 19:33:53 +0200 Subject: Doc tweak by Vincent --- src/python/gudhi/bottleneck.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/bottleneck.cc b/src/python/gudhi/bottleneck.cc index 9337ce59..838bf9eb 100644 --- a/src/python/gudhi/bottleneck.cc +++ b/src/python/gudhi/bottleneck.cc @@ -46,7 +46,7 @@ PYBIND11_MODULE(bottleneck, m) { bits of the mantissa may be wrong). This version of the algorithm takes advantage of the limited precision of `double` and is usually a lot faster to compute, whatever the value of `e`. - Thus, by default, `e` is the smallest positive double. + Thus, by default (`e=None`), `e` is the smallest positive double. :type e: float :rtype: float :returns: the bottleneck distance. -- cgit v1.2.3 From 82dc35dd749a1f388be268d7a7e3bd22f18afcf7 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 5 Jun 2020 20:16:38 +0200 Subject: Doc changes after Vincent's review --- src/python/doc/bottleneck_distance_user.rst | 5 +++-- src/python/gudhi/bottleneck.cc | 3 ++- src/python/gudhi/hera/bottleneck.cc | 3 +++ 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'src/python/gudhi') diff --git a/src/python/doc/bottleneck_distance_user.rst b/src/python/doc/bottleneck_distance_user.rst index 49bd3706..6c6e08d9 100644 --- a/src/python/doc/bottleneck_distance_user.rst +++ b/src/python/doc/bottleneck_distance_user.rst @@ -10,7 +10,7 @@ Definition .. include:: bottleneck_distance_sum.inc This implementation by François Godi is based on ideas from "Geometry Helps in Bottleneck Matching and Related Problems" -:cite:`DBLP:journals/algorithmica/EfratIK01` and requires `CGAL `_. +:cite:`DBLP:journals/algorithmica/EfratIK01` and requires `CGAL `_ (`GPL v3 `_). .. autofunction:: gudhi.bottleneck_distance @@ -20,7 +20,8 @@ based on "Geometry Helps to Compare Persistence Diagrams" :cite:`Kerber:2017:GHC:3047249.3064175` by Michael Kerber, Dmitriy Morozov, and Arnur Nigmetov. -Beware that its approximation allows for a multiplicative error, while the function above uses an additive error. +.. warning:: + Beware that its approximation allows for a multiplicative error, while the function above uses an additive error. .. autofunction:: gudhi.hera.bottleneck_distance diff --git a/src/python/gudhi/bottleneck.cc b/src/python/gudhi/bottleneck.cc index 732cb9a8..59be6088 100644 --- a/src/python/gudhi/bottleneck.cc +++ b/src/python/gudhi/bottleneck.cc @@ -29,7 +29,8 @@ PYBIND11_MODULE(bottleneck, m) { py::arg("diagram_1"), py::arg("diagram_2"), py::arg("e") = (std::numeric_limits::min)(), R"pbdoc( - This function returns the point corresponding to a given vertex. + Compute the Bottleneck distance between two diagrams. + Points at infinity and on the diagonal are supported. :param diagram_1: The first diagram. :type diagram_1: numpy array of shape (m,2) diff --git a/src/python/gudhi/hera/bottleneck.cc b/src/python/gudhi/hera/bottleneck.cc index 846a3525..0cb562ce 100644 --- a/src/python/gudhi/hera/bottleneck.cc +++ b/src/python/gudhi/hera/bottleneck.cc @@ -40,6 +40,9 @@ PYBIND11_MODULE(bottleneck, m) { Compute the Bottleneck distance between two diagrams. Points at infinity are supported. + .. note:: + Points on the diagonal are not supported and must be filtered out before calling this function. + Parameters: X (n x 2 numpy array): First diagram Y (n x 2 numpy array): Second diagram -- cgit v1.2.3 From 19dc69e6edaac076da5138fe091be367a3652057 Mon Sep 17 00:00:00 2001 From: ROUVREAU Vincent Date: Fri, 5 Jun 2020 22:28:03 +0200 Subject: code review: exact version was not correct --- src/python/gudhi/alpha_complex.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/alpha_complex.pyx b/src/python/gudhi/alpha_complex.pyx index d9c2be81..a356384d 100644 --- a/src/python/gudhi/alpha_complex.pyx +++ b/src/python/gudhi/alpha_complex.pyx @@ -76,7 +76,7 @@ cdef class AlphaComplex: def __cinit__(self, points = None, off_file = '', precision = 'safe'): assert precision in ['fast', 'safe', 'exact'], "Alpha complex precision can only be 'fast', 'safe' or 'exact'" cdef bool fast = precision == 'fast' - self.exact = precision == 'safe' + self.exact = precision == 'exact' cdef vector[vector[double]] pts if off_file: -- cgit v1.2.3 From dacc3e363ded8b68bb4b71c1602e2c52b10b36e5 Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Fri, 5 Jun 2020 22:29:32 +0200 Subject: author, etc --- src/python/gudhi/hera/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/python/gudhi') diff --git a/src/python/gudhi/hera/__init__.py b/src/python/gudhi/hera/__init__.py index 044f81cd..f70b92b9 100644 --- a/src/python/gudhi/hera/__init__.py +++ b/src/python/gudhi/hera/__init__.py @@ -1,2 +1,7 @@ from .wasserstein import wasserstein_distance from .bottleneck import bottleneck_distance + + +__author__ = "Marc Glisse" +__copyright__ = "Copyright (C) 2020 Inria" +__license__ = "MIT" -- cgit v1.2.3