From 303b014508f849d8cb8a4369430068f54fa74c46 Mon Sep 17 00:00:00 2001 From: Hind Montassif Date: Mon, 10 May 2021 10:27:57 +0200 Subject: Add __init__.py files at every module level Standardize functions to match the existing ones in sphere --- src/python/gudhi/datasets/generators/points.py | 42 ++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 src/python/gudhi/datasets/generators/points.py (limited to 'src/python/gudhi/datasets/generators/points.py') diff --git a/src/python/gudhi/datasets/generators/points.py b/src/python/gudhi/datasets/generators/points.py new file mode 100644 index 00000000..d5a370ad --- /dev/null +++ b/src/python/gudhi/datasets/generators/points.py @@ -0,0 +1,42 @@ +# This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. +# See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. +# Author(s): Hind Montassif +# +# Copyright (C) 2021 Inria +# +# Modification(s): +# - YYYY/MM Author: Description of the modification + +import numpy as np +import itertools + +def _generate_random_points(n_samples, dim): + + # Generate random angles of size n_samples*dim + alpha = 2*np.pi*np.random.rand(n_samples*dim) + + # Based on angles, construct points of size n_samples*dim on a circle and reshape the result in a n_samples*2*dim array + array_points = np.column_stack([np.cos(alpha), np.sin(alpha)]).reshape(-1, 2*dim) + + return array_points + +def _generate_grid_points(n_samples, dim): + + n_samples_grid = int(n_samples**(1./dim)) + alpha = np.linspace(0, 2*np.pi, n_samples_grid, endpoint=False) + + array_points_inter = np.column_stack([np.cos(alpha), np.sin(alpha)]) + array_points = np.array(list(itertools.product(array_points_inter, repeat=dim))).reshape(-1, 2*dim) + + return array_points + +def torus(n_samples, dim, sample='random'): + if sample == 'random': + print("Sample is random") + return _generate_random_points(n_samples, dim) + elif sample == 'grid': + print("Sample is grid") + return _generate_grid_points(n_samples, dim) + else: + raise Exception("Sample type '{}' is not supported".format(sample)) + return -- cgit v1.2.3 From 0b238a336f15128d777252cd084ee996491e6882 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Thu, 27 May 2021 09:56:02 +0200 Subject: Add documentation to python torus fonction and apply some modifications according to PR comments --- src/python/gudhi/datasets/generators/_points.cc | 2 +- src/python/gudhi/datasets/generators/points.py | 24 +++++++++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'src/python/gudhi/datasets/generators/points.py') diff --git a/src/python/gudhi/datasets/generators/_points.cc b/src/python/gudhi/datasets/generators/_points.cc index 561fd6d8..003b65a3 100644 --- a/src/python/gudhi/datasets/generators/_points.cc +++ b/src/python/gudhi/datasets/generators/_points.cc @@ -56,7 +56,7 @@ PYBIND11_MODULE(_points, m) { :param n_samples: The number of points to be generated. :type n_samples: integer - :param dim: The dimension. + :param dim: The dimension of the torus on which points would be generated in R^2*dim. :type dim: integer :param uniform: A flag to define if the points generation is uniform (i.e generated as a grid). :type uniform: bool diff --git a/src/python/gudhi/datasets/generators/points.py b/src/python/gudhi/datasets/generators/points.py index d5a370ad..a8f5ad54 100644 --- a/src/python/gudhi/datasets/generators/points.py +++ b/src/python/gudhi/datasets/generators/points.py @@ -10,7 +10,7 @@ import numpy as np import itertools -def _generate_random_points(n_samples, dim): +def _generate_random_points_on_torus(n_samples, dim): # Generate random angles of size n_samples*dim alpha = 2*np.pi*np.random.rand(n_samples*dim) @@ -20,8 +20,9 @@ def _generate_random_points(n_samples, dim): return array_points -def _generate_grid_points(n_samples, dim): +def _generate_grid_points_on_torus(n_samples, dim): + # Generate points on a dim-torus as a grid n_samples_grid = int(n_samples**(1./dim)) alpha = np.linspace(0, 2*np.pi, n_samples_grid, endpoint=False) @@ -31,12 +32,25 @@ def _generate_grid_points(n_samples, dim): return array_points def torus(n_samples, dim, sample='random'): + ''' + Generate points on a dim-torus in R^2dim either randomly or on a grid + + :param n_samples: The number of points to be generated. + :param dim: The dimension of the torus on which points would be generated in R^2*dim. + :param sample: The sample type of the generated points. Can be 'random' or 'grid'. + :returns: numpy array containing the generated points on a torus. + The shape of returned numpy array is : + if sample is 'random' : (n_samples, 2*dim) + if sample is 'grid' : ((int(n_samples**(1./dim)))**dim, 2*dim) + ''' if sample == 'random': + # Generate points randomly print("Sample is random") - return _generate_random_points(n_samples, dim) + return _generate_random_points_on_torus(n_samples, dim) elif sample == 'grid': + # Generate points on a grid print("Sample is grid") - return _generate_grid_points(n_samples, dim) + return _generate_grid_points_on_torus(n_samples, dim) else: - raise Exception("Sample type '{}' is not supported".format(sample)) + raise ValueError("Sample type '{}' is not supported".format(sample)) return -- cgit v1.2.3 From 09214d0ad3abd0c81b3a2c8051bf8b370350d6e5 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 1 Jun 2021 11:20:26 +0200 Subject: Add datasets generators documentation --- src/python/doc/datasets_generators.inc | 14 ++++ src/python/doc/datasets_generators.rst | 97 ++++++++++++++++++++++++++ src/python/doc/examples.rst | 1 + src/python/doc/index.rst | 5 ++ src/python/gudhi/datasets/generators/points.py | 10 +-- 5 files changed, 122 insertions(+), 5 deletions(-) create mode 100644 src/python/doc/datasets_generators.inc create mode 100644 src/python/doc/datasets_generators.rst (limited to 'src/python/gudhi/datasets/generators/points.py') diff --git a/src/python/doc/datasets_generators.inc b/src/python/doc/datasets_generators.inc new file mode 100644 index 00000000..c88115c3 --- /dev/null +++ b/src/python/doc/datasets_generators.inc @@ -0,0 +1,14 @@ +.. table:: + :widths: 30 40 30 + + +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ + | | :math:`(x_1, x_2, \ldots, x_d)` | Datasets generators (points). | :Authors: Hind Montassif | + | | | | + | | | :Since: GUDHI 3.5.0 | + | | | | + | | | :License: MIT (`LGPL v3 `_) | + | | | | + | | | :Requires: `CGAL `_ | + +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ + | * :doc:`datasets_generators` | + +-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/datasets_generators.rst b/src/python/doc/datasets_generators.rst new file mode 100644 index 00000000..ef21c9d2 --- /dev/null +++ b/src/python/doc/datasets_generators.rst @@ -0,0 +1,97 @@ + +:orphan: + +.. To get rid of WARNING: document isn't included in any toctree + +=========================== +Datasets generators manual +=========================== + +We provide the generation of different customizable datasets to use as inputs for Gudhi complexes and data structures. + + +Points generators +------------------ + +Points on sphere +^^^^^^^^^^^^^^^^ + +The module **_points** enables the generation of random i.i.d. points uniformly on a (d-1)-sphere in :math:`R^d`. +The user should provide the number of points to be generated on the sphere :code:`n_samples` and the ambient dimension :code:`ambient_dim`. +The :code:`radius` of sphere is optional and is equal to **1** by default. +Only random points generation is currently available. + +The generated points are given as an array of shape :math:`(n\_samples, ambient\_dim)`. + +Example +""""""" + +.. code-block:: python + + from gudhi.datasets.generators import _points + from gudhi import AlphaComplex + + # Generate 50 points on a sphere in R^2 + gen_points = _points.sphere(n_samples = 50, ambient_dim = 2, radius = 1, sample = "random") + + # Create an alpha complex from the generated points + alpha_complex = AlphaComplex(points = gen_points) + +.. autofunction:: gudhi.datasets.generators._points.sphere + +Points on torus +^^^^^^^^^^^^^^^^ + +You can also generate points on a torus. + +Two modules are available and give the same output: the first one depends on **CGAL** and the second does not and consists of full python code. + +On another hand, two sample types are provided : you can either generate i.i.d. points on a d-torus in :math:`R^{2d}` *randomly* or on a *grid*. + +First module : **_points** +"""""""""""""""""""""""""" + +The user should provide the number of points to be generated on the torus :code:`n_samples`, and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. +The flag :code:`uniform` is optional and is set to **False** by default, meaning that the points will be generated randomly. +In this case, the returned generated points would be an array of shape :math:`(n\_samples, 2*dim)`. +Otherwise, if set to **True**, the points are generated as a grid and would be given as an array of shape : + +.. math:: + + ( [n\_samples^{1 \over {dim}}]^{dim}, 2*dim ) + +Example +""""""" +.. code-block:: python + + from gudhi.datasets.generators import _points + + # Generate 50 points randomly on a torus in R^6 + gen_points = _points.torus(n_samples = 50, dim = 3) + + # Generate 27 points on a torus as a grid in R^6 + gen_points = _points.torus(n_samples = 50, dim = 3, uniform = True) + +.. autofunction:: gudhi.datasets.generators._points.torus + +Second module : **points** +"""""""""""""""""""""""""" + +The user should provide the number of points to be generated on the torus :code:`n_samples` and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. +The :code:`sample` argument is optional and is set to **'random'** by default. +The other allowed value of sample type is **'grid'** and is equivalent to :code:`uniform = True` in the first module. + +Example +""""""" +.. code-block:: python + + from gudhi.datasets.generators import points + + # Generate 50 points randomly on a torus in R^6 + gen_points = points.torus(n_samples = 50, dim = 3) + + # Generate 27 points on a torus as a grid in R^6 + gen_points = points.torus(n_samples = 50, dim = 3, sample = 'grid') + + +.. autofunction:: gudhi.datasets.generators.points.torus diff --git a/src/python/doc/examples.rst b/src/python/doc/examples.rst index 76e5d4c7..1442f185 100644 --- a/src/python/doc/examples.rst +++ b/src/python/doc/examples.rst @@ -8,6 +8,7 @@ Examples .. only:: builder_html * :download:`alpha_complex_diagram_persistence_from_off_file_example.py <../example/alpha_complex_diagram_persistence_from_off_file_example.py>` + * :download:`alpha_complex_from_generated_points_on_sphere_example.py <../example/alpha_complex_from_generated_points_on_sphere_example.py>` * :download:`alpha_complex_from_points_example.py <../example/alpha_complex_from_points_example.py>` * :download:`alpha_rips_persistence_bottleneck_distance.py <../example/alpha_rips_persistence_bottleneck_distance.py>` * :download:`bottleneck_basic_example.py <../example/bottleneck_basic_example.py>` diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index 040e57a4..2d7921ae 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -91,3 +91,8 @@ Clustering ********** .. include:: clustering.inc + +Datasets generators +******************* + +.. include:: datasets_generators.inc diff --git a/src/python/gudhi/datasets/generators/points.py b/src/python/gudhi/datasets/generators/points.py index a8f5ad54..3870dea6 100644 --- a/src/python/gudhi/datasets/generators/points.py +++ b/src/python/gudhi/datasets/generators/points.py @@ -32,17 +32,17 @@ def _generate_grid_points_on_torus(n_samples, dim): return array_points def torus(n_samples, dim, sample='random'): - ''' + """ Generate points on a dim-torus in R^2dim either randomly or on a grid :param n_samples: The number of points to be generated. :param dim: The dimension of the torus on which points would be generated in R^2*dim. :param sample: The sample type of the generated points. Can be 'random' or 'grid'. :returns: numpy array containing the generated points on a torus. - The shape of returned numpy array is : - if sample is 'random' : (n_samples, 2*dim) - if sample is 'grid' : ((int(n_samples**(1./dim)))**dim, 2*dim) - ''' + The shape of returned numpy array is : + if sample is 'random' : (n_samples, 2*dim). + if sample is 'grid' : ([n_samples**(1./dim)]**dim, 2*dim). + """ if sample == 'random': # Generate points randomly print("Sample is random") -- cgit v1.2.3 From 575beed582f9288d83a403f4f578731f172f7f5f Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 11 Aug 2021 14:35:25 +0200 Subject: Add test for sphere and torus Fix numerical approximations inconsistencies with dim fraction exponent when generating points as grid on torus Add notes in doc regarding the torus versions use cases --- src/common/include/gudhi/random_point_generators.h | 2 +- src/python/CMakeLists.txt | 3 ++ src/python/doc/datasets_generators.rst | 5 +++ src/python/gudhi/datasets/generators/_points.cc | 4 +++ src/python/gudhi/datasets/generators/points.py | 5 +-- src/python/test/test_datasets_generators.py | 40 ++++++++++++++++++++++ 6 files changed, 54 insertions(+), 5 deletions(-) create mode 100755 src/python/test/test_datasets_generators.py (limited to 'src/python/gudhi/datasets/generators/points.py') diff --git a/src/common/include/gudhi/random_point_generators.h b/src/common/include/gudhi/random_point_generators.h index 07e4f3da..25a7392d 100644 --- a/src/common/include/gudhi/random_point_generators.h +++ b/src/common/include/gudhi/random_point_generators.h @@ -227,7 +227,7 @@ std::vector generate_points_on_torus_d(std::size_t num std::vector points; points.reserve(num_points); if (sample == "grid") { - std::size_t num_slices = (std::size_t)std::pow(num_points, 1. / dim); + std::size_t num_slices = (std::size_t)std::pow(num_points + .5, 1. / dim); // add .5 to avoid rounding down with numerical approximations generate_grid_points_on_torus_d( k, dim, num_slices, std::back_inserter(points), radius_noise_percentage); } else { diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index 8c46004a..f30dfe6d 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -443,6 +443,9 @@ if(PYTHONINTERP_FOUND) # Euclidean witness add_gudhi_py_test(test_euclidean_witness_complex) + # Datasets generators + add_gudhi_py_test(test_datasets_generators) # TODO separate full python datasets generators in another test file independant from CGAL ? + endif (NOT CGAL_WITH_EIGEN3_VERSION VERSION_LESS 4.11.0) # Cubical diff --git a/src/python/doc/datasets_generators.rst b/src/python/doc/datasets_generators.rst index 2802eccd..e63dde82 100644 --- a/src/python/doc/datasets_generators.rst +++ b/src/python/doc/datasets_generators.rst @@ -60,6 +60,9 @@ Otherwise, if set to **'grid'**, the points are generated on a grid and would be ( [n\_samples^{1 \over {dim}}]^{dim}, 2*dim ) + +**Note:** This version is recommended when the user wishes to use **'grid'** as sample type, or **'random'** with a relatively small number of samples (~ less than 150). + Example """"""" .. code-block:: python @@ -81,6 +84,8 @@ The user should provide the number of points to be generated on the torus :code: The :code:`sample` argument is optional and is set to **'random'** by default. The other allowed value of sample type is **'grid'**. +**Note:** This version is recommended when the user wishes to use **'random'** as sample type with a great number of samples and a low dimension. + Example """"""" .. code-block:: python diff --git a/src/python/gudhi/datasets/generators/_points.cc b/src/python/gudhi/datasets/generators/_points.cc index 6bbdf284..3d38ff90 100644 --- a/src/python/gudhi/datasets/generators/_points.cc +++ b/src/python/gudhi/datasets/generators/_points.cc @@ -48,6 +48,10 @@ py::array_t generate_points_on_sphere(size_t n_samples, int ambient_dim, py::array_t generate_points_on_torus(size_t n_samples, int dim, std::string sample) { + if ( (sample != "random") && (sample != "grid")) { + throw pybind11::value_error("This sample type is not supported"); + } + std::vector points_generated; { diff --git a/src/python/gudhi/datasets/generators/points.py b/src/python/gudhi/datasets/generators/points.py index 3870dea6..daada486 100644 --- a/src/python/gudhi/datasets/generators/points.py +++ b/src/python/gudhi/datasets/generators/points.py @@ -23,7 +23,7 @@ def _generate_random_points_on_torus(n_samples, dim): def _generate_grid_points_on_torus(n_samples, dim): # Generate points on a dim-torus as a grid - n_samples_grid = int(n_samples**(1./dim)) + n_samples_grid = int((n_samples+.5)**(1./dim)) # add .5 to avoid rounding down with numerical approximations alpha = np.linspace(0, 2*np.pi, n_samples_grid, endpoint=False) array_points_inter = np.column_stack([np.cos(alpha), np.sin(alpha)]) @@ -45,12 +45,9 @@ def torus(n_samples, dim, sample='random'): """ if sample == 'random': # Generate points randomly - print("Sample is random") return _generate_random_points_on_torus(n_samples, dim) elif sample == 'grid': # Generate points on a grid - print("Sample is grid") return _generate_grid_points_on_torus(n_samples, dim) else: raise ValueError("Sample type '{}' is not supported".format(sample)) - return diff --git a/src/python/test/test_datasets_generators.py b/src/python/test/test_datasets_generators.py new file mode 100755 index 00000000..656c30ee --- /dev/null +++ b/src/python/test/test_datasets_generators.py @@ -0,0 +1,40 @@ +""" This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT. + See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details. + Author(s): Hind Montassif + + Copyright (C) 2021 Inria + + Modification(s): + - YYYY/MM Author: Description of the modification +""" + +from gudhi.datasets.generators import points +from gudhi.datasets.generators import _points + +import pytest + +def test_sphere(): + assert _points.sphere(n_samples = 10, ambient_dim = 2, radius = 1., sample = 'random').shape == (10, 2) + + with pytest.raises(ValueError): + _points.sphere(n_samples = 10, ambient_dim = 2, radius = 1., sample = 'other') + +def test_torus(): + assert _points.torus(n_samples = 64, dim = 3, sample = 'random').shape == (64, 6) + assert _points.torus(n_samples = 64, dim = 3, sample = 'grid').shape == (64, 6) + + assert _points.torus(n_samples = 10, dim = 4, sample = 'random').shape == (10, 8) + assert _points.torus(n_samples = 10, dim = 4, sample = 'grid').shape == (1, 8) + + with pytest.raises(ValueError): + _points.torus(n_samples = 10, dim = 4, sample = 'other') + +def test_torus_full_python(): + assert points.torus(n_samples = 64, dim = 3, sample = 'random').shape == (64, 6) + assert points.torus(n_samples = 64, dim = 3, sample = 'grid').shape == (64, 6) + + assert points.torus(n_samples = 10, dim = 4, sample = 'random').shape == (10, 8) + assert points.torus(n_samples = 10, dim = 4, sample = 'grid').shape == (1, 8) + + with pytest.raises(ValueError): + points.torus(n_samples = 10, dim = 4, sample = 'other') -- cgit v1.2.3 From e23ca84fadcc2c65fd8cf2d141be804bf18b2fd6 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 22 Sep 2021 15:20:03 +0200 Subject: Rename function of torus cpp version and import it with sphere in points Change documentation accordingly --- src/python/doc/datasets_generators.rst | 28 +++++++++++++------------ src/python/gudhi/datasets/generators/_points.cc | 9 +++++--- src/python/gudhi/datasets/generators/points.py | 3 +++ src/python/test/test_datasets_generators.py | 15 +++++++------ 4 files changed, 31 insertions(+), 24 deletions(-) (limited to 'src/python/gudhi/datasets/generators/points.py') diff --git a/src/python/doc/datasets_generators.rst b/src/python/doc/datasets_generators.rst index e63dde82..c0bbb973 100644 --- a/src/python/doc/datasets_generators.rst +++ b/src/python/doc/datasets_generators.rst @@ -13,10 +13,12 @@ We provide the generation of different customizable datasets to use as inputs fo Points generators ------------------ +The module **points** enables the generation of random points on a sphere, random points on a torus and as a grid. + Points on sphere ^^^^^^^^^^^^^^^^ -The module **_points** enables the generation of random i.i.d. points uniformly on a (d-1)-sphere in :math:`R^d`. +The function **sphere** enables the generation of random i.i.d. points uniformly on a (d-1)-sphere in :math:`R^d`. The user should provide the number of points to be generated on the sphere :code:`n_samples` and the ambient dimension :code:`ambient_dim`. The :code:`radius` of sphere is optional and is equal to **1** by default. Only random points generation is currently available. @@ -28,28 +30,28 @@ Example .. code-block:: python - from gudhi.datasets.generators import _points + from gudhi.datasets.generators import points from gudhi import AlphaComplex # Generate 50 points on a sphere in R^2 - gen_points = _points.sphere(n_samples = 50, ambient_dim = 2, radius = 1, sample = "random") + gen_points = points.sphere(n_samples = 50, ambient_dim = 2, radius = 1, sample = "random") # Create an alpha complex from the generated points alpha_complex = AlphaComplex(points = gen_points) -.. autofunction:: gudhi.datasets.generators._points.sphere +.. autofunction:: gudhi.datasets.generators.points.sphere Points on torus ^^^^^^^^^^^^^^^^ You can also generate points on a torus. -Two modules are available and give the same output: the first one depends on **CGAL** and the second does not and consists of full python code. +Two functions are available and give the same output: the first one depends on **CGAL** and the second does not and consists of full python code. On another hand, two sample types are provided : you can either generate i.i.d. points on a d-torus in :math:`R^{2d}` *randomly* or on a *grid*. -First module : **_points** -"""""""""""""""""""""""""" +First function : **ctorus** +""""""""""""""""""""""""""" The user should provide the number of points to be generated on the torus :code:`n_samples`, and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. The :code:`sample` argument is optional and is set to **'random'** by default. @@ -67,18 +69,18 @@ Example """"""" .. code-block:: python - from gudhi.datasets.generators import _points + from gudhi.datasets.generators import points # Generate 50 points randomly on a torus in R^6 - gen_points = _points.torus(n_samples = 50, dim = 3) + gen_points = points.ctorus(n_samples = 50, dim = 3) # Generate 27 points on a torus as a grid in R^6 - gen_points = _points.torus(n_samples = 50, dim = 3, sample = 'grid') + gen_points = points.ctorus(n_samples = 50, dim = 3, sample = 'grid') -.. autofunction:: gudhi.datasets.generators._points.torus +.. autofunction:: gudhi.datasets.generators.points.ctorus -Second module : **points** -"""""""""""""""""""""""""" +Second function : **torus** +""""""""""""""""""""""""""" The user should provide the number of points to be generated on the torus :code:`n_samples` and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. The :code:`sample` argument is optional and is set to **'random'** by default. diff --git a/src/python/gudhi/datasets/generators/_points.cc b/src/python/gudhi/datasets/generators/_points.cc index 3d38ff90..536fa949 100644 --- a/src/python/gudhi/datasets/generators/_points.cc +++ b/src/python/gudhi/datasets/generators/_points.cc @@ -96,10 +96,10 @@ PYBIND11_MODULE(_points, m) { :returns: the generated points on a sphere. )pbdoc"); - m.def("torus", &generate_points_on_torus, + m.def("ctorus", &generate_points_on_torus, py::arg("n_samples"), py::arg("dim"), py::arg("sample") = "random", R"pbdoc( - Generate random i.i.d. points on a d-torus in R^2d + Generate random i.i.d. points on a d-torus in R^2d or as a grid :param n_samples: The number of points to be generated. :type n_samples: integer @@ -107,7 +107,10 @@ PYBIND11_MODULE(_points, m) { :type dim: integer :param sample: The sample type. Available values are: `"random"` and `"grid"`. Default value is `"random"`. :type sample: string - :rtype: numpy array of float + :rtype: numpy array of float. + The shape of returned numpy array is : + if sample is 'random' : (n_samples, 2*dim). + if sample is 'grid' : ([n_samples**(1./dim)]**dim, 2*dim). :returns: the generated points on a torus. )pbdoc"); } diff --git a/src/python/gudhi/datasets/generators/points.py b/src/python/gudhi/datasets/generators/points.py index daada486..1995f769 100644 --- a/src/python/gudhi/datasets/generators/points.py +++ b/src/python/gudhi/datasets/generators/points.py @@ -10,6 +10,9 @@ import numpy as np import itertools +from ._points import ctorus +from ._points import sphere + def _generate_random_points_on_torus(n_samples, dim): # Generate random angles of size n_samples*dim diff --git a/src/python/test/test_datasets_generators.py b/src/python/test/test_datasets_generators.py index 656c30ee..4c087c57 100755 --- a/src/python/test/test_datasets_generators.py +++ b/src/python/test/test_datasets_generators.py @@ -9,25 +9,24 @@ """ from gudhi.datasets.generators import points -from gudhi.datasets.generators import _points import pytest def test_sphere(): - assert _points.sphere(n_samples = 10, ambient_dim = 2, radius = 1., sample = 'random').shape == (10, 2) + assert points.sphere(n_samples = 10, ambient_dim = 2, radius = 1., sample = 'random').shape == (10, 2) with pytest.raises(ValueError): - _points.sphere(n_samples = 10, ambient_dim = 2, radius = 1., sample = 'other') + points.sphere(n_samples = 10, ambient_dim = 2, radius = 1., sample = 'other') def test_torus(): - assert _points.torus(n_samples = 64, dim = 3, sample = 'random').shape == (64, 6) - assert _points.torus(n_samples = 64, dim = 3, sample = 'grid').shape == (64, 6) + assert points.ctorus(n_samples = 64, dim = 3, sample = 'random').shape == (64, 6) + assert points.ctorus(n_samples = 64, dim = 3, sample = 'grid').shape == (64, 6) - assert _points.torus(n_samples = 10, dim = 4, sample = 'random').shape == (10, 8) - assert _points.torus(n_samples = 10, dim = 4, sample = 'grid').shape == (1, 8) + assert points.ctorus(n_samples = 10, dim = 4, sample = 'random').shape == (10, 8) + assert points.ctorus(n_samples = 10, dim = 4, sample = 'grid').shape == (1, 8) with pytest.raises(ValueError): - _points.torus(n_samples = 10, dim = 4, sample = 'other') + points.ctorus(n_samples = 10, dim = 4, sample = 'other') def test_torus_full_python(): assert points.torus(n_samples = 64, dim = 3, sample = 'random').shape == (64, 6) -- cgit v1.2.3 From bb8c4994b89fb6bfdd80b76912acadf6197f93cc Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 26 Oct 2021 13:59:44 +0200 Subject: Add comments and some minor changes following code review --- src/python/doc/datasets_generators.rst | 13 +++++++------ src/python/gudhi/datasets/generators/_points.cc | 2 +- src/python/gudhi/datasets/generators/points.py | 6 +++--- src/python/test/test_datasets_generators.py | 2 ++ 4 files changed, 13 insertions(+), 10 deletions(-) (limited to 'src/python/gudhi/datasets/generators/points.py') diff --git a/src/python/doc/datasets_generators.rst b/src/python/doc/datasets_generators.rst index c0bbb973..3700b8a2 100644 --- a/src/python/doc/datasets_generators.rst +++ b/src/python/doc/datasets_generators.rst @@ -48,22 +48,23 @@ You can also generate points on a torus. Two functions are available and give the same output: the first one depends on **CGAL** and the second does not and consists of full python code. -On another hand, two sample types are provided : you can either generate i.i.d. points on a d-torus in :math:`R^{2d}` *randomly* or on a *grid*. +On another hand, two sample types are provided: you can either generate i.i.d. points on a d-torus in :math:`R^{2d}` *randomly* or on a *grid*. -First function : **ctorus** +First function: **ctorus** """"""""""""""""""""""""""" The user should provide the number of points to be generated on the torus :code:`n_samples`, and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. The :code:`sample` argument is optional and is set to **'random'** by default. In this case, the returned generated points would be an array of shape :math:`(n\_samples, 2*dim)`. -Otherwise, if set to **'grid'**, the points are generated on a grid and would be given as an array of shape : +Otherwise, if set to **'grid'**, the points are generated on a grid and would be given as an array of shape: .. math:: - ( [n\_samples^{1 \over {dim}}]^{dim}, 2*dim ) + ( ⌊n\_samples^{1 \over {dim}}⌋^{dim}, 2*dim ) +**Note 1:** The output array first shape is rounded down to the closest perfect :math:`dim^{th}` power. -**Note:** This version is recommended when the user wishes to use **'grid'** as sample type, or **'random'** with a relatively small number of samples (~ less than 150). +**Note 2:** This version is recommended when the user wishes to use **'grid'** as sample type, or **'random'** with a relatively small number of samples (~ less than 150). Example """"""" @@ -79,7 +80,7 @@ Example .. autofunction:: gudhi.datasets.generators.points.ctorus -Second function : **torus** +Second function: **torus** """"""""""""""""""""""""""" The user should provide the number of points to be generated on the torus :code:`n_samples` and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. diff --git a/src/python/gudhi/datasets/generators/_points.cc b/src/python/gudhi/datasets/generators/_points.cc index 536fa949..5d675930 100644 --- a/src/python/gudhi/datasets/generators/_points.cc +++ b/src/python/gudhi/datasets/generators/_points.cc @@ -110,7 +110,7 @@ PYBIND11_MODULE(_points, m) { :rtype: numpy array of float. The shape of returned numpy array is : if sample is 'random' : (n_samples, 2*dim). - if sample is 'grid' : ([n_samples**(1./dim)]**dim, 2*dim). + if sample is 'grid' : (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power. :returns: the generated points on a torus. )pbdoc"); } diff --git a/src/python/gudhi/datasets/generators/points.py b/src/python/gudhi/datasets/generators/points.py index 1995f769..7f4667af 100644 --- a/src/python/gudhi/datasets/generators/points.py +++ b/src/python/gudhi/datasets/generators/points.py @@ -36,15 +36,15 @@ def _generate_grid_points_on_torus(n_samples, dim): def torus(n_samples, dim, sample='random'): """ - Generate points on a dim-torus in R^2dim either randomly or on a grid + Generate points on a flat dim-torus in R^2dim either randomly or on a grid :param n_samples: The number of points to be generated. :param dim: The dimension of the torus on which points would be generated in R^2*dim. :param sample: The sample type of the generated points. Can be 'random' or 'grid'. :returns: numpy array containing the generated points on a torus. - The shape of returned numpy array is : + The shape of returned numpy array is: if sample is 'random' : (n_samples, 2*dim). - if sample is 'grid' : ([n_samples**(1./dim)]**dim, 2*dim). + if sample is 'grid' : (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power. """ if sample == 'random': # Generate points randomly diff --git a/src/python/test/test_datasets_generators.py b/src/python/test/test_datasets_generators.py index e2d300e0..933a763e 100755 --- a/src/python/test/test_datasets_generators.py +++ b/src/python/test/test_datasets_generators.py @@ -23,6 +23,8 @@ def _basic_torus(impl): assert impl(n_samples = 64, dim = 3, sample = 'grid').shape == (64, 6) assert impl(n_samples = 10, dim = 4, sample = 'random').shape == (10, 8) + + # Here 1**dim < n_samples < 2**dim, the output shape is therefore (1, 2*dim) = (1, 8), where shape[0] is rounded down to the closest perfect 'dim'th power assert impl(n_samples = 10, dim = 4, sample = 'grid').shape == (1, 8) with pytest.raises(ValueError): -- cgit v1.2.3 From 3a29558decccafe0b07dbf07d66f1410df6c187f Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 27 Oct 2021 09:58:52 +0200 Subject: Replace itertools in grid torus generation function with something faster in most general use cases --- src/python/gudhi/datasets/generators/points.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/python/gudhi/datasets/generators/points.py') diff --git a/src/python/gudhi/datasets/generators/points.py b/src/python/gudhi/datasets/generators/points.py index 7f4667af..cf97777d 100644 --- a/src/python/gudhi/datasets/generators/points.py +++ b/src/python/gudhi/datasets/generators/points.py @@ -8,7 +8,6 @@ # - YYYY/MM Author: Description of the modification import numpy as np -import itertools from ._points import ctorus from ._points import sphere @@ -29,10 +28,11 @@ def _generate_grid_points_on_torus(n_samples, dim): n_samples_grid = int((n_samples+.5)**(1./dim)) # add .5 to avoid rounding down with numerical approximations alpha = np.linspace(0, 2*np.pi, n_samples_grid, endpoint=False) - array_points_inter = np.column_stack([np.cos(alpha), np.sin(alpha)]) - array_points = np.array(list(itertools.product(array_points_inter, repeat=dim))).reshape(-1, 2*dim) - - return array_points + array_points = np.column_stack([np.cos(alpha), np.sin(alpha)]) + array_points_idx = np.empty([n_samples_grid]*dim + [dim], dtype=int) + for i, x in enumerate(np.ix_(*([np.arange(n_samples_grid)]*dim))): + array_points_idx[...,i] = x + return array_points[array_points_idx].reshape(-1, 2*dim) def torus(n_samples, dim, sample='random'): """ -- cgit v1.2.3 From fe75d33d715d038e348b7e48512b14c7488ee4f4 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 9 Nov 2021 16:08:10 +0100 Subject: Remove sphinx warnings for torus --- src/python/doc/datasets_generators.rst | 2 +- src/python/gudhi/datasets/generators/_points.cc | 13 +++---------- src/python/gudhi/datasets/generators/points.py | 5 +---- 3 files changed, 5 insertions(+), 15 deletions(-) (limited to 'src/python/gudhi/datasets/generators/points.py') diff --git a/src/python/doc/datasets_generators.rst b/src/python/doc/datasets_generators.rst index 6f36bce1..260c3882 100644 --- a/src/python/doc/datasets_generators.rst +++ b/src/python/doc/datasets_generators.rst @@ -42,7 +42,7 @@ Example .. autofunction:: gudhi.datasets.generators.points.sphere Points on a flat torus -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^ You can also generate points on a torus. diff --git a/src/python/gudhi/datasets/generators/_points.cc b/src/python/gudhi/datasets/generators/_points.cc index 70ce4925..6baed673 100644 --- a/src/python/gudhi/datasets/generators/_points.cc +++ b/src/python/gudhi/datasets/generators/_points.cc @@ -85,9 +85,7 @@ PYBIND11_MODULE(_points, m) { m.def("sphere", &generate_points_on_sphere, py::arg("n_samples"), py::arg("ambient_dim"), py::arg("radius") = 1., py::arg("sample") = "random", - R"pbdoc( - Generate random i.i.d. points uniformly on a (d-1)-sphere in R^d - + R"pbdoc( Generate random i.i.d. points uniformly on a (d-1)-sphere in R^d :param n_samples: The number of points to be generated. :type n_samples: integer :param ambient_dim: The ambient dimension d. @@ -102,19 +100,14 @@ PYBIND11_MODULE(_points, m) { m.def("ctorus", &generate_points_on_torus, py::arg("n_samples"), py::arg("dim"), py::arg("sample") = "random", - R"pbdoc( - Generate random i.i.d. points on a d-torus in R^2d or as a grid - + R"pbdoc( Generate random i.i.d. points on a d-torus in R^2d or as a grid :param n_samples: The number of points to be generated. :type n_samples: integer :param dim: The dimension of the torus on which points would be generated in R^2*dim. :type dim: integer :param sample: The sample type. Available values are: `"random"` and `"grid"`. Default value is `"random"`. :type sample: string - :rtype: numpy array of float. - The shape of returned numpy array is : - if sample is 'random' : (n_samples, 2*dim). - if sample is 'grid' : (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power. + :rtype: numpy array of float. The shape of returned numpy array is: If sample is 'random': (n_samples, 2*dim). If sample is 'grid': (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power. :returns: the generated points on a torus. )pbdoc"); } diff --git a/src/python/gudhi/datasets/generators/points.py b/src/python/gudhi/datasets/generators/points.py index cf97777d..481f3f71 100644 --- a/src/python/gudhi/datasets/generators/points.py +++ b/src/python/gudhi/datasets/generators/points.py @@ -41,10 +41,7 @@ def torus(n_samples, dim, sample='random'): :param n_samples: The number of points to be generated. :param dim: The dimension of the torus on which points would be generated in R^2*dim. :param sample: The sample type of the generated points. Can be 'random' or 'grid'. - :returns: numpy array containing the generated points on a torus. - The shape of returned numpy array is: - if sample is 'random' : (n_samples, 2*dim). - if sample is 'grid' : (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power. + :returns: numpy array containing the generated points on a torus. The shape of returned numpy array is: If sample is 'random': (n_samples, 2*dim). If sample is 'grid': (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power. """ if sample == 'random': # Generate points randomly -- cgit v1.2.3 From b1a635c72d3e287c012212a491da07357b0c6136 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Fri, 3 Dec 2021 16:17:53 +0100 Subject: Indent properly the docstring Remove redundant return type --- src/python/gudhi/datasets/generators/_points.cc | 16 ++++++++++++---- src/python/gudhi/datasets/generators/points.py | 18 ++++++++++++------ 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'src/python/gudhi/datasets/generators/points.py') diff --git a/src/python/gudhi/datasets/generators/_points.cc b/src/python/gudhi/datasets/generators/_points.cc index 6baed673..82fea25b 100644 --- a/src/python/gudhi/datasets/generators/_points.cc +++ b/src/python/gudhi/datasets/generators/_points.cc @@ -85,7 +85,9 @@ PYBIND11_MODULE(_points, m) { m.def("sphere", &generate_points_on_sphere, py::arg("n_samples"), py::arg("ambient_dim"), py::arg("radius") = 1., py::arg("sample") = "random", - R"pbdoc( Generate random i.i.d. points uniformly on a (d-1)-sphere in R^d + R"pbdoc( + Generate random i.i.d. points uniformly on a (d-1)-sphere in R^d + :param n_samples: The number of points to be generated. :type n_samples: integer :param ambient_dim: The ambient dimension d. @@ -94,20 +96,26 @@ PYBIND11_MODULE(_points, m) { :type radius: float :param sample: The sample type. Default and only available value is `"random"`. :type sample: string - :rtype: numpy array of float :returns: the generated points on a sphere. )pbdoc"); m.def("ctorus", &generate_points_on_torus, py::arg("n_samples"), py::arg("dim"), py::arg("sample") = "random", - R"pbdoc( Generate random i.i.d. points on a d-torus in R^2d or as a grid + R"pbdoc( + Generate random i.i.d. points on a d-torus in R^2d or as a grid + :param n_samples: The number of points to be generated. :type n_samples: integer :param dim: The dimension of the torus on which points would be generated in R^2*dim. :type dim: integer :param sample: The sample type. Available values are: `"random"` and `"grid"`. Default value is `"random"`. :type sample: string - :rtype: numpy array of float. The shape of returned numpy array is: If sample is 'random': (n_samples, 2*dim). If sample is 'grid': (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power. :returns: the generated points on a torus. + + The shape of returned numpy array is: + + If sample is 'random': (n_samples, 2*dim). + + If sample is 'grid': (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power. )pbdoc"); } diff --git a/src/python/gudhi/datasets/generators/points.py b/src/python/gudhi/datasets/generators/points.py index 481f3f71..9bb2799d 100644 --- a/src/python/gudhi/datasets/generators/points.py +++ b/src/python/gudhi/datasets/generators/points.py @@ -19,15 +19,15 @@ def _generate_random_points_on_torus(n_samples, dim): # Based on angles, construct points of size n_samples*dim on a circle and reshape the result in a n_samples*2*dim array array_points = np.column_stack([np.cos(alpha), np.sin(alpha)]).reshape(-1, 2*dim) - + return array_points def _generate_grid_points_on_torus(n_samples, dim): - + # Generate points on a dim-torus as a grid n_samples_grid = int((n_samples+.5)**(1./dim)) # add .5 to avoid rounding down with numerical approximations alpha = np.linspace(0, 2*np.pi, n_samples_grid, endpoint=False) - + array_points = np.column_stack([np.cos(alpha), np.sin(alpha)]) array_points_idx = np.empty([n_samples_grid]*dim + [dim], dtype=int) for i, x in enumerate(np.ix_(*([np.arange(n_samples_grid)]*dim))): @@ -35,13 +35,19 @@ def _generate_grid_points_on_torus(n_samples, dim): return array_points[array_points_idx].reshape(-1, 2*dim) def torus(n_samples, dim, sample='random'): - """ + """ Generate points on a flat dim-torus in R^2dim either randomly or on a grid - + :param n_samples: The number of points to be generated. :param dim: The dimension of the torus on which points would be generated in R^2*dim. :param sample: The sample type of the generated points. Can be 'random' or 'grid'. - :returns: numpy array containing the generated points on a torus. The shape of returned numpy array is: If sample is 'random': (n_samples, 2*dim). If sample is 'grid': (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power. + :returns: numpy array containing the generated points on a torus. + + The shape of returned numpy array is: + + If sample is 'random': (n_samples, 2*dim). + + If sample is 'grid': (⌊n_samples**(1./dim)⌋**dim, 2*dim), where shape[0] is rounded down to the closest perfect 'dim'th power. """ if sample == 'random': # Generate points randomly -- cgit v1.2.3