From d941ebc854880a06707999f677137a9d6ff7473f Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 26 Jan 2022 15:21:20 +0100 Subject: Add datasets remote fetching module to doc --- src/python/doc/datasets.inc | 14 ++++ src/python/doc/datasets.rst | 118 +++++++++++++++++++++++++++++++++ src/python/doc/datasets_generators.inc | 14 ---- src/python/doc/datasets_generators.rst | 105 ----------------------------- src/python/doc/index.rst | 6 +- 5 files changed, 135 insertions(+), 122 deletions(-) create mode 100644 src/python/doc/datasets.inc create mode 100644 src/python/doc/datasets.rst delete mode 100644 src/python/doc/datasets_generators.inc delete mode 100644 src/python/doc/datasets_generators.rst (limited to 'src/python/doc') diff --git a/src/python/doc/datasets.inc b/src/python/doc/datasets.inc new file mode 100644 index 00000000..95a87678 --- /dev/null +++ b/src/python/doc/datasets.inc @@ -0,0 +1,14 @@ +.. table:: + :widths: 30 40 30 + + +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ + | .. figure:: | Datasets either generated or fetched. | :Authors: Hind Montassif | + | img/sphere_3d.png | | | + | | | :Since: GUDHI 3.5.0 | + | | | | + | | | :License: MIT (`LGPL v3 `_) | + | | | | + | | | :Requires: `CGAL `_ | + +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ + | * :doc:`datasets` | + +-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/datasets.rst b/src/python/doc/datasets.rst new file mode 100644 index 00000000..4fa8a628 --- /dev/null +++ b/src/python/doc/datasets.rst @@ -0,0 +1,118 @@ + +:orphan: + +.. 
To get rid of WARNING: document isn't included in any toctree + +================ +Datasets manual +================ + +Datasets generators +=================== + +We provide the generation of different customizable datasets to use as inputs for Gudhi complexes and data structures. + +Points generators +------------------ + +The module **points** enables the generation of random points on a sphere, and of points on a torus, either randomly or on a grid. + +Points on sphere +^^^^^^^^^^^^^^^^ + +The function **sphere** enables the generation of random i.i.d. points uniformly on a (d-1)-sphere in :math:`R^d`. +The user should provide the number of points to be generated on the sphere :code:`n_samples` and the ambient dimension :code:`ambient_dim`. +The :code:`radius` of the sphere is optional and is equal to **1** by default. +Only random point generation is currently available. + +The generated points are given as an array of shape :math:`(n\_samples, ambient\_dim)`. + +Example +""""""" + +.. code-block:: python + + from gudhi.datasets.generators import points + from gudhi import AlphaComplex + + # Generate 50 points on a sphere in R^2 + gen_points = points.sphere(n_samples = 50, ambient_dim = 2, radius = 1, sample = "random") + + # Create an alpha complex from the generated points + alpha_complex = AlphaComplex(points = gen_points) + +.. autofunction:: gudhi.datasets.generators.points.sphere + +Points on a flat torus +^^^^^^^^^^^^^^^^^^^^^^ + +You can also generate points on a torus. + +Two functions are available and give the same output: the first one depends on **CGAL**, while the second does not and is written in pure Python. + +On the other hand, two sample types are provided: you can generate points on a d-torus in :math:`R^{2d}` either *randomly* (i.i.d.) or on a *grid*. 
+ +First function: **ctorus** +""""""""""""""""""""""""""" + +The user should provide the number of points to be generated on the torus :code:`n_samples`, and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. +The :code:`sample` argument is optional and is set to **'random'** by default. +In this case, the returned generated points would be an array of shape :math:`(n\_samples, 2*dim)`. +Otherwise, if set to **'grid'**, the points are generated on a grid and would be given as an array of shape: + +.. math:: + + ( \lfloor n\_samples^{1 \over {dim}} \rfloor^{dim}, 2*dim ) + +**Note 1:** The first dimension of the output array is rounded down to the closest perfect :math:`dim^{th}` power. + +**Note 2:** This version is recommended when the user wishes to use **'grid'** as sample type, or **'random'** with a relatively small number of samples (fewer than about 150). + +Example +""""""" +.. code-block:: python + + from gudhi.datasets.generators import points + + # Generate 50 points randomly on a torus in R^6 + gen_points = points.ctorus(n_samples = 50, dim = 3) + + # Generate 27 points on a torus as a grid in R^6 (50 is rounded down to 3^3 = 27) + gen_points = points.ctorus(n_samples = 50, dim = 3, sample = 'grid') + +.. autofunction:: gudhi.datasets.generators.points.ctorus + +Second function: **torus** +""""""""""""""""""""""""""" + +The user should provide the number of points to be generated on the torus :code:`n_samples` and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. +The :code:`sample` argument is optional and is set to **'random'** by default. +The other allowed value of sample type is **'grid'**. + +**Note:** This version is recommended when the user wishes to use **'random'** as sample type with a large number of samples and a low dimension. + +Example +""""""" +.. 
code-block:: python + + from gudhi.datasets.generators import points + + # Generate 50 points randomly on a torus in R^6 + gen_points = points.torus(n_samples = 50, dim = 3) + + # Generate 27 points on a torus as a grid in R^6 + gen_points = points.torus(n_samples = 50, dim = 3, sample = 'grid') + + +.. autofunction:: gudhi.datasets.generators.points.torus + + +Fetching datasets +================= + +We provide some ready-to-use datasets that are not available by default when getting GUDHI, and need to be fetched explicitly. + +.. automodule:: gudhi.datasets.remote + :members: + :special-members: + :show-inheritance: diff --git a/src/python/doc/datasets_generators.inc b/src/python/doc/datasets_generators.inc deleted file mode 100644 index 8d169275..00000000 --- a/src/python/doc/datasets_generators.inc +++ /dev/null @@ -1,14 +0,0 @@ -.. table:: - :widths: 30 40 30 - - +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ - | .. figure:: | Datasets generators (points). | :Authors: Hind Montassif | - | img/sphere_3d.png | | | - | | | :Since: GUDHI 3.5.0 | - | | | | - | | | :License: MIT (`LGPL v3 `_) | - | | | | - | | | :Requires: `CGAL `_ | - +-----------------------------------+--------------------------------------------+--------------------------------------------------------------------------------------+ - | * :doc:`datasets_generators` | - +-----------------------------------+-----------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/python/doc/datasets_generators.rst b/src/python/doc/datasets_generators.rst deleted file mode 100644 index 260c3882..00000000 --- a/src/python/doc/datasets_generators.rst +++ /dev/null @@ -1,105 +0,0 @@ - -:orphan: - -.. 
To get rid of WARNING: document isn't included in any toctree - -=========================== -Datasets generators manual -=========================== - -We provide the generation of different customizable datasets to use as inputs for Gudhi complexes and data structures. - - -Points generators ------------------- - -The module **points** enables the generation of random points on a sphere, random points on a torus and as a grid. - -Points on sphere -^^^^^^^^^^^^^^^^ - -The function **sphere** enables the generation of random i.i.d. points uniformly on a (d-1)-sphere in :math:`R^d`. -The user should provide the number of points to be generated on the sphere :code:`n_samples` and the ambient dimension :code:`ambient_dim`. -The :code:`radius` of sphere is optional and is equal to **1** by default. -Only random points generation is currently available. - -The generated points are given as an array of shape :math:`(n\_samples, ambient\_dim)`. - -Example -""""""" - -.. code-block:: python - - from gudhi.datasets.generators import points - from gudhi import AlphaComplex - - # Generate 50 points on a sphere in R^2 - gen_points = points.sphere(n_samples = 50, ambient_dim = 2, radius = 1, sample = "random") - - # Create an alpha complex from the generated points - alpha_complex = AlphaComplex(points = gen_points) - -.. autofunction:: gudhi.datasets.generators.points.sphere - -Points on a flat torus -^^^^^^^^^^^^^^^^^^^^^^ - -You can also generate points on a torus. - -Two functions are available and give the same output: the first one depends on **CGAL** and the second does not and consists of full python code. - -On another hand, two sample types are provided: you can either generate i.i.d. points on a d-torus in :math:`R^{2d}` *randomly* or on a *grid*. 
- -First function: **ctorus** -""""""""""""""""""""""""""" - -The user should provide the number of points to be generated on the torus :code:`n_samples`, and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. -The :code:`sample` argument is optional and is set to **'random'** by default. -In this case, the returned generated points would be an array of shape :math:`(n\_samples, 2*dim)`. -Otherwise, if set to **'grid'**, the points are generated on a grid and would be given as an array of shape: - -.. math:: - - ( ⌊n\_samples^{1 \over {dim}}⌋^{dim}, 2*dim ) - -**Note 1:** The output array first shape is rounded down to the closest perfect :math:`dim^{th}` power. - -**Note 2:** This version is recommended when the user wishes to use **'grid'** as sample type, or **'random'** with a relatively small number of samples (~ less than 150). - -Example -""""""" -.. code-block:: python - - from gudhi.datasets.generators import points - - # Generate 50 points randomly on a torus in R^6 - gen_points = points.ctorus(n_samples = 50, dim = 3) - - # Generate 27 points on a torus as a grid in R^6 - gen_points = points.ctorus(n_samples = 50, dim = 3, sample = 'grid') - -.. autofunction:: gudhi.datasets.generators.points.ctorus - -Second function: **torus** -""""""""""""""""""""""""""" - -The user should provide the number of points to be generated on the torus :code:`n_samples` and the dimension :code:`dim` of the torus on which points would be generated in :math:`R^{2dim}`. -The :code:`sample` argument is optional and is set to **'random'** by default. -The other allowed value of sample type is **'grid'**. - -**Note:** This version is recommended when the user wishes to use **'random'** as sample type with a great number of samples and a low dimension. - -Example -""""""" -.. 
code-block:: python - - from gudhi.datasets.generators import points - - # Generate 50 points randomly on a torus in R^6 - gen_points = points.torus(n_samples = 50, dim = 3) - - # Generate 27 points on a torus as a grid in R^6 - gen_points = points.torus(n_samples = 50, dim = 3, sample = 'grid') - - -.. autofunction:: gudhi.datasets.generators.points.torus diff --git a/src/python/doc/index.rst b/src/python/doc/index.rst index 2d7921ae..35f4ba46 100644 --- a/src/python/doc/index.rst +++ b/src/python/doc/index.rst @@ -92,7 +92,7 @@ Clustering .. include:: clustering.inc -Datasets generators -******************* +Datasets +******** -.. include:: datasets_generators.inc +.. include:: datasets.inc -- cgit v1.2.3 From 0047eaacaffef2b3da6207123da3ef3d919c0b27 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 9 Mar 2022 15:56:23 +0100 Subject: Add bunny image to the datasets doc --- src/python/doc/datasets.rst | 6 ++++++ src/python/doc/img/bunny.png | Bin 0 -> 48040 bytes 2 files changed, 6 insertions(+) create mode 100644 src/python/doc/img/bunny.png (limited to 'src/python/doc') diff --git a/src/python/doc/datasets.rst b/src/python/doc/datasets.rst index 4fa8a628..62b7dca0 100644 --- a/src/python/doc/datasets.rst +++ b/src/python/doc/datasets.rst @@ -112,6 +112,12 @@ Fetching datasets We provide some ready-to-use datasets that are not available by default when getting GUDHI, and need to be fetched explicitly. +.. figure:: ./img/bunny.png + :figclass: align-center + + 3D Stanford bunny with 35947 vertices. + + .. 
automodule:: gudhi.datasets.remote :members: :special-members: diff --git a/src/python/doc/img/bunny.png b/src/python/doc/img/bunny.png new file mode 100644 index 00000000..769aa530 Binary files /dev/null and b/src/python/doc/img/bunny.png differ -- cgit v1.2.3 From dcd4204d62a4c9a4f3d9ebc61341fba25ae19687 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 24 May 2022 11:44:49 +0200 Subject: Use autofunction instead of automodule in doc and add 2d spiral image --- src/python/doc/datasets.rst | 16 ++++++++++++---- src/python/doc/img/spiral_2d.png | Bin 0 -> 279276 bytes 2 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 src/python/doc/img/spiral_2d.png (limited to 'src/python/doc') diff --git a/src/python/doc/datasets.rst b/src/python/doc/datasets.rst index 62b7dca0..d2975533 100644 --- a/src/python/doc/datasets.rst +++ b/src/python/doc/datasets.rst @@ -112,13 +112,21 @@ Fetching datasets We provide some ready-to-use datasets that are not available by default when getting GUDHI, and need to be fetched explicitly. +.. autofunction:: gudhi.datasets.remote.fetch_bunny + .. figure:: ./img/bunny.png :figclass: align-center 3D Stanford bunny with 35947 vertices. -.. automodule:: gudhi.datasets.remote - :members: - :special-members: - :show-inheritance: +.. autofunction:: gudhi.datasets.remote.fetch_spiral_2d + +.. figure:: ./img/spiral_2d.png + :figclass: align-center + + 2D spiral with 114562 vertices. + +.. autofunction:: gudhi.datasets.remote.get_data_home + +.. 
autofunction:: gudhi.datasets.remote.clear_data_home diff --git a/src/python/doc/img/spiral_2d.png b/src/python/doc/img/spiral_2d.png new file mode 100644 index 00000000..abd247cd Binary files /dev/null and b/src/python/doc/img/spiral_2d.png differ -- cgit v1.2.3 From ce34ee3e5c28c48d605f23332cfa3c10e471a047 Mon Sep 17 00:00:00 2001 From: Hind-M Date: Tue, 24 May 2022 15:57:52 +0200 Subject: Make get_data_home function private --- src/python/doc/datasets.rst | 2 -- src/python/gudhi/datasets/remote.py | 6 +++--- src/python/test/test_remote_datasets.py | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'src/python/doc') diff --git a/src/python/doc/datasets.rst b/src/python/doc/datasets.rst index d2975533..8b0912c4 100644 --- a/src/python/doc/datasets.rst +++ b/src/python/doc/datasets.rst @@ -127,6 +127,4 @@ We provide some ready-to-use datasets that are not available by default when get 2D spiral with 114562 vertices. -.. autofunction:: gudhi.datasets.remote.get_data_home - .. autofunction:: gudhi.datasets.remote.clear_data_home diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index d2ae2a75..7e6f647f 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -16,7 +16,7 @@ import shutil import numpy as np -def get_data_home(data_home = None): +def _get_data_home(data_home = None): """ Return the path of the remote datasets directory. This folder is used to store remotely fetched datasets. @@ -55,7 +55,7 @@ def clear_data_home(data_home = None): If `None` and the 'GUDHI_DATA' environment variable does not exist, the default directory to be removed is set to "~/gudhi_data". """ - data_home = get_data_home(data_home) + data_home = _get_data_home(data_home) shutil.rmtree(data_home) def _checksum_sha256(file_path): @@ -130,7 +130,7 @@ def _get_archive_path(file_path, label): Full path of archive including filename. 
""" if file_path is None: - archive_path = join(get_data_home(), label) + archive_path = join(_get_data_home(), label) dirname = split(archive_path)[0] makedirs(dirname, exist_ok=True) else: diff --git a/src/python/test/test_remote_datasets.py b/src/python/test/test_remote_datasets.py index cde9fa22..e5d2de82 100644 --- a/src/python/test/test_remote_datasets.py +++ b/src/python/test/test_remote_datasets.py @@ -18,8 +18,8 @@ from os.path import isdir, expanduser, exists from os import remove, environ def test_data_home(): - # Test get_data_home and clear_data_home on new empty folder - empty_data_home = remote.get_data_home(data_home="empty_folder_for_test") + # Test _get_data_home and clear_data_home on new empty folder + empty_data_home = remote._get_data_home(data_home="empty_folder_for_test") assert isdir(empty_data_home) remote.clear_data_home(data_home=empty_data_home) -- cgit v1.2.3 From 899fb73b33cb6976c39a42ba26a31cf2acde63ee Mon Sep 17 00:00:00 2001 From: Hind-M Date: Wed, 25 May 2022 16:53:04 +0200 Subject: Add info in the doc concerning default data_home and 'GUDHI_DATA' env variable --- src/python/doc/datasets.rst | 3 +++ src/python/gudhi/datasets/remote.py | 13 +++++++++++++ 2 files changed, 16 insertions(+) (limited to 'src/python/doc') diff --git a/src/python/doc/datasets.rst b/src/python/doc/datasets.rst index 8b0912c4..2d11a19d 100644 --- a/src/python/doc/datasets.rst +++ b/src/python/doc/datasets.rst @@ -112,6 +112,9 @@ Fetching datasets We provide some ready-to-use datasets that are not available by default when getting GUDHI, and need to be fetched explicitly. +By **default**, the fetched datasets directory is set to a folder named **'gudhi_data'** in the **user home folder**. +Alternatively, it can be set using the **'GUDHI_DATA'** environment variable. + .. autofunction:: gudhi.datasets.remote.fetch_bunny .. 
figure:: ./img/bunny.png diff --git a/src/python/gudhi/datasets/remote.py b/src/python/gudhi/datasets/remote.py index 7e6f647f..48bdcfa6 100644 --- a/src/python/gudhi/datasets/remote.py +++ b/src/python/gudhi/datasets/remote.py @@ -143,6 +143,7 @@ def _get_archive_path(file_path, label): def fetch_spiral_2d(file_path = None): """ Fetch spiral_2d dataset remotely. + Note that if the dataset already exists in the target location, it is not downloaded again, and the corresponding array is returned from cache. @@ -150,8 +151,12 @@ def fetch_spiral_2d(file_path = None): ---------- file_path : string Full path of the downloaded file including filename. + Default is None, meaning that it's set to "data_home/points/spiral_2d/spiral_2d.npy". + The "data_home" directory is set by default to "~/gudhi_data", + unless the 'GUDHI_DATA' environment variable is set. + Returns ------- points: numpy array @@ -170,7 +175,9 @@ def fetch_spiral_2d(file_path = None): def fetch_bunny(file_path = None, accept_license = False): """ Fetch Stanford bunny dataset remotely and its LICENSE file. + This dataset contains 35947 vertices. + Note that if the dataset already exists in the target location, it is not downloaded again, and the corresponding array is returned from cache. @@ -178,10 +185,16 @@ def fetch_bunny(file_path = None, accept_license = False): ---------- file_path : string Full path of the downloaded file including filename. + Default is None, meaning that it's set to "data_home/points/bunny/bunny.npy". In this case, the LICENSE file would be downloaded as "data_home/points/bunny/bunny.LICENSE". + + The "data_home" directory is set by default to "~/gudhi_data", + unless the 'GUDHI_DATA' environment variable is set. + accept_license : boolean Flag to specify if user accepts the file LICENSE and prevents from printing the corresponding license terms. + Default is False. Returns -- cgit v1.2.3